import json
import re
import anthropic

mmlu_prompt = json.load(open('lib_prompt/mmlu-cot.json'))

with open('lib_prompt/mmlu-cot-claude-multiple.json', 'w') as f:
    json.dump(mmlu_prompt, f)

mmlu_prompt_claude = json.load(open('lib_prompt/mmlu-cot-claude-multiple.json'))

subjects = mmlu_prompt_claude.keys()
for subject in subjects:
    prompt = mmlu_prompt_claude[subject]
    subject_mod = subject.replace("_", " ")

    #modify human prompt
    prompt = anthropic.HUMAN_PROMPT + "\n" + prompt
    prompt = prompt.replace(
        "The following are multiple choice questions (with answers) about %s." % subject_mod,
        "You are an expert in %s. You will be given a question in %s." % (subject_mod, subject_mod) + \
        " First give step-by-step reasoning about how to solve the question. Then output the answer.\n" + \
        anthropic.AI_PROMPT + "\n" + \
        "Sure, I can do that. I will be an expert in %s and answer your question." % subject_mod   
            )
    
    # add human prompt prior to question
    prompt = re.sub(r'Q:.*\n', anthropic.HUMAN_PROMPT + "\n" + r'\g<0>', prompt)

    #add clarifying prompt based on subject
    if subject_mod in ["business ethics", 
                       "computer security",
                       "marketing"]:
        prompt = prompt.replace("\n(A)", "\nWhich one of the four choices completes the question correctly, (A), (B), (C) or (D)?" + "\nChoices:" + " (A)")
    elif subject_mod in ["college medicine", 
                         "high school biology", 
                         "high school european history", 
                         "high school geography", 
                         "high school government and politics",
                         "high school macroeconomics",
                         "moral disputes"]:
        prompt = prompt.replace("\n(A)", "\nChoices:" + " (A)")
    elif subject_mod == "college physics":
        prompt = prompt.replace("\n(A)", "\nWhich one of the four choices is correct about the question, (A), (B), (C) or (D)?" + "\nChoices:" + " (A)")
    else:
        prompt = prompt.replace("\n(A)", "\nWhich one of the four choices is correct, (A), (B), (C) or (D)?" + "\nChoices:" + " (A)")

    # add a line break after each choice
    prompt = re.sub(r'Choices: \(A\).*\(B\).*\(C\).*\(D\).*\n', 
                    lambda m: m.group().replace('(A', '\n(A').replace('(B', '\n(B').replace('(C', '\n(C').replace('(D', '\n(D'), prompt)

    # remove the parentheses of the choice in answer
    pattern = re.compile(r'The answer is \([A-D]\)')
    prompt = pattern.sub(lambda m: m.group().replace('(', '').replace(')', ''), prompt)

    # add line break to the answer
    prompt = prompt.replace("The answer is ", "\nThe answer is ")

    # move the reasoning prompt after the choices, add AI prompt
    prompt = prompt.replace("\nA: Let's think step by step.", anthropic.AI_PROMPT + "\nLet's think step by step. \nA:")
    mmlu_prompt_claude[subject] = prompt

    #print(mmlu_prompt_claude[subject])

with open('lib_prompt/mmlu-cot-claude-multiple.json', 'w') as f:
    json.dump(mmlu_prompt_claude, f)

# convert the json file to txt file to easy view
with open('lib_prompt/mmlu-cot-claude-multiple.txt', 'w') as f:
    for subject in subjects:
        f.write(subject + "\n")
        f.write(mmlu_prompt_claude[subject] + "\n\n")

