import anthropic
from openai import OpenAI
import json

# Optional explicit API keys. Leave these empty to let each SDK fall back to
# its environment variable (ANTHROPIC_API_KEY / OPENAI_API_KEY), which avoids
# storing secrets in the source file.
anthropic_api_key = ""
openai_api_key = ""

# Passing "" would override the environment lookup with an unusable key, so an
# empty value is converted to None, which is what triggers the SDK default.
client = anthropic.Anthropic(api_key=anthropic_api_key or None)

# Second client, used for the OpenAI judge calls.
client2 = OpenAI(api_key=openai_api_key or None)

# Input files (one JSON object per line) for the first and second evaluation
# run. Both are placeholders and must be filled in before the script is run.
file_path_1, file_path_2 = "", ""

# Tallies for the first run: answers judged correct / examples evaluated.
correct_1, total_1 = 0, 0

# Tallies for the second run.
correct_2, total_2 = 0, 0

def _build_prompt(item):
    """Return the user prompt (story, question and choices) for one record.

    ``item["containers"]`` may be a list, in which case its entries are joined
    with ", "; any other value is interpolated as-is.
    """
    choices_text = item["containers"]
    if isinstance(choices_text, list):
        choices_text = ", ".join(choices_text)
    return f"Story: {item['story']} Question: {item['question']} Choices: {choices_text}"


def _judge_says_correct(verdict):
    """Return True when the judge's reply is an affirmative verdict.

    The judge is asked to output "True" or "False", but a chat model may add
    surrounding whitespace, a trailing period, or different capitalisation.
    Comparing against the exact string "True" would count all of those as
    incorrect, so the reply is normalised first.
    """
    return verdict.strip().rstrip(".").casefold() == "true"


def _evaluate_file(file_path, system_prompt, last_skipped_index=313):
    """Run one evaluation pass over a JSON-lines file.

    Each non-blank line is parsed as a JSON object. Only records whose
    ``story_type`` is "true_belief" are evaluated: the record is sent to the
    Anthropic model with ``system_prompt``, and the OpenAI judge model then
    decides whether the reply matches the record's ``answer``.

    Args:
        file_path: Path of the UTF-8 JSON-lines file to read.
        system_prompt: System prompt given to the evaluated model.
        last_skipped_index: Lines whose zero-based index is less than or
            equal to this value are skipped without being parsed.

    Returns:
        A ``(correct, total)`` tuple: the number of replies the judge accepted
        and the number of records evaluated.
    """
    correct = 0
    total = 0
    with open(file_path, 'r', encoding='utf-8') as f:
        for i, line in enumerate(f):
            if i <= last_skipped_index:
                continue
            if not line.strip():
                continue
            item = json.loads(line)
            if item['story_type'] != "true_belief":
                continue

            entire_instruction = _build_prompt(item)
            answer = item['answer']

            message = client.messages.create(
                model="claude-3-7-sonnet-20250219",
                max_tokens=1024,
                temperature=0,
                system=system_prompt,
                messages=[
                    {"role": "user", "content": entire_instruction}
                ],
            )
            model_output = message.content[0].text

            # Judge the result with gpt-4o.
            judge_instruction = f"The correct answer is {answer}. The model output is {model_output}. Determine whether the model output is correct. If it is correct, output True If it is incorrect, output False"
            judge_response = client2.responses.create(
                model="gpt-4o-2024-08-06",
                input=judge_instruction,
                temperature=0,
            )

            # Echo the raw verdict so each decision can be audited in the log.
            print(judge_response.output_text)

            total += 1
            if _judge_says_correct(judge_response.output_text):
                correct += 1
    return correct, total


# First run: generic assistant system prompt.
correct_1, total_1 = _evaluate_file(file_path_1, "You are a helpful assistant")

# Second run: system prompt that frames the story as a social event.
correct_2, total_2 = _evaluate_file(
    file_path_2,
    "Read the following social event related to you and answer the questions.",
)
            
# Accuracy is undefined when a run evaluated no examples (for instance when no
# line passed the filters). Report NaN for that run instead of raising
# ZeroDivisionError, so the counts gathered for the other run still print.
accuracy_1 = correct_1 / total_1 if total_1 else float("nan")
accuracy_2 = correct_2 / total_2 if total_2 else float("nan")

# Raw counts first, then the two accuracies rounded to four decimals.
print(correct_1)
print(total_1)
print(correct_2)
print(total_2)
print(f"{accuracy_1:.4f}")
print(f"{accuracy_2:.4f}")
    


#print(response.output_text)

