from openai import OpenAI
import json

# API key for the OpenAI client. Leave it empty to let the SDK fall back to
# the OPENAI_API_KEY environment variable: an empty string passed through
# verbatim would be sent as the credential and every request would be rejected.
API_KEY = ""
client = OpenAI(api_key=API_KEY or None)

# Paths of the two JSON Lines files to evaluate (one JSON record per line).
file_path_1 = ""
file_path_2 = ""

# Tallies for the first file: answers judged correct / examples evaluated.
correct_1 = 0
total_1 = 0

# Tallies for the second file: answers judged correct / examples evaluated.
correct_2 = 0
total_2 = 0

def _build_prompt(item):
    """Format one dataset record as the single prompt string sent to the model.

    Reads the 'story', 'question' and 'containers' keys of *item*. When
    'containers' is a list its entries are joined with ", "; any other value
    is interpolated as-is.
    """
    choices_text = item["containers"]
    if isinstance(choices_text, list):
        choices_text = ", ".join(choices_text)
    return f"Story: {item['story']} Question: {item['question']} Choices: {choices_text}"


def _judge_says_correct(verdict_text):
    """Return True when the judge's reply starts with the word "True".

    The judge is asked to output True or False, but an exact comparison with
    "True" miscounts harmless variations such as "True.", "true", "**True**"
    or a trailing newline. Only the first whitespace-delimited token is
    inspected, after stripping surrounding punctuation/markdown and folding
    case, so a reply like "True - the answer matches" is also accepted.
    """
    tokens = verdict_text.split()
    if not tokens:
        return False
    first_word = tokens[0].strip(".,:;!*`\"'()[]").casefold()
    return first_word == "true"


def _evaluate_file(path, system_instructions, max_lines=None):
    """Run every "true_belief" record of a JSONL file through the model and judge it.

    Args:
        path: JSON Lines file; each non-blank line is parsed with json.loads
            and must provide 'story_type', 'story', 'question', 'containers'
            and 'answer'.
        system_instructions: Text passed as `instructions` to the answering
            model.
        max_lines: Optional cap on how many lines of the file are read
            (counting blank and skipped lines); None reads the whole file.

    Returns:
        A (correct, total) tuple: how many answers the judge accepted and how
        many records were evaluated.
    """
    correct = 0
    total = 0
    with open(path, 'r', encoding='utf-8') as f:
        for line_number, line in enumerate(f):
            if max_lines is not None and line_number >= max_lines:
                break
            if not line.strip():
                continue
            item = json.loads(line)
            if item['story_type'] != "true_belief":
                continue

            entire_instruction = _build_prompt(item)
            answer = item['answer']

            response = client.responses.create(
                model="chatgpt-4o-latest",
                instructions=system_instructions,
                input=entire_instruction,
                temperature=0,
            )
            model_output = response.output_text

            # Judge the model's answer with gpt-4o.
            judge_instruction = f"The correct answer is {answer}. The model output is {model_output}. Determine whether the model output is correct. If it is correct, output True If it is incorrect, output False"
            judge_response = client.responses.create(
                model="gpt-4o-2024-08-06",
                input=judge_instruction,
                temperature=0,
            )

            print(judge_response.output_text)

            total += 1
            if _judge_says_correct(judge_response.output_text):
                correct += 1
    return correct, total


# Both files go through the same pipeline; only the system instruction given
# to the answering model differs between the two runs.
correct_1, total_1 = _evaluate_file(
    file_path_1,
    "You are a helpful assistant",
)
correct_2, total_2 = _evaluate_file(
    file_path_2,
    "Read the following social event related to you and answer the questions.",
)
            
# Accuracy is undefined when a run evaluated no examples (empty file, or no
# "true_belief" records). Report NaN in that case instead of crashing with
# ZeroDivisionError before any of the tallies are printed.
accuracy_1 = correct_1 / total_1 if total_1 else float("nan")
accuracy_2 = correct_2 / total_2 if total_2 else float("nan")

# Raw tallies first (correct, total for each file), then the two accuracies.
print(correct_1)
print(total_1)
print(correct_2)
print(total_2)
print(f"{accuracy_1:.4f}")
print(f"{accuracy_2:.4f}")
    


#print(response.output_text)