import os
from openai import OpenAI

# Client for the model under test, reached through DashScope's
# OpenAI-compatible endpoint.
client = OpenAI(
    # The key is taken from the DASHSCOPE_API_KEY environment variable. If you
    # have not configured that variable, replace the expression below with
    # your Bailian (Model Studio) API key, e.g. api_key="sk-xxx".
    # Falls back to the empty string (the previous hard-coded value) when the
    # variable is unset.
    api_key=os.getenv("DASHSCOPE_API_KEY", ""),
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)


from openai import OpenAI
import json

# Client for the judge model (used with the Responses API further down).
# The key is read from OPENAI_API_KEY; the empty-string fallback matches the
# previous hard-coded value when the variable is unset.
client2 = OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))

# Input datasets, one per evaluation run. Each file is read line by line and
# every non-blank line is parsed as a JSON object.
# NOTE(review): both paths are empty placeholders and must be filled in before
# the script can open them.
file_path_1, file_path_2 = "", ""

# Running tallies for the first run (file_path_1).
correct_1, total_1 = 0, 0

# Running tallies for the second run (file_path_2).
correct_2, total_2 = 0, 0

# Run 1: ask the model under test every "true_belief" story in file_path_1
# using a generic system prompt, then let the judge model grade each answer.
# Updates the module-level counters total_1 / correct_1.
with open(file_path_1, 'r', encoding='utf-8') as f:
    for line in f:
        # Blank lines carry no record; skipping them keeps json.loads from
        # failing on an empty string.
        if not line.strip():
            continue

        item = json.loads(line)
        # Only the true-belief subset is scored.
        if item['story_type'] != "true_belief":
            continue

        # "containers" may already be a string or may be a list of options.
        choices_text = item["containers"]
        if isinstance(choices_text, list):
            choices_text = ", ".join(choices_text)

        entire_instruction = f"Story: {item['story']} Question: {item['question']} Choices: {choices_text}"
        answer = item['answer']

        completion = client.chat.completions.create(
            model="qwen2.5-7b-instruct",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": entire_instruction},
            ],
            stream=True
        )

        # Reassemble the streamed reply. A chunk can arrive with an empty
        # `choices` list or with `delta.content` set to None (for example the
        # closing chunk), so both cases are skipped instead of being
        # concatenated, which would raise IndexError / TypeError.
        pieces = []
        for chunk in completion:
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece:
                pieces.append(piece)
        model_output = "".join(pieces)

        # Grade the answer with gpt-4o acting as judge.
        judge_instruction = f"The correct answer is {answer}. The model output is {model_output}. Determine whether the model output is correct. If it is correct, output True If it is incorrect, output False"

        judge_response = client2.responses.create(
            model="gpt-4o-2024-08-06",
            input=judge_instruction,
            temperature = 0
        )

        print(judge_response.output_text)

        # Normalise the verdict before comparing: surrounding whitespace,
        # trailing punctuation, quoting/markdown markers and letter case must
        # not turn a "True" verdict into a miss.
        verdict = judge_response.output_text.strip().strip(".\"'*").lower()

        total_1 += 1
        if verdict == "true":
            correct_1 += 1

# Run 2: same procedure over file_path_2, but the system prompt frames the
# story as a social event related to the model itself. Updates the
# module-level counters total_2 / correct_2.
with open(file_path_2, 'r', encoding='utf-8') as f:
    for line in f:
        # Blank lines carry no record; skipping them keeps json.loads from
        # failing on an empty string.
        if not line.strip():
            continue

        item = json.loads(line)
        # Only the true-belief subset is scored.
        if item['story_type'] != "true_belief":
            continue

        # "containers" may already be a string or may be a list of options.
        choices_text = item["containers"]
        if isinstance(choices_text, list):
            choices_text = ", ".join(choices_text)

        entire_instruction = f"Story: {item['story']} Question: {item['question']} Choices: {choices_text}"
        answer = item['answer']

        completion = client.chat.completions.create(
            model="qwen2.5-7b-instruct",
            messages=[
                {"role": "system", "content": "Read the following social event related to you and answer the questions."},
                {"role": "user", "content": entire_instruction},
            ],
            stream=True
        )

        # Reassemble the streamed reply. A chunk can arrive with an empty
        # `choices` list or with `delta.content` set to None (for example the
        # closing chunk), so both cases are skipped instead of being
        # concatenated, which would raise IndexError / TypeError.
        pieces = []
        for chunk in completion:
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece:
                pieces.append(piece)
        model_output = "".join(pieces)

        # Grade the answer with gpt-4o acting as judge.
        judge_instruction = f"The correct answer is {answer}. The model output is {model_output}. Determine whether the model output is correct. If it is correct, output True If it is incorrect, output False"

        judge_response = client2.responses.create(
            model="gpt-4o-2024-08-06",
            input=judge_instruction,
            temperature = 0
        )

        print(judge_response.output_text)

        # Normalise the verdict before comparing: surrounding whitespace,
        # trailing punctuation, quoting/markdown markers and letter case must
        # not turn a "True" verdict into a miss.
        verdict = judge_response.output_text.strip().strip(".\"'*").lower()

        total_2 += 1
        if verdict == "true":
            correct_2 += 1

            
# Final report: raw counts for both runs, then each accuracy to four decimals.
# A run that scored no records (total == 0) has no defined accuracy; report
# NaN for it instead of crashing with ZeroDivisionError after all API calls
# have already been made.
accuracy_1 = correct_1 / total_1 if total_1 else float("nan")
accuracy_2 = correct_2 / total_2 if total_2 else float("nan")
print(correct_1)
print(total_1)
print(correct_2)
print(total_2)
print(f"{accuracy_1:.4f}")
print(f"{accuracy_2:.4f}")