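# Given the original question and its decomposed sub-steps, predict whether the
# student can answer each sub-step correctly. The model outputs yes/no.
# Prompts are built from each record's knowledge state and can be processed in batches.
# The sub-steps come from splitting the question's solution into several
# independent, ordered steps.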
import json
import os
from langchain import PromptTemplate
from langchain_community.chat_models import ChatOpenAI


# Chat-model client shared by every batch request in this script.
# Settings are collected in one mapping so they can be read at a glance.
_llm_settings = {
    "model_name": "gpt-4o",
    "openai_api_base": "https://api.xty.app/v1",
    "temperature": 0,
    "max_tokens": 1500,
}
llm = ChatOpenAI(**_llm_settings)

# Map an input file path to the path its results are written to.
def modify_path(file_path: str) -> str:
    """Return *file_path* with each 'data_for_fun' replaced by 'tt_data'.

    A path that does not contain 'data_for_fun' is returned unchanged.
    """
    source_marker, target_marker = 'data_for_fun', 'tt_data'
    # Splitting on the marker and re-joining with the replacement swaps
    # every occurrence of the marker.
    return target_marker.join(file_path.split(source_marker))

# Input location and the JSON files to process. With an empty input_dir the
# file names are joined to nothing, so they are used as given.
input_dir = ''
json_files = [
    'math1.json',
    'physics1.json',
]

# Prompt sent to the model for every problem record. It has three
# placeholders -- {question}, {state} and {steps} -- that are filled from the
# record's 'question', 'state' and 'steps' fields. The model is asked for a
# brief per-step analysis followed by a list of "yes"/"no" answers, one per
# sub-step.
template = """
You are a teacher who can accurately assess the student's knowledge level. Based on the student's knowledge status, infer whether the student can answer each sub-step of the decomposed question.

Input:
1. Question
2. Knowledge status: The student's knowledge points are presented in a hierarchical structure. The number after each knowledge point indicates the student's mastery level: 0 means not mastered, 1 means mastered.
3. Decomposed sub-steps of the question: Each sub-step contains a guiding question and an explanation.

Task:
Based on the student's knowledge status, determine whether they can answer the question in each sub-step. If the student can answer the sub-step, output "yes", otherwise output "no".
Output format:
First, provide a brief analysis for each sub-step.
Finally, output a list where each element corresponds to the response for each sub-step, such as: `["yes", "no", "no", "no", "yes"]`.

Question: {question}
Knowledge status: {state}
Sub-steps: {steps}
Output:

"""


# Parse the prompt template once; it is the same for every record and file.
prompt_template = PromptTemplate.from_template(template)

for json_file in json_files:
    file_path = os.path.join(input_dir, json_file)

    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Build one fully formatted prompt per problem record.
    final_prompts = [
        prompt_template.format(
            question=problem_data['question'],
            state=problem_data['state'],
            steps=problem_data['steps'],
        )
        for problem_data in data
    ]

    # Send all prompts of this file in a single batch call.
    responses = llm.batch(final_prompts)

    # Responses are paired with records by position, matching the order in
    # which the prompts were built.
    for problem_data, response in zip(data, responses):
        problem_data['predict_steps'] = response.content

    # Save the processed data. When modify_path leaves the path unchanged,
    # this overwrites the input file.
    output_file_path = modify_path(file_path)
    output_dir = os.path.dirname(output_file_path)
    # A bare file name has an empty directory part, and os.makedirs('')
    # raises FileNotFoundError, so only create the directory when one is named.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        json.dump(data, output_file, ensure_ascii=False, indent=4)