import os
import json
import argparse
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_community.chat_models.tongyi import ChatTongyi
from utils import extract_json

# Command-line interface: the input file, the output directory and the output
# file name are all mandatory and are consumed further down via `args`.
parser = argparse.ArgumentParser(description='Process teaching data')
for _flag, _help_text in (
    ('--input', 'Path to input JSON file'),
    ('--output_dir', 'Directory to save output JSON file'),
    ('--output_file', 'Output file name'),
):
    parser.add_argument(_flag, required=True, help=_help_text)
args = parser.parse_args()

# Prompt sent to the chat model for every tutoring step. It asks the model to
# decide whether the teacher explained a knowledge point in the given turn and
# whether that point appears in the knowledge point status table.
#
# Placeholders filled in per step by PromptTemplate:
#   {purpose}     - the goal of the dialogue turn
#   {teacher_dia} - the teacher's utterance
#   {state}       - the knowledge point status table
# Doubled braces ({{ and }}) are escapes that become literal braces once the
# template is formatted (the LaTeX example and the sample output rely on this).
#
# NOTE(review): the sample output below uses typographic quotes and a trailing
# comma, and the key is spelled "is_in_knowlege_tree". The text is left
# untouched because the model's replies (and whatever parses them) may depend
# on the exact wording -- confirm with the consumer before "fixing" any of it.
template = """
        According to the content and purpose of the teacher's dialogue, determine whether the teacher has explained a specific knowledge point in detail during this round of dialogue. If so, determine whether the knowledge point is in the knowledge point status table, and output the relevant information.
        Input:
        **Dialogue purpose**: {purpose}
        **Teacher's dialogue**: {teacher_dia}
        **Knowledge point status table**: {state}
        Steps:
                1. Focus on **the purpose of the dialogue** and refer to **the teacher's dialogue content** to determine whether there is an explanation of the knowledge point and the core knowledge point of the explanation. There should be at most one core knowledge point.
                2. Determine whether the core knowledge point is in the knowledge point status table.
                3. If the knowledge point is in the status table, the name of the knowledge point in the status table should be strictly used in the output, together with the student's mastery status. If it is not in the status table, only the name of the knowledge point should be output.
                4. If the teacher has not explained the knowledge point, output an empty list for “knowledge_point” and “is_in_knowlege_tree” as “no”.
        Notes:
        1. The knowledge point in the dialogue purpose is usually the core knowledge point, and then it is determined whether the teacher is explaining this knowledge point.
                2. The knowledge state table contains the name of the knowledge point and the mastery status of the student (1 means mastered, 0 means not mastered), as well as the hierarchical structure of the knowledge point (indicated by the # symbol). If the knowledge point is in the state table, please output the name of the knowledge point and the mastery status of the student, e.g. “Definition of the set of real numbers (1).”
        3. A detailed explanation does not refer to asking relevant questions, but rather to explaining the knowledge point. For example, ”It seems that you already have a good understanding of the modulus calculation of complex numbers. Your calculation is correct, the value of $z \\cdot \\bar{{z}}$ is $\\frac{{1}}{{4}}$, so the answer is A” is not teaching; ‘Let's see how we can apply this knowledge to verify whether xxx is an increasing function. We need to calculate the derivative of this function and determine its monotonicity from the sign of the derivative.’ is teaching.
        
        Output:
        The output should be a Markdown code snippet in the following format, including the surrounding tags “```json” and “```”:
        {{
        “is_teaching”: “yes/no”,
        “knowledge_point”: [“Knowledge point (mastery level)”],
        “is_in_knowlege_tree”: “yes/no”,
        }}"""

# Settings for the chat model used by the batch calls below. Requests are sent
# to the endpoint given by `openai_api_base` rather than the library default.
_llm_settings = {
    "model_name": "gpt-3.5-turbo-1106",
    "temperature": 0,
    "openai_api_base": "https://api.xty.app/v1",
    "max_tokens": 4096,
}
llm = ChatOpenAI(**_llm_settings)

# Resolve the input location from the parsed command-line arguments.
input_file = args.input

# Create the output directory first (no error if it already exists), then
# derive the full path of the output file inside it.
os.makedirs(args.output_dir, exist_ok=True)
output_path = os.path.join(args.output_dir, args.output_file)

# Read the whole input JSON document into memory.
with open(input_file, 'r', encoding='utf-8') as src:
    data = json.loads(src.read())

# Build the prompt template once: it is identical for every step, only the
# values substituted into it change.
prompt_template = PromptTemplate.from_template(template)

# The output file is opened in append mode, so records written by earlier
# runs are kept and new records are added after them.
with open(output_path, 'a', encoding='utf-8') as f:
    for item in data:
        # Pull the fields needed for this record.
        number = item.get('number')
        state = item.get('state', '')
        # `or []` also covers a "tutor" key that is present but null, which
        # would otherwise fail when iterated.
        tutor_data = item.get('tutor') or []

        # One fully rendered prompt per tutoring step.
        prompt_list = [
            prompt_template.format(
                purpose=step.get('teacher_goal', ''),
                teacher_dia=step.get('teacher_res', ''),
                state=state,
            )
            for step in tutor_data
        ]

        # Send all prompts of this item in one batch, then hand the raw
        # replies to extract_json (imported from utils; its exact return
        # shape is not visible here -- it must be JSON-serializable).
        llm_response = llm.batch(prompt_list)
        result = extract_json(llm_response)

        # Serialize before touching the file: if `result` cannot be encoded,
        # the error is raised here and no truncated line ends up in the
        # JSON Lines output.
        line = json.dumps(
            {"number": number, "results": result},
            ensure_ascii=False,
        )

        # One JSON object per line; flush after each record so finished work
        # is handed to the OS even if a later item fails.
        f.write(line)
        f.write('\n')
        f.flush()
        print(f"已写入数据: {number}")