# NOTE: predictions are read from the 'predict' key; switch to the 'history' key if your data requires it.
import json
import csv
import re
import os

def extract_knowledge_values(state_str, predict_str):
    """Pair every knowledge point found in state_str with its value in predict_str.

    Both strings are scanned for markers of the form
    ``# <name> （<0 or 1>）`` (one or more ``#``, full-width parentheses).

    Args:
        state_str: Text holding the reference knowledge-point markers.
        predict_str: Text holding the predicted knowledge-point markers.

    Returns:
        A list of ``[name, state_value, predict_value]`` rows in the order the
        names first appear in state_str. Values are the strings ``'0'`` or
        ``'1'``; predict_value is ``'N/A'`` when the name is absent from
        predict_str.
    """
    # Groups: (name, parenthesised suffix, digit). Only name and digit are used.
    marker = re.compile(r'#+\s*(.*?)(\s*（([01])）)')

    def parse(text):
        # A name that occurs more than once keeps the value of its last occurrence.
        return {
            label.strip(): flag
            for label, _suffix, flag in marker.findall(text)
        }

    actual = parse(state_str)
    predicted = parse(predict_str)

    # Iterating the state mapping keeps the rows in state order.
    return [
        [label, flag, predicted.get(label, 'N/A')]
        for label, flag in actual.items()
    ]

def save_to_csv(csv_data, output_file):
    """Write the comparison rows to output_file as a UTF-8 CSV with a header row.

    The parent directory of output_file is created when it does not exist, so
    writing into a fresh output folder no longer fails with FileNotFoundError.

    Args:
        csv_data: Iterable of rows, each ``[name, state_value, predict_value]``.
        output_file: Destination path of the CSV file; it is overwritten if
            it already exists.
    """
    parent_dir = os.path.dirname(output_file)
    if parent_dir:  # empty when output_file is a bare file name
        os.makedirs(parent_dir, exist_ok=True)

    # newline='' lets the csv module control line endings itself.
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['知识点', 'State值', 'Predict值'])
        writer.writerows(csv_data)

def process_json_to_csv(input_json, output_csv):
    """Convert one JSON file of examples into a state-vs-predict CSV file.

    The JSON document is loaded and iterated; each item is read through
    ``.get('state')`` and ``.get('predict')``. Items where either value is
    missing or empty are skipped. The rows from all remaining items are
    concatenated and written with save_to_csv.

    Args:
        input_json: Path of the UTF-8 JSON file to read.
        output_csv: Path of the CSV file to write.
    """
    with open(input_json, 'r', encoding='utf-8') as handle:
        examples = json.load(handle)

    rows = []
    for record in examples:
        state_text = record.get('state', '')
        predicted_text = record.get('predict', '')
        # Alternative prediction source: record.get('history', '')[-1]
        if not (state_text and predicted_text):
            continue  # both fields are required to build comparison rows
        rows += extract_knowledge_values(state_text, predicted_text)

    save_to_csv(rows, output_csv)

# Example invocation: convert each listed JSON file into a CSV named after it.
input_dir = ''
json_files = [
    'physics.json',
    'chemistry.json',
]
for json_file in json_files:
    file_path = os.path.join(input_dir, json_file)
    # Reuse the JSON file's base name (extension removed) for the CSV output.
    base_name = os.path.splitext(json_file)[0]
    output_file_path = f"/csv_data/{base_name}.csv"
    process_json_to_csv(file_path, output_file_path)

