import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
import json
from utils import load_eval_data
from deepscaler.rewards.math_utils.utils import extract_answer, grade_answer_sympy as grade_answer
import pandas as pd
import json


# data_paths = [
#     "processed_data/aime25.parquet",
#     "processed_data/mix_mathematic_problems.parquet",
# ]


# for data_path in data_paths:
#     df = pd.read_parquet(data_path, engine="pyarrow")  # or fastparquet

#     json_str = df.to_json(orient="records")

#     json_data = json.loads(json_str)

#     print(json_data[1].keys())  # print the keys of the record at index 1
#     print(json_data[1])
#     print("-"*10)


# for data_path in data_paths:
#     data = json.load(open(data_path, "r"))
#     print(data[0].keys())
# data_path = "model_eval/DeepSeek-R1-Distill-Qwen-7B/gsm8k.json"
# Paths to the two evaluation-result files whose problem order is compared.
data_path_long = "model_eval/DeepSeek-R1-Distill-Qwen-7B/mix_mathematic_problems.json"
data_path_short = "model_eval/Deepseek-Qwen-7B-Short-COT/mix_mathematic_problems.json"

# NOTE(review): assumes load_eval_data returns a sized, indexable sequence of
# dicts that each carry a 'prompt' key -- confirm against utils.load_eval_data.
data_long = load_eval_data(data_path_long)
data_short = load_eval_data(data_path_short)

# Upper bound on how many leading records are compared.
MAX_PROBLEMS_TO_CHECK = 2500

# Peek at the first record; guarded so an empty file does not raise IndexError.
if len(data_long) > 0:
    print(data_long[0].keys())
    print(data_long[0]['prompt'])

# A differing record count already means the files cannot line up one-to-one,
# so report it before comparing the overlapping prefix.
if len(data_long) != len(data_short):
    print("warning: record counts differ:", len(data_long), "vs", len(data_short))

# check the order of problem
# Clamp the range to the shorter file so indexing never runs past the end.
num_to_check = min(MAX_PROBLEMS_TO_CHECK, len(data_long), len(data_short))
for index in range(num_to_check):
    problem_long = data_long[index]['prompt']
    problem_short = data_short[index]['prompt']
    if problem_long != problem_short:
        # Stop at the first divergence and show both prompts for inspection.
        print("problem not equal at index:", index)
        print(problem_long)
        print(problem_short)
        break
    else:
        print("problem equal")
        print(problem_long)
        print(problem_short)
        print("-"*100)
        # Pause so each matching pair can be reviewed manually.
        input("continue?")

# data = load_eval_data(data_path)
# with open(data_path, 'r', encoding='utf-8') as f:
#     data = [json.loads(line) for line in f]

# Now data is a list of dicts; each element is one line of JSON
# print(data[0])  #
# print(list(data[0].keys()))
# print(len(data[0]['responses']))
# print("len data:",len(data))
# print(data[0]['reward_model']['ground_truth'])
# input("continue?")

# for item in data:
#     for index in range(len(item['responses'])):
#         response = item['responses'][index]
#         print(response)
#         print(f"[{index}]","gt answer:", item['reward_model']['ground_truth'], item['correctness'])
#         print("-"*100)
#         input("continue?")


# with open(data_path, "r") as f:
#     data = json.load(f)

# print(data[0])
# print(data[0].keys())