import pandas as pd
from src.early_stop_cot import EarlyStopCoT
from pathlib import Path

# Script: for every (model, dataset) pair, run the slope-based early-stop
# procedure on the processed step results and record how often the
# early-stopped answer equals the originally generated answer. The summary is
# written to ``table2.csv`` under ``rootpath``.

# Root directory; results are looked up at <rootpath>/<model>/<dataset>/.
rootpath = '/data/project/Reasoning/results/'
model_lis = ['QwQ-32B', 'Qwen3-8B', 'DeepSeek-R1-Distill-Llama-8B']
dataset_lis = ['aime', 'gpqa', 'math', 'minerva', 'olympiadbench']
# NOTE(review): the three grids below are not read anywhere in this script;
# presumably kept for hyper-parameter sweeps -- confirm before removing.
warmup_lis = [30, 40, 50, 60, 70]
min_slope_lis = [3, 5, 7, 10, 15, 20]
threshold_lis = [0.01, 0.05, 0.1, 0.15, 0.2]

# Fixed early-stop settings used for every run (loop-invariant, so set once).
min_slope = 20
threshold = 0.05

results_root = Path(rootpath)
result_lis = []
for model in model_lis:
    for dataset in dataset_lis:
        path = results_root / model / dataset / f'{dataset}_step_results_processed.jsonl'
        print(f"Processing {path} with min_slope={min_slope}, threshold={threshold}")
        if not path.exists():
            print(f"File {path} does not exist. Skipping.")
            continue

        early_stop_cot = EarlyStopCoT(path)
        early_stop_cot.early_stop_slope(min_slope=min_slope, threshold=threshold)  # slope-based variant

        # Flatten the per-record answer lists so they can be compared pairwise.
        generated_answer = [answer for data in early_stop_cot.data for answer in data['generated_answer']]
        es_answer = [answer for data in early_stop_cot.data for answer in data['early_stop_answer']]

        total_samples = len(generated_answer)
        if len(es_answer) != total_samples:
            # zip() below stops at the shorter list; surface the mismatch
            # instead of letting it silently distort the ratio.
            print(
                f"Warning: {model} on {dataset} has {total_samples} generated answers "
                f"but {len(es_answer)} early-stop answers; comparing the common prefix only."
            )

        # Fraction of positions where both answers agree. An empty result set
        # yields NaN rather than raising ZeroDivisionError and aborting the run.
        if total_samples:
            ratio = sum(g == es for g, es in zip(generated_answer, es_answer)) / total_samples
        else:
            ratio = float('nan')

        result_lis.append({
            'model': model,
            'dataset': dataset,
            'min_slope': min_slope,
            'threshold': threshold,
            'total_samples': total_samples,
            'ratio': ratio,
        })

df = pd.DataFrame(result_lis)
# Same location as before ('/data/project/Reasoning/results/table2.csv'),
# now derived from rootpath so the two cannot drift apart.
df.to_csv(results_root / 'table2.csv', index=False)

