import os
import json
# import t_test
from scipy import stats
from rich import print
import numpy as np
import statsmodels.stats.power as smp

def load_jsonl(path):
    """Read a JSON Lines file and return its records as a list.

    Args:
        path: Path of a UTF-8 encoded file holding one JSON document per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        # Blank lines (e.g. a trailing newline at end of file) are not
        # records; feeding them to json.loads would raise.
        return [json.loads(line) for line in f if line.strip()]


def mean_accuracy(results):
    """Return the arithmetic mean of a non-empty list of per-item scores.

    Args:
        results: List of numeric scores (presumably 0/1 or boolean
            correctness flags -- confirm against the evaluation output).

    Raises:
        ValueError: If ``results`` is empty, since the mean is undefined.
    """
    if not results:
        raise ValueError("cannot compute accuracy of an empty result list")
    return sum(results) / len(results)


def paired_accuracies(cot_data, zero_shot_data):
    """Compute per-record accuracies for the two runs, pair by pair.

    Records are paired by position: the i-th CoT record with the i-th
    zero-shot record.

    Args:
        cot_data: Records whose 'datapoint_results' field holds the scores.
        zero_shot_data: Records whose 'accuracy' field holds the scores.

    Returns:
        A tuple ``(cot_acc, zero_shot_acc)`` of equal-length lists.

    Raises:
        ValueError: If the two inputs have different lengths, or if any
            record has an empty score list.
    """
    # A paired test is only meaningful when every record has a partner;
    # zip() would silently drop the unmatched tail instead of reporting it.
    if len(cot_data) != len(zero_shot_data):
        raise ValueError(
            f"record count mismatch: {len(cot_data)} CoT records vs "
            f"{len(zero_shot_data)} zero-shot records"
        )
    cot_acc = [mean_accuracy(rec['datapoint_results']) for rec in cot_data]
    zero_shot_acc = [mean_accuracy(rec['accuracy']) for rec in zero_shot_data]
    return cot_acc, zero_shot_acc


def paired_cohens_d(first, second):
    """Return Cohen's d for paired samples.

    Computed as the mean of the pairwise differences ``first - second``
    divided by their sample standard deviation (``ddof=1``).

    Args:
        first: Sequence of numbers.
        second: Sequence of numbers of the same length as ``first``.

    Raises:
        ValueError: If there are fewer than two pairs, or if all pairwise
            differences are identical (zero standard deviation), because
            the effect size is undefined in both cases.
    """
    differences = np.asarray(first, dtype=float) - np.asarray(second, dtype=float)
    if differences.size < 2:
        raise ValueError("need at least two pairs to compute Cohen's d")
    std_diff = np.std(differences, ddof=1)
    if std_diff == 0:
        raise ValueError(
            "Cohen's d is undefined: the paired differences have zero variance"
        )
    return float(np.mean(differences) / std_diff)


if __name__ == '__main__':
    cot_file = "redacted/CoT_Prompt_gpt4o/eval.jsonl"
    zero_shot_file = "redacted/Zeroshot_Prompt_gpt4o/outputs.jsonl"

    cot_data = load_jsonl(cot_file)
    zero_shot_data = load_jsonl(zero_shot_file)
    cot_acc, zero_shot_acc = paired_accuracies(cot_data, zero_shot_data)

    # Paired (dependent-samples) t-test on the per-record accuracies.
    t_stat, p_val = stats.ttest_rel(cot_acc, zero_shot_acc)
    print(f"t_stat: {t_stat}")
    print(f"p_val: {p_val}")

    # Effect size of the paired differences, used as input to the power
    # calculation below.
    cohen_d = paired_cohens_d(cot_acc, zero_shot_acc)
    print(f"Cohen's d: {cohen_d}")

    # Power of a two-sided t-test at alpha = 0.05 for the observed effect
    # size and number of pairs; leaving `power` unset makes solve_power
    # solve for it.
    power_analysis = smp.TTestPower()
    observed_power = power_analysis.solve_power(effect_size=cohen_d,
                                                nobs=len(cot_acc),
                                                alpha=0.05,
                                                alternative='two-sided')
    print(f"Observed Power: {observed_power}")

    