import os
import numpy as np
from scipy import stats
import pandas as pd
from scipy.stats import shapiro, levene, f_oneway, ttest_rel

def load_configs():
    """Load the problem configuration object from ``problem_configs.txt``.

    The file is read as UTF-8 from the current working directory, every
    occurrence of the literal text ``problem_configs = `` is removed, and the
    remaining text is evaluated as a Python expression.

    Returns:
        Whatever object the file content evaluates to. Callers in this file
        iterate over its keys as problem names.
    """
    with open('problem_configs.txt', 'r', encoding='utf-8') as config_file:
        raw_text = config_file.read()

    # Drop the assignment prefix so that only the right-hand-side expression remains.
    expression = raw_text.replace("problem_configs = ", "")

    # NOTE(review): eval() executes arbitrary code found in the file. This is
    # only acceptable while problem_configs.txt is a trusted, locally authored
    # file; consider ast.literal_eval if the content is always a plain literal.
    return eval(expression)

def load_probability_data():
    """Load the mean probability of every available trajectory, per action model.

    For each problem name in the loaded configuration and each action model
    index 0-5, the file
    ``probability/<problem>_action<model>_probability.txt`` is read if it
    exists. The first line is skipped as a header; on every remaining
    non-blank line the second tab-separated column is parsed as a float. The
    mean of those values is appended to the list of that action model.

    Files that do not exist, or that contain no data rows after the header,
    contribute nothing.

    Returns:
        dict: maps each action model index (0-5) to a list of per-file mean
        probabilities.

    Raises:
        IndexError: if a non-blank data line has no second tab-separated column.
        ValueError: if the second column cannot be parsed as a float.
    """
    problem_configs = load_configs()
    prob_dir = "probability"

    # Collects the per-trajectory mean probabilities of each action model.
    model_probs = {i: [] for i in range(6)}

    # NOTE(review): files are skipped per (problem, model) pair, so the lists
    # can end up with different lengths or refer to different problems at the
    # same position. The paired t-tests in this file assume position-wise
    # alignment across models; verify that every problem has all six files.
    for problem_name in problem_configs:
        for action_model in range(6):
            prob_file = os.path.join(prob_dir, f"{problem_name}_action{action_model}_probability.txt")
            if not os.path.exists(prob_file):
                continue

            with open(prob_file, 'r') as f:
                lines = f.readlines()[1:]  # skip the header line

            # Blank lines (e.g. trailing empty lines) carry no data and would
            # otherwise fail on the column lookup.
            probs = [
                float(line.strip().split('\t')[1])
                for line in lines
                if line.strip()
            ]
            if not probs:
                # A header-only file is treated like a missing file; the mean
                # of an empty list would be NaN and poison every later test.
                continue

            # The mean probability represents this trajectory.
            model_probs[action_model].append(np.mean(probs))

    return model_probs

def check_assumptions(model_probs):
    """Check the normality and equal-variance assumptions and print the results.

    A Shapiro-Wilk test is run on the data of each action model 0-5, followed
    by one Levene test across all six groups. Each test is judged at the 0.05
    level and its statistic, p-value and verdict are printed.

    Args:
        model_probs: mapping from action model index (0-5) to a sequence of
            probability values.

    Returns:
        tuple: ``(all_normal, equal_variance)`` where ``all_normal`` is True
        only if every Shapiro-Wilk p-value exceeds 0.05, and
        ``equal_variance`` is whether the Levene p-value exceeds 0.05.
    """
    separator = "-" * 50

    print("\n正态性检验 (Shapiro-Wilk test):")
    print(separator)
    is_normal = {}
    for model_id in range(6):
        w_stat, w_p = shapiro(model_probs[model_id])
        passed = w_p > 0.05
        is_normal[model_id] = passed
        print(f"Action Model {model_id}: statistic={w_stat:.4f}, p-value={w_p:.4f}")
        print(f"数据是否服从正态分布: {'是' if passed else '否'}")

    print("\n方差齐性检验 (Levene test):")
    print(separator)
    groups = [model_probs[model_id] for model_id in range(6)]
    levene_stat, levene_p = levene(*groups)
    equal_variance = levene_p > 0.05
    print(f"Levene test: statistic={levene_stat:.4f}, p-value={levene_p:.4f}")
    print(f"各组方差是否相等: {'是' if equal_variance else '否'}")

    return all(is_normal.values()), equal_variance

def perform_anova(model_probs):
    """Run one-way ANOVA and the Kruskal-Wallis H-test over all six action models.

    Both tests are always executed on the same six groups (action models 0-5)
    and their statistic, p-value and a verdict at the 0.05 level are printed.

    Args:
        model_probs: mapping from action model index (0-5) to a sequence of
            probability values.

    Returns:
        None. Results are only printed.
    """
    # Both tests must compare the identical set of groups, so build it once.
    groups = [model_probs[i] for i in range(6)]

    # One-way ANOVA
    print("\n单因素方差分析 (ANOVA):")
    print("-" * 50)
    f_stat, p_value = f_oneway(*groups)
    print(f"F-statistic={f_stat:.4f}, p-value={p_value:.4f}")
    print(f"不同方法是否存在显著差异: {'是' if p_value < 0.05 else '否'}")

    # Kruskal-Wallis H-test
    print("\nKruskal-Wallis H-test:")
    print("-" * 50)
    h_stat, p_value = stats.kruskal(*groups)
    print(f"H-statistic={h_stat:.4f}, p-value={p_value:.4f}")
    print(f"不同方法是否存在显著差异: {'是' if p_value < 0.05 else '否'}")

def compare_with_honest(model_probs):
    """Compare action models 1-5 against action model 0 with paired t-tests.

    For each of the models 1-5 a paired t-test against model 0 (labelled
    "Honest" in the output) is run and converted to a one-sided p-value for
    the hypothesis that the model's values are lower than those of model 0.
    The t-statistic, one-sided p-value, both means and a verdict at the 0.05
    level are printed.

    Args:
        model_probs: mapping from action model index (0-5) to a sequence of
            probability values; sequences must have equal length because the
            test is paired.

    Returns:
        None. Results are only printed.
    """
    print("\n与诚实代理的单侧配对t检验:")
    print("-" * 50)
    baseline = np.array(model_probs[0])
    baseline_mean = np.mean(baseline)

    for model_id in range(1, 6):
        candidate = np.array(model_probs[model_id])
        t_stat, two_sided_p = ttest_rel(candidate, baseline)

        # The hypothesis is "lower than model 0": a negative t-statistic takes
        # half of the two-sided p-value, otherwise its complement.
        if t_stat < 0:
            one_sided_p = two_sided_p / 2
        else:
            one_sided_p = 1 - two_sided_p / 2
        significant = one_sided_p < 0.05

        print(f"\nAction Model {model_id} vs Honest:")
        print(f"t-statistic={t_stat:.4f}, p-value={one_sided_p:.4f}")
        print(f"平均概率: {np.mean(candidate):.4f} vs {baseline_mean:.4f}")
        print(f"是否显著低于诚实代理: {'是' if significant else '否'}")
        if significant:
            confidence = (1 - one_sided_p) * 100
            print(f"置信度: {confidence:.2f}%")

def pairwise_comparison(model_probs):
    """Compare every pair of action models with paired t-tests in both directions.

    For each pair ``(model1, model2)`` with ``model1 < model2`` among the
    action models 0-5, one paired t-test is run. Its two-sided p-value is
    converted into two one-sided p-values (model1 lower than model2, and
    model2 lower than model1), each judged at the 0.05 level. Exactly one
    verdict is printed per pair: which model is significantly lower, or that
    there is no significant difference.

    Args:
        model_probs: mapping from action model index (0-5) to a sequence of
            probability values; sequences must have equal length because the
            test is paired.

    Returns:
        None. Results are only printed.
    """
    print("\n方法间的两两比较 (双向单侧配对t检验):")
    print("-" * 50)

    # All unordered pairs of the six action models.
    comparisons = [(i, j) for i in range(6) for j in range(i + 1, 6)]

    for model1, model2 in comparisons:
        probs1 = np.array(model_probs[model1])
        probs2 = np.array(model_probs[model2])
        mean1 = np.mean(probs1)
        mean2 = np.mean(probs2)

        # Paired t-test on the two samples.
        t_stat, p_value = ttest_rel(probs1, probs2)

        print(f"\nAction Model {model1} vs {model2}:")
        print(f"t-statistic={t_stat:.4f}")
        print(f"平均概率: {mean1:.4f} vs {mean2:.4f}")

        # One-sided p-value for "model1 is lower than model2".
        p_value_one_sided_1 = p_value / 2 if t_stat < 0 else 1 - p_value / 2
        model1_lower = p_value_one_sided_1 < 0.05
        if model1_lower:
            confidence = (1 - p_value_one_sided_1) * 100
            print(f"Action Model {model1} 显著低于 Model {model2}，置信度: {confidence:.2f}%")

        # One-sided p-value for "model2 is lower than model1".
        p_value_one_sided_2 = p_value / 2 if t_stat > 0 else 1 - p_value / 2
        model2_lower = p_value_one_sided_2 < 0.05
        if model2_lower:
            confidence = (1 - p_value_one_sided_2) * 100
            print(f"Action Model {model2} 显著低于 Model {model1}，置信度: {confidence:.2f}%")

        # Phrased as "neither direction is significant" so that a NaN result
        # (e.g. two models with identical values) still yields a verdict;
        # NaN compares False against both "< 0.05" and ">= 0.05".
        if not (model1_lower or model2_lower):
            print("两个模型之间没有显著差异")

def main():
    """Run the full analysis: load data, check assumptions, then run all tests.

    The steps are executed in order: load the probability data, check the
    normality and equal-variance assumptions, run ANOVA and Kruskal-Wallis,
    compare every model with model 0, and finally compare all model pairs.
    Progress and results are printed.
    """
    print("加载数据...")
    probabilities = load_probability_data()

    print("\n开始统计分析...")
    print("=" * 50)

    # Assumption checks decide only which result the reader is advised to trust.
    is_normal, has_equal_variance = check_assumptions(probabilities)

    # Both omnibus tests run regardless of the assumption checks.
    perform_anova(probabilities)
    assumptions_hold = is_normal and has_equal_variance
    if not assumptions_hold:
        print("\n注意：由于数据不满足ANOVA检验的假设，")
        print("建议主要参考Kruskal-Wallis检验的结果")

    # Each model against model 0, then every pair of models.
    compare_with_honest(probabilities)
    pairwise_comparison(probabilities)

    print("\n分析完成！")

# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
