import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from scipy.stats import ttest_ind

# Helper function that draws and displays the plots used by the analyses below
def plot_and_print(title, xlabel, ylabel, x_data, y_data, kind='bar', rotation=0):
    """
    Draw a single labelled chart on a new 10x6 figure and display it.

    Despite the name, this helper prints nothing itself; it only plots.

    Args:
        title: Text placed above the chart.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        x_data: Values for the x axis.
        y_data: Values for the y axis, paired with ``x_data``.
        kind: Chart type: ``'bar'``, ``'line'`` (drawn with circle markers)
            or ``'scatter'``.
        rotation: Rotation in degrees applied to the x tick labels.

    Raises:
        ValueError: If ``kind`` is not one of the supported chart types.
    """
    supported_kinds = ('bar', 'line', 'scatter')
    # Validate before opening a figure so that a typo in ``kind`` fails loudly
    # instead of displaying an empty chart.
    if kind not in supported_kinds:
        raise ValueError(
            f"Unsupported plot kind {kind!r}; expected one of {supported_kinds}"
        )

    plt.figure(figsize=(10, 6))
    if kind == 'bar':
        plt.bar(x_data, y_data)
    elif kind == 'line':
        plt.plot(x_data, y_data, marker='o')
    else:  # 'scatter' is the only remaining supported kind
        plt.scatter(x_data, y_data)

    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.xticks(rotation=rotation)
    plt.tight_layout()
    plt.show()

def compare_success_rates(df, solvers=None):
    """
    Report the mean 'Success Rate' of each solver as a table and a bar chart.

    Args:
        df: DataFrame with 'Solver' and 'Success Rate' columns.
        solvers: Optional collection of solver names to keep. A falsy value
            (None or an empty collection) leaves every row in place.
    """
    # Narrow down to the requested solvers only when a selection was given
    subset = df[df['Solver'].isin(solvers)] if solvers else df

    per_solver = subset.groupby('Solver')['Success Rate'].mean()
    result = per_solver.reset_index()

    # Tabular summary
    print("Success Rates by Solver:")
    print(result)

    # Bar chart of the same numbers
    plot_and_print(
        'Average Success Rate by Solver',
        'Solver',
        'Success Rate',
        result['Solver'],
        result['Success Rate'],
        'bar',
    )

def compare_mistakes(df, solvers=None):
    """
    Report the mean 'Average Mistakes' of each solver as a table and a bar chart.

    Args:
        df: DataFrame with 'Solver' and 'Average Mistakes' columns.
        solvers: Optional collection of solver names to keep. A falsy value
            (None or an empty collection) disables the filter.
    """
    metric = 'Average Mistakes'

    if solvers:
        keep_rows = df['Solver'].isin(solvers)
        df = df[keep_rows]

    result = df.groupby('Solver')[metric].mean().reset_index()

    # Tabular summary
    print("Average Mistakes by Solver:")
    print(result)

    # Bar chart of the same numbers
    solver_names = result['Solver']
    mistake_means = result[metric]
    plot_and_print(
        title='Average Mistakes by Solver',
        xlabel='Solver',
        ylabel=metric,
        x_data=solver_names,
        y_data=mistake_means,
        kind='bar',
    )

def compare_efficiency(df, solvers=None):
    """
    Report the mean 'Average Conversation Length' of each solver.

    The per-solver means are printed as a table and shown as a bar chart.

    Args:
        df: DataFrame with 'Solver' and 'Average Conversation Length' columns.
        solvers: Optional collection of solver names to keep. A falsy value
            (None or an empty collection) disables the filter.
    """
    length_col = 'Average Conversation Length'

    rows = df
    if solvers:
        rows = rows[rows['Solver'].isin(solvers)]

    result = rows.groupby('Solver')[length_col].mean().reset_index()

    # Tabular summary
    print("Average Conversation Length by Solver (Efficiency):")
    print(result)

    # Bar chart of the same numbers
    chart_options = {
        'title': 'Average Conversation Length by Solver',
        'xlabel': 'Solver',
        'ylabel': length_col,
        'x_data': result['Solver'],
        'y_data': result[length_col],
        'kind': 'bar',
    }
    plot_and_print(**chart_options)

def analyze_expert_impact(df, solvers=None, experts=None):
    """
    Summarise how each solver performs when paired with each expert.

    Rows are optionally filtered by solver and by expert, then 'Success Rate',
    'Average Mistakes' and 'Average Partial Score' are averaged for every
    (Solver, Expert) pair. The table is printed and one bar chart per metric
    is displayed.

    Args:
        df: DataFrame with 'Solver', 'Expert' and the three metric columns.
        solvers: Optional collection of solver names to keep; falsy means all.
        experts: Optional collection of expert names to keep; falsy means all.
    """
    metrics = ['Success Rate', 'Average Mistakes', 'Average Partial Score']

    # Solver filter first, then Expert; falsy selections are skipped
    for column, wanted in (('Solver', solvers), ('Expert', experts)):
        if wanted:
            df = df[df[column].isin(wanted)]

    pairs = df.groupby(['Solver', 'Expert'])
    result = pairs.agg(dict.fromkeys(metrics, 'mean')).reset_index()

    # Tabular summary
    print("Expert Impact on Solvers:")
    print(result)

    # Every chart shares the same "Solver - Expert" tick labels
    pair_labels = result['Solver'] + " - " + result['Expert']
    for metric in metrics:
        plot_and_print(
            title=f'{metric} by Solver and Expert',
            xlabel='Solver - Expert',
            ylabel=metric,
            x_data=pair_labels,
            y_data=result[metric],
            kind='bar',
            rotation=45,
        )

def analyze_puzzle_performance(df, puzzles=None, solvers=None, experts=None):
    """
    Summarise performance metrics per puzzle.

    Rows are optionally filtered by puzzle, solver and expert, then
    'Success Rate', 'Average Mistakes', 'Highest Partial Score' and
    'Average Partial Score' are averaged for every puzzle. The table is
    printed and one bar chart per metric is displayed.

    Args:
        df: DataFrame with 'Puzzle', 'Solver', 'Expert' and the four metric
            columns.
        puzzles: Optional collection of puzzle names to keep; falsy means all.
        solvers: Optional collection of solver names to keep; falsy means all.
        experts: Optional collection of expert names to keep; falsy means all.
    """
    metrics = [
        'Success Rate',
        'Average Mistakes',
        'Highest Partial Score',
        'Average Partial Score',
    ]

    # Filters are applied in the order Puzzle -> Solver -> Expert
    selections = (('Puzzle', puzzles), ('Solver', solvers), ('Expert', experts))
    for column, wanted in selections:
        if wanted:
            df = df[df[column].isin(wanted)]

    by_puzzle = df.groupby('Puzzle')
    result = by_puzzle.agg(dict.fromkeys(metrics, 'mean')).reset_index()

    # Tabular summary
    print("Puzzle-Specific Performance:")
    print(result)

    # One chart per averaged metric
    puzzle_names = result['Puzzle']
    for metric in metrics:
        plot_and_print(
            title=f'{metric} by Puzzle',
            xlabel='Puzzle',
            ylabel=metric,
            x_data=puzzle_names,
            y_data=result[metric],
            kind='bar',
            rotation=45,
        )

def analyze_conversation_length(df, solvers=None, experts=None):
    """
    Report the mean 'Average Conversation Length' per (Solver, Expert) pair.

    The per-pair means are printed as a table and shown as a bar chart whose
    tick labels are rotated by 45 degrees.

    Args:
        df: DataFrame with 'Solver', 'Expert' and
            'Average Conversation Length' columns.
        solvers: Optional collection of solver names to keep; falsy means all.
        experts: Optional collection of expert names to keep; falsy means all.
    """
    length_col = 'Average Conversation Length'

    # Solver filter first, then Expert; falsy selections are skipped
    for column, wanted in (('Solver', solvers), ('Expert', experts)):
        if wanted:
            df = df[df[column].isin(wanted)]

    pair_means = df.groupby(['Solver', 'Expert'])[length_col].mean()
    result = pair_means.reset_index()

    # Tabular summary
    print("Average Conversation Length by Solver and Expert:")
    print(result)

    # Bar chart keyed by "Solver - Expert"
    pair_labels = result['Solver'] + " - " + result['Expert']
    plot_and_print(
        'Average Conversation Length by Solver and Expert',
        'Solver - Expert',
        'Conversation Length',
        pair_labels,
        result[length_col],
        kind='bar',
        rotation=45,
    )

def cluster_puzzles(df, num_clusters=3, random_state=None):
    """
    Group puzzles into clusters with KMeans based on their mean metrics.

    Rows are averaged per 'Puzzle' over 'Success Rate', 'Average Mistakes'
    and 'Average Partial Score'. KMeans is fitted on those three columns and
    the label assigned to each puzzle is stored in a new 'Cluster' column.
    The resulting table is printed and a scatter plot of success rate against
    average partial score is displayed.

    Args:
        df: DataFrame with 'Puzzle' and the three metric columns.
        num_clusters: Number of clusters requested from KMeans.
        random_state: Optional seed forwarded to KMeans. The default (None)
            keeps the unseeded behaviour; pass an int to get the same cluster
            assignments on every run.

    Returns:
        The per-puzzle DataFrame, including the 'Cluster' column.
    """
    features = ['Success Rate', 'Average Mistakes', 'Average Partial Score']

    puzzle_data = (
        df.groupby('Puzzle')
        .agg(dict.fromkeys(features, 'mean'))
        .reset_index()
    )

    # Apply KMeans clustering; seeding is opt-in so existing callers are unaffected
    kmeans = KMeans(n_clusters=num_clusters, random_state=random_state)
    puzzle_data['Cluster'] = kmeans.fit_predict(puzzle_data[features])

    # Print insights
    print("Puzzle Clustering:")
    print(puzzle_data)

    # Plot two of the three clustered features. NOTE: only x and y are passed
    # on, so the scatter does not encode which cluster a puzzle belongs to.
    plot_and_print(
        title='Puzzle Clustering (2D Projection)',
        xlabel='Success Rate',
        ylabel='Average Partial Score',
        x_data=puzzle_data['Success Rate'],
        y_data=puzzle_data['Average Partial Score'],
        kind='scatter',
    )

    return puzzle_data

def compare_expert_influence(df, experts=None):
    """
    Report the mean 'Success Rate' obtained with each expert.

    The per-expert means are printed as a table and shown as a bar chart.

    Args:
        df: DataFrame with 'Expert' and 'Success Rate' columns.
        experts: Optional collection of expert names to keep. A falsy value
            (None or an empty collection) leaves every row in place.
    """
    # Narrow down to the requested experts only when a selection was given
    subset = df[df['Expert'].isin(experts)] if experts else df

    per_expert = subset.groupby('Expert')['Success Rate'].mean()
    result = per_expert.reset_index()

    # Tabular summary
    print("Success Rate by Expert:")
    print(result)

    # Bar chart of the same numbers
    plot_and_print(
        'Average Success Rate by Expert',
        'Expert',
        'Success Rate',
        result['Expert'],
        result['Success Rate'],
        'bar',
    )

def t_test_solvers(df, solver1, solver2):
    """
    Run Welch's t-test on the 'Success Rate' values of two solvers.

    The t-statistic and p-value are printed.

    Args:
        df: DataFrame with 'Solver' and 'Success Rate' columns.
        solver1: Value of 'Solver' selecting the first sample.
        solver2: Value of 'Solver' selecting the second sample.

    Returns:
        The p-value of the test.
    """
    solver_col = df['Solver']
    rates = df['Success Rate']
    first_sample = rates[solver_col == solver1]
    second_sample = rates[solver_col == solver2]

    # equal_var=False selects Welch's variant (no equal-variance assumption)
    outcome = ttest_ind(first_sample, second_sample, equal_var=False)
    t_stat = outcome.statistic
    p_value = outcome.pvalue

    # Report the outcome
    print(f"T-test between {solver1} and {solver2}:")
    print(f"T-statistic: {t_stat}, P-value: {p_value}")

    return p_value

