import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Hard-coded scores per method for the Avalon experiment. 'RL-Training' and
# 'LLM-Improvement' each hold ten values; 'Random Rollout' holds a single
# value that is later drawn as a horizontal reference line.
results_avalon = {
    'RL-Training': [
        -0.058823529, -0.125, -0.142857143, -0.333333333, -0.473684211,
        -0.529411765, -0.538461538, -0.545454545, -0.555555556, -0.714285714,
    ],
    'LLM-Improvement': [
        0.125, 0, -0.090909, -0.1, -0.2,
        -0.259259, -0.285714286, -0.411764706, -0.529411765, -0.6521739,
    ],
    'Random Rollout': [-0.538461538],
}

# Same layout as above, for the GOPS experiment.
results_gops = {
    'RL-Training': [
        0.968421053, 0.306122449, 0.077669903, -0.009259259, -0.336956522,
        -0.545454545, -0.714285714, -1.010752688, -1.148148148, -1.478991597,
    ],
    'LLM-Improvement': [
        2.05, 1.3333333, 1.183673469, 0.78095238, 0.64150943,
        0.5494505, 0.50617284, -0.303571429, -1.18478261, -2.27956989,
    ],
    'Random Rollout': [0.430107527],
}

# Per-method summary statistics. np.std is called with its default arguments,
# so the single-value 'Random Rollout' entry always reports a spread of 0.
means_avalon = {method: np.mean(scores) for method, scores in results_avalon.items()}
stds_avalon = {method: np.std(scores) for method, scores in results_avalon.items()}
print('means_avalon = ', means_avalon)
print('stds_avalon = ', stds_avalon)
print()

means_gops = {method: np.mean(scores) for method, scores in results_gops.items()}
stds_gops = {method: np.std(scores) for method, scores in results_gops.items()}
print('means_gops = ', means_gops)
print('stds_gops = ', stds_gops)

# Reshape a {method: scores} mapping into the long format used for plotting.
def prepare_data(results):
    """Flatten per-method score lists into a long-format DataFrame.

    Every entry of ``results`` except the one keyed ``'Random Rollout'``
    contributes one row per score, in the mapping's iteration order.

    Args:
        results: Mapping from method name to a sequence of scores.

    Returns:
        A ``pandas.DataFrame`` with three columns: ``'Data'`` (the score),
        ``'Category'`` (the constant string ``'Data'``, so every row shares
        one x-axis group) and ``'Type'`` (the method name the score came
        from).
    """
    kept = [
        (method, scores)
        for method, scores in results.items()
        if method != 'Random Rollout'
    ]
    flat_scores = [score for _, scores in kept for score in scores]
    method_per_row = [method for method, scores in kept for _ in scores]
    columns = {
        'Data': flat_scores,
        'Category': ['Data'] * len(flat_scores),
        'Type': method_per_row,
    }
    return pd.DataFrame(columns)

# Box fill colours, keyed by the method names that appear in the 'Type' column
palette = {'RL-Training': 'LightBlue', 'LLM-Improvement': 'LightGreen'}

# Long-format frames for the two games (the 'Random Rollout' entry is left out
# by prepare_data and drawn separately as a reference line below)
df_avalon = prepare_data(results_avalon)
df_gops = prepare_data(results_gops)

# Both figures share one layout: side-by-side boxes for the two methods, a red
# dashed line at the game's single 'Random Rollout' value, and the game name as
# the x-axis label. Only the data, label and output file differ per game.
axis_font = 28
for game_label, frame, rollout_score, out_file in (
    ('Avalon', df_avalon, results_avalon['Random Rollout'][0], 'LLM_RL_Random_avalon.png'),
    ('GOPS', df_gops, results_gops['Random Rollout'][0], 'LLM_RL_Random_gops.png'),
):
    plt.figure(figsize=(12, 10))
    sns.boxplot(
        x='Category',
        y='Data',
        hue='Type',
        data=frame,
        palette=palette,
        width=0.1,
        linecolor='black',
        linewidth=6,
    )
    plt.axhline(
        y=rollout_score,
        color='red',
        linestyle='--',
        label='Random Rollout',
        linewidth=6,
    )
    plt.xlabel(game_label, fontsize=axis_font)
    plt.ylabel('Score', fontsize=axis_font)
    plt.xticks([])  # the single 'Data' category needs no tick label
    plt.yticks(fontsize=axis_font)
    plt.xlim(-0.15, 0.15)  # zoom in on the one category so the narrow boxes fill the frame
    plt.legend(loc='upper left', fontsize=26)
    plt.savefig(out_file)