import wandb
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter1d
from mount_car.utils.utils import gaussian_smooth


# Open a handle on the W&B public API.
api = wandb.Api()

# Collect the runs logged under the project path below.
project_path = "albicaron93/MC_EXPLORE"
runs = api.runs(project_path)

# Experimental factors. Every (policy type, uncertainty method, noise model)
# combination gets its own bucket of runs.
uncer_methods = ['Entropy', 'IG']
noise_models = ['heteroskedastic', 'homoskedastic']
policy_types = ['MPC']

# Nested table of runs: all_runs[policy type][uncertainty method][noise model]
# is a list that starts empty and is filled with one entry per run.
all_runs = {
    pol_key: {
        uncer_key: {noise_key: [] for noise_key in noise_models}
        for uncer_key in uncer_methods
    }
    for pol_key in policy_types
}

# The config key that identifies which group the run belongs to.
# Adjust based on how your runs' configs are structured.
group_key = "uncertainty_method"

# Color for each (uncertainty method, noise model) combination.
colors = {
    'Entropy': {
        'heteroskedastic': 'lightcoral',
        'homoskedastic': 'darkred',
    },
    'IG': {
        'heteroskedastic': 'cornflowerblue',
        'homoskedastic': 'darkblue',
    },
}

# ---------------------
# 1) Download data from W&B and group runs
# ---------------------
# Only runs whose name starts with 'MPC' are considered, and runs whose
# uncertainty method is 'Error' are left out. Each kept run contributes one
# array of "Env/Solved at step" values to its bucket in all_runs.
for run in runs:

    if not run.name.startswith('MPC'):
        continue

    # .get() so that a run logged without these config entries is skipped
    # below instead of aborting the whole script with a KeyError.
    group_value = run.config.get(group_key)
    noise_model = run.config.get('noise_model')
    pol_name = run.name.split('_')[0]

    if group_value == 'Error':
        continue

    # A run can only be stored if a bucket exists for its combination.
    if (
        pol_name not in all_runs
        or group_value not in all_runs[pol_name]
        or noise_model not in all_runs[pol_name][group_value]
    ):
        print(
            f"Skipping run {run.name}: no bucket for policy={pol_name!r}, "
            f"{group_key}={group_value!r}, noise_model={noise_model!r}"
        )
        continue

    # Download the logged history for this run.
    # NOTE(review): run.history() presumably returns a sampled history by
    # default - confirm this is enough rows, or switch to scan_history().
    try:
        history = run.history(keys=["Env/Solved at step"])
        solved_ = history["Env/Solved at step"].dropna().to_numpy()
    except KeyError:
        solved_ = np.array([1000])  # If the run did not solve the problem, it is considered as 1000 steps.

    # A column that exists but holds no values is treated like a missing one,
    # so that later code can always read the last entry of the array.
    if solved_.size == 0:
        solved_ = np.array([1000])

    all_runs[pol_name][group_value][noise_model].append(solved_)

# ---------------------
# 2) Work out rewards given the step to solve. In MC every step is -1 while when the problem is solved it is +100.
# ---------------------

print("All runs: ", all_runs)

# Given all runs, build a dictionary with the same nesting as all_runs that holds the rewards of each run.
# If the problem is solved at step t, the reward is -t + 100. If the problem is not solved (step value 1000),
# the reward is -1000. Averages and confidence intervals of these rewards are computed further down.
cumulative_rewards = {
    pol_key: {uncer: {noise: [] for noise in noise_models} for uncer in uncer_methods}
    for pol_key in policy_types
}

for policy_type in policy_types:
    for uncer in uncer_methods:
        for noise in noise_models:
            for run in all_runs[policy_type][uncer][noise]:
                # One float reward per logged "solved at step" value. This is the element-wise
                # equivalent of assigning cumulative_reward[i:] for every i in turn: the last
                # write to position i always comes from entry i itself.
                steps_to_solve = np.asarray(run, dtype=float)
                cumulative_reward = np.where(steps_to_solve == 1000, -1000.0, 100.0 - steps_to_solve)
                cumulative_rewards[policy_type][uncer][noise].append(cumulative_reward)

# ---------------------
# 3) Table of results
# ---------------------
# For every configuration, print the average of the final logged value of each run plus/minus the half-width
# of a 95% confidence interval, together with the number of runs that solved the problem. This is done first
# for the cumulative rewards and then for the steps until the environment is solved.

# Two-sided 95% quantile of the standard normal (1.64 would give a 90% interval).
Z_95 = 1.96
# Step value that marks a run as not having solved the problem.
UNSOLVED_STEPS = 1000


def _final_value_stats(per_run_values, z_value=Z_95):
    """Return (mean, confidence half-width) of the last entry of each run.

    Only the last entry of every run is used, so runs of different lengths
    are supported. Runs without any entry are ignored. The standard deviation
    is the population one (NumPy default, ddof=0). When no run has a value,
    both results are NaN.
    """
    finals = np.array(
        [values[-1] for values in per_run_values if len(values) > 0],
        dtype=float,
    )
    if finals.size == 0:
        return float("nan"), float("nan")
    half_width = z_value * finals.std() / np.sqrt(finals.size)
    return finals.mean(), half_width


# (label printed in the table, nested dictionary the values are read from)
tables = (
    ("Average cumulative reward", cumulative_rewards),
    ("Average steps until solved", all_runs),
)

for label, per_config_values in tables:
    for policy_type in policy_types:
        for uncer in uncer_methods:
            for noise in noise_models:
                config_runs = all_runs[policy_type][uncer][noise]
                # Sample size of THIS configuration; configurations may hold different numbers of runs.
                num_runs = len(config_runs)

                mean_final, conf_int = _final_value_stats(per_config_values[policy_type][uncer][noise])

                # Count number of runs that solved the problem
                num_solved = sum(
                    1 for run in config_runs if len(run) > 0 and run[-1] != UNSOLVED_STEPS
                )

                print(f"Policy type: {policy_type}, Uncertainty method: {uncer}, Noise model: {noise}")
                print(f"{label}: {mean_final:.2f} +/- {conf_int:.2f}")
                print(f"Number of runs that solved the problem: {num_solved}/{num_runs}")
                print()

print("Done!")
