import numpy as np
import pandas as pd
from tslearn.datasets import UCR_UEA_datasets
from tslearn.barycenters import (
    softdtw_barycenter,
    dtw_barycenter_averaging,
    dtw_barycenter_averaging_subgradient
)
from ns_numba_ops import barycenter as nsdtw_barycenter
from ns_numba_ops import sdtw_value_and_grad
from tslearn.metrics import dtw
from collections import Counter




# --- Settings ---
# Smoothing values swept for both the NS-DTW and Soft-DTW barycenters; each
# value is also embedded in the corresponding result label.
gammas = [0.1, 0.01, 0.001, 0.0001]
# Iteration cap handed to every barycenter routine below.
max_iter = 50
# Seed NumPy's global RNG. Nothing in this file draws random numbers directly;
# presumably this is for the library routines -- TODO confirm it has an effect.
np.random.seed(42)

# Loader object that process_dataset uses to fetch datasets by name.
ucr = UCR_UEA_datasets()

def my_value_and_grad(X, Y, gamma):
    """Evaluate ``sdtw_value_and_grad`` on ``(X, Y)`` at a fixed ``gamma``.

    Forwards ``gamma`` as a keyword argument and returns whatever
    ``sdtw_value_and_grad`` returns, unchanged.
    """
    value_and_grad = sdtw_value_and_grad(X, Y, gamma=gamma)
    return value_and_grad

def choose_max_class(X, y, max_series=10):
    """Select up to ``max_series`` samples from the most frequent class.

    Parameters
    ----------
    X : array-like
        Samples, indexed along the first axis in step with ``y``.
    y : array-like
        One label per sample. Plain sequences such as lists are accepted.
    max_series : int, default 10
        Maximum number of samples to return.

    Returns
    -------
    numpy.ndarray
        The first ``max_series`` entries of ``X`` whose label is the most
        frequent one, in their original order. When several labels share the
        highest count, the one encountered first in ``y`` is used.

    Raises
    ------
    IndexError
        If ``y`` is empty (there is no most frequent label to pick).
    """
    # Coerce to arrays so the element-wise comparison and the index-array
    # lookup below also work for plain lists. Comparing a list to a scalar
    # yields a single bool, which would silently select nothing.
    samples = np.asarray(X)
    labels = np.asarray(y)

    class_counts = Counter(labels.tolist())
    majority_class = class_counts.most_common(1)[0][0]
    idx = np.where(labels == majority_class)[0][:max_series]
    return samples[idx]

def process_dataset(name, max_series=10):
    """Compute barycenters and DTW losses for one dataset.

    Loads the training split of ``name``, keeps up to ``max_series`` series
    from its most frequent class, fits one barycenter per method/setting and
    scores each barycenter by its mean DTW distance to the selected series.

    Parameters
    ----------
    name
        Dataset name passed to ``ucr.load_dataset``.
    max_series : int, default 10
        Maximum number of majority-class series to average.

    Returns
    -------
    dict
        Maps a method label (e.g. ``"softdtw_euclidean_gamma0.1"`` or
        ``"dba_euclidean"``) to the mean DTW distance between that method's
        barycenter and the selected series.
    """
    X_train, y_train, _, _ = ucr.load_dataset(name)

    # Select subset from majority class
    X_subset = choose_max_class(X_train, y_train, max_series=max_series)

    # Element-wise mean over the selected series; used as the explicit
    # starting point for the two DBA variants below.
    euclidean_init = np.mean(X_subset, axis=0)

    results = {}

    # --- NS-DTW Barycenters ---
    # NOTE(review): the lambda reads the loop variable `gamma` when it is
    # called, so this relies on nsdtw_barycenter invoking the callback before
    # it returns -- confirm.
    for gamma in gammas:
        results[f"nsdtw_euclidean_gamma{gamma}"] = nsdtw_barycenter(
            X_subset, max_iter=max_iter, X_init="euclidean_mean",
            value_and_grad=lambda X, Y: my_value_and_grad(X, Y, gamma)
        )

    # Drop only a trailing singleton feature axis. A bare squeeze() removes
    # every length-1 axis, so a subset holding a single series would lose its
    # sample axis and the loss loop below would iterate over scalars.
    if X_subset.ndim == 3 and X_subset.shape[-1] == 1:
        X_subset = X_subset[..., 0]

    # --- Soft-DTW Barycenters ---
    for gamma in gammas:
        results[f"softdtw_euclidean_gamma{gamma}"] = softdtw_barycenter(
            X_subset, max_iter=max_iter, init=None, gamma=gamma
        )

    # --- DBA ---
    results["dba_euclidean"] = dtw_barycenter_averaging(
        X_subset, init_barycenter=euclidean_init, max_iter=max_iter
    )

    # --- Subgradient DBA ---
    # NOTE(review): reshape(-1, 1) forces a single feature column; this looks
    # like it assumes univariate series -- confirm before using multivariate
    # datasets.
    results["subgradient_euclidean"] = dtw_barycenter_averaging_subgradient(
        X_subset, init_barycenter=euclidean_init.reshape(-1, 1), max_iter=max_iter
    )

    # --- Evaluate DTW losses ---
    # Mean DTW distance from each barycenter to every selected series.
    losses = {
        method: np.mean([dtw(bary, ts) for ts in X_subset])
        for method, bary in results.items()
    }

    return losses

# --- Main loop over datasets ---
# Long-format rows, one per (dataset, method) pair.
all_results = []

# Names of the datasets to evaluate, in processing order.
lists = [
    'Adiac', 'ArrowHead', 'Beef', 'BeetleFly', 'BirdChicken', 'Car', 'CBF',
    'ChlorineConcentration', 'CinCECGTorso', 'Coffee', 'Computers',
    'CricketX', 'CricketY', 'CricketZ', 'DiatomSizeReduction',
    'DistalPhalanxOutlineAgeGroup', 'DistalPhalanxOutlineCorrect',
    'DistalPhalanxTW', 'Earthquakes', 'ECG200', 'ECG5000', 'ECGFiveDays',
    'FaceAll', 'FaceFour', 'FacesUCR', 'GunPoint', 'Ham', 'MedicalImages',
    'MiddlePhalanxOutlineAgeGroup', 'MiddlePhalanxOutlineCorrect',
    'MiddlePhalanxTW', 'MoteStrain', 'ProximalPhalanxTW',
    'RefrigerationDevices', 'ScreenType', 'ShapeletSim', 'ShapesAll',
    'SmallKitchenAppliances', 'SonyAIBORobotSurface1',
    'SonyAIBORobotSurface2', 'SyntheticControl', 'Trace', 'TwoLeadECG',
    'Wine', 'WordSynonyms', 'Worms', 'WormsTwoClass',
]

for dataset_name in lists:
    print("-" * 10, dataset_name, "-" * 10)
    losses = process_dataset(dataset_name, max_series=10)
    # One record per method evaluated on this dataset.
    all_results.extend(
        {"Dataset": dataset_name, "Method": method, "Average_DTW_Loss": loss}
        for method, loss in losses.items()
    )
    print(f" {dataset_name} processed.")

# Collect every record, then reshape to one row per dataset and one column
# per method before writing the table to disk.
df_all = pd.DataFrame(all_results)
df_wide = df_all.pivot(
    index="Dataset",
    columns="Method",
    values="Average_DTW_Loss",
)
df_wide.to_csv("ucr_barycenter_dtw_losses.csv")



