#!/usr/bin/env python3
"""
Alpha=1.5 Regret Scaling Experiment

Validates √T regret scaling for theory-compliant exploration ratio α = 1.5.

Experiment design:
- Environment: S5 (5-arm Gaussian, K=5, d=3)
- Algorithm: SP-UCB-α=1.5 only
- T values: 100, 200, 500, 1000, 2000, 5000, 10000
- Seeds: 50 (42-91), run in parallel
- ρ: 0.7 (moderately constrained budget)

Total runs: 7 T × 50 seeds = 350 runs

Usage:
    # Run single seed
    python run_alpha15_regret.py --seed 42

    # Run all 50 seeds in parallel (bash)
    for seed in {42..91}; do python run_alpha15_regret.py --seed $seed & done; wait

    # Combine results
    python run_alpha15_regret.py --combine
"""

import json
import argparse
import time
import numpy as np
import pandas as pd
from pathlib import Path

from sp_ucb_olp.data import S5GaussianLoader
from sp_ucb_olp.algorithms import get_algorithm


# Experiment configuration.
# Single source of truth for the horizons, algorithm settings, scenario
# dimensions, budget ratio and seeds used by the run / combine / info modes.
CONFIG = {
    # Horizons T simulated for every seed.
    'T_values': [100, 200, 500, 1000, 2000, 5000, 10000],  # Multiple T for scaling check
    'algorithm': {
        # Label written into every result record.
        'name': 'SP-UCB-α=1.5',
        # Identifier passed to get_algorithm() to select the implementation.
        'type': 'SP-UCB-OLP',
        # Exploration ratio α forwarded to the algorithm options.
        'alpha': 1.5,
        'warm_start_rounds': 5,  # 5 samples per arm before decisions
        'solve_frequency': 5,    # Re-solve saddle point every 5 steps
    },
    'scenario': {
        # Environment loader class for the S5 (5-arm Gaussian) scenario.
        'loader_class': S5GaussianLoader,
        # Number of arms.
        'K': 5,
        # Number of resources.
        'd': 3,
    },
    # Budget ratio ρ handed to the loader's budget and oracle computations.
    'rho': 0.7,
    # Seeds accepted by --seed and looked up by --combine: 42..91 inclusive.
    'seeds': list(range(42, 92)),  # 50 seeds
}


def _json_default(obj):
    """Convert NumPy scalars/arrays to built-in types for ``json.dump``.

    Only invoked by ``json`` for objects it cannot serialize itself, so
    values that were already serializable are written exactly as before.
    """
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


def _run_horizon(seed: int, T: int) -> dict:
    """Simulate one horizon ``T`` for ``seed`` and return its result record."""
    scenario = CONFIG['scenario']
    alg_config = CONFIG['algorithm']
    K, d = scenario['K'], scenario['d']
    rho = CONFIG['rho']

    # Monotonic clock: unaffected by system clock adjustments during long runs.
    # The measured time includes loader setup and the oracle computation.
    start_time = time.perf_counter()

    # Build the environment from the configured loader class rather than a
    # hard-coded one, so CONFIG stays the single source of truth.
    loader = scenario['loader_class'](K=K, T=T, d=d, seed=seed)
    B = loader.get_budget(rho)

    # Oracle benchmark value used for regret and competitive ratio.
    V_mix = loader.get_oracle_values(rho=rho)['V_mix']

    # Re-seed the global NumPy RNG so the algorithm always starts from the
    # same RNG state, regardless of what the loader setup consumed.
    np.random.seed(seed)

    algorithm = get_algorithm(
        alg_config['type'], K, d, T, B,
        {
            'alpha': alg_config['alpha'],
            'R_max': 2.0,
            'A_max': 2.0,
            # Original author's note: capped based on oracle p* ≈ 0.8.
            'P_max': 2.0,
            # 1/(√2 · 10) ≈ 0.0707: the 1/√2 ≈ 0.707 constant (described by
            # the original author as the Hoeffding constant), reduced 10x.
            'c_g': 0.0707,
            # Original author's note: reset to the default value.
            'c_0': 1.0,
            'warm_start_rounds': alg_config.get('warm_start_rounds', 1),
            'solve_frequency': alg_config.get('solve_frequency', 10),
        }
    )

    # Online loop: pick a configuration, observe the arrival, decide on
    # admission, then feed the outcome back to the algorithm.
    for t in range(T):
        theta, w, p = algorithm.select_config(t)
        r, a = loader.get_arrival(theta, t)
        accept = algorithm.decide_admission(t, theta, r, a, p)
        algorithm.update(t, theta, r, a, accept)

    elapsed_time = time.perf_counter() - start_time

    stats = algorithm.get_statistics()
    total_reward = stats['total_reward']
    benchmark = T * V_mix  # Oracle reward over the whole horizon
    regret = benchmark - total_reward
    # Guard against a non-positive benchmark to avoid dividing by zero.
    competitive_ratio = total_reward / benchmark if benchmark > 0 else 0

    return {
        'algorithm': alg_config['name'],
        'T': T,
        'seed': seed,
        'rho': rho,
        'total_reward': total_reward,
        'regret': regret,
        'regret_normalized': regret / np.sqrt(T),  # Regret / √T
        'competitive_ratio': competitive_ratio,
        'acceptance_rate': stats['acceptance_rate'],
        'V_mix': V_mix,
        'T_V_mix': benchmark,
        'K': K,
        'd': d,
        'elapsed_time': elapsed_time,
    }


def run_single_seed(seed: int, results_dir: Path) -> list:
    """Run every horizon in ``CONFIG['T_values']`` for a single seed.

    Args:
        seed: Random seed used for the loader and for the global NumPy RNG.
        results_dir: Existing directory that receives
            ``alpha15_seed_<seed>.json``.

    Returns:
        One result dict per horizon, in the order of ``CONFIG['T_values']``.
    """
    np.random.seed(seed)

    results = []
    for T in CONFIG['T_values']:
        print(f"[Seed {seed}] T={T}...", end=" ", flush=True)
        result = _run_horizon(seed, T)
        results.append(result)
        print(f"Regret={result['regret']:.1f}, Regret/√T={result['regret_normalized']:.2f}, CR={result['competitive_ratio']:.3f} ({result['elapsed_time']:.1f}s)")

    # Save seed results. The default hook keeps a stray NumPy scalar in the
    # statistics from discarding a finished run with a TypeError.
    output_file = results_dir / f"alpha15_seed_{seed}.json"
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, default=_json_default)
    print(f"Saved: {output_file}")

    return results


def combine_results(results_dir: Path):
    """Merge the per-seed JSON files into CSV tables and print a summary.

    Reads ``alpha15_seed_<seed>.json`` for every seed in ``CONFIG['seeds']``
    and writes ``alpha15_regret_per_seed.csv`` (one row per seed and T) and
    ``alpha15_summary.csv`` (mean/std per T) into ``results_dir``. Seeds
    without a result file are skipped and reported. If no file exists at all,
    a message is printed and nothing is written.

    Args:
        results_dir: Directory holding the per-seed files; also the output
            directory for the combined tables.
    """
    all_results = []
    missing_seeds = []

    for seed in CONFIG['seeds']:
        seed_file = results_dir / f"alpha15_seed_{seed}.json"
        if not seed_file.exists():
            missing_seeds.append(seed)
            continue
        with open(seed_file, 'r') as f:
            seed_results = json.load(f)
        all_results.extend(seed_results)
        print(f"Loaded {len(seed_results)} results from seed {seed}")

    if not all_results:
        print("No results found to combine.")
        return

    # Make partial coverage visible instead of silently averaging fewer seeds.
    if missing_seeds:
        print(f"Warning: no result file for {len(missing_seeds)} seed(s): {missing_seeds}")

    # Convert to DataFrame
    df = pd.DataFrame(all_results)

    # Save combined CSV
    csv_file = results_dir / "alpha15_regret_per_seed.csv"
    df.to_csv(csv_file, index=False)
    print(f"\nSaved combined results to: {csv_file}")

    # Group once; both the printed table and the saved summary derive from it.
    by_horizon = df.groupby('T')

    # Print summary
    print("\n" + "=" * 70)
    print("ALPHA=1.5 REGRET SCALING SUMMARY")
    print("=" * 70)

    print("\nRegret by T (mean ± std):")
    summary = by_horizon.agg({
        'regret': ['mean', 'std'],
        'regret_normalized': ['mean', 'std'],
        'competitive_ratio': ['mean', 'std'],
    }).round(2)
    print(summary.to_string())

    # Named aggregation ties each output column to its source explicitly
    # instead of relying on a positional rename of the result columns.
    summary_df = by_horizon.agg(
        regret_mean=('regret', 'mean'),
        regret_std=('regret', 'std'),
        n_seeds=('regret', 'count'),
        regret_norm_mean=('regret_normalized', 'mean'),
        regret_norm_std=('regret_normalized', 'std'),
        cr_mean=('competitive_ratio', 'mean'),
        cr_std=('competitive_ratio', 'std'),
    ).reset_index()

    print("\n√T Scaling Check:")
    print("If theory holds, Regret/√T should be approximately constant.")
    # Iterate over the horizons actually present in the data so this report
    # always agrees with the summary file written below.
    for row in summary_df.itertuples(index=False):
        print(f"  T={int(row.T):5d}: Regret/√T = {row.regret_norm_mean:.2f} ± {row.regret_norm_std:.2f}")

    # Save summary
    summary_file = results_dir / "alpha15_summary.csv"
    summary_df.to_csv(summary_file, index=False)
    print(f"\nSaved summary to: {summary_file}")


def main():
    """Command-line entry point.

    Modes:
        --combine   Merge the per-seed result files and print a summary
                    (takes precedence over --seed).
        --seed N    Run every horizon for seed N, which must be one of
                    ``CONFIG['seeds']``; otherwise the parser reports an
                    error and exits with a non-zero status.
        (neither)   Print the experiment configuration and usage hints
                    without touching the file system.
    """
    seeds = CONFIG['seeds']
    first_seed, last_seed = min(seeds), max(seeds)
    # A bash brace range only reproduces the configured seeds when they are
    # contiguous; otherwise fall back to listing every seed explicitly.
    contiguous = list(seeds) == list(range(first_seed, last_seed + 1))
    seed_range = f"{first_seed}-{last_seed}"
    if contiguous:
        shell_seeds = f"{{{first_seed}..{last_seed}}}"
    else:
        shell_seeds = " ".join(str(s) for s in seeds)

    parser = argparse.ArgumentParser(description='Alpha=1.5 Regret Scaling Experiment')
    parser.add_argument('--seed', type=int, default=None,
                        help=f'Seed to run ({seed_range}). If not specified, prints info.')
    parser.add_argument('--combine', action='store_true',
                        help='Combine results from all seeds')
    parser.add_argument('--results-dir', type=str, default='./results/alpha15_regret',
                        help='Directory for results')
    args = parser.parse_args()

    results_dir = Path(args.results_dir)

    if args.combine:
        results_dir.mkdir(parents=True, exist_ok=True)
        combine_results(results_dir)
    elif args.seed is not None:
        if args.seed not in seeds:
            # Exit non-zero so a failed launch is visible to the calling shell.
            valid = seed_range if contiguous else seeds
            parser.error(f"seed {args.seed} is not a configured seed (valid: {valid})")
        results_dir.mkdir(parents=True, exist_ok=True)
        run_single_seed(args.seed, results_dir)
    else:
        # Print experiment info
        n_horizons = len(CONFIG['T_values'])
        n_seeds = len(seeds)
        print("=" * 60)
        print("Alpha=1.5 Regret Scaling Experiment")
        print("=" * 60)
        print("Environment: S5 (5-arm Gaussian)")
        print(f"  K = {CONFIG['scenario']['K']} arms")
        print(f"  d = {CONFIG['scenario']['d']} resources")
        print(f"Algorithm: {CONFIG['algorithm']['name']} (α = {CONFIG['algorithm']['alpha']})")
        print(f"T values: {CONFIG['T_values']}")
        print(f"Seeds: {seeds}")
        print(f"ρ: {CONFIG['rho']}")
        print(f"\nTotal runs: {n_horizons} T × {n_seeds} seeds = {n_horizons * n_seeds} runs")
        print("\nTo run:")
        print(f"  Single seed:     python run_alpha15_regret.py --seed {first_seed}")
        print(f"  All in parallel: for seed in {shell_seeds}; do python run_alpha15_regret.py --seed $seed & done; wait")
        print("  Combine:         python run_alpha15_regret.py --combine")


if __name__ == "__main__":
    main()
