"""
Main training script for GLEAM-AI.

This script provides the main entry point for training GLEAM-AI models
with both standard training and active learning capabilities.
"""

import os
import torch
import argparse
from pathlib import Path
from typing import Optional

from .trainer import GLEAMTrainer
from .utils import (
    load_training_config, setup_training_environment, 
    create_experiment_setup, validate_training_config,
    setup_logging, save_training_summary
)
from ..config.settings import load_config_from_yaml


def parse_arguments(argv: Optional[list] = None) -> argparse.Namespace:
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings to parse (without the
            program name). When ``None`` (the default), argparse reads
            the arguments from ``sys.argv[1:]``, which is the behaviour
            existing callers rely on.

    Returns:
        The populated ``argparse.Namespace``. Option names are exposed with
        underscores, e.g. ``--experiment-name`` becomes ``experiment_name``.
    """
    parser = argparse.ArgumentParser(description="GLEAM-AI Training Script")

    # Configuration
    parser.add_argument("--config", type=str, default="config.yaml",
                        help="Path to configuration file")
    parser.add_argument("--experiment-name", type=str, default="gleam_experiment",
                        help="Name of the experiment")
    parser.add_argument("--output-dir", type=str, default="./experiments",
                        help="Output directory for experiments")

    # Training options
    parser.add_argument("--mode", type=str, choices=["standard", "active_learning"],
                        default="standard", help="Training mode")
    parser.add_argument("--acquisition-type", type=str,
                        choices=["mean_std", "latent_info_gain"], default="mean_std",
                        help="Acquisition function type for active learning")

    # Environment options
    parser.add_argument("--device", type=str, default=None,
                        help="Device to use (cpu, cuda, mps)")
    parser.add_argument("--seed", type=int, default=None,
                        help="Random seed for reproducibility")
    parser.add_argument("--num-threads", type=int, default=None,
                        help="Number of threads for PyTorch")

    # Data paths
    parser.add_argument("--meta-path", type=str, default="./meta_data",
                        help="Path to metadata directory")
    parser.add_argument("--data-path", type=str, default="./data",
                        help="Path to data directory")
    parser.add_argument("--src-path", type=str, default="./src",
                        help="Path to source directory")
    parser.add_argument("--population-csv-path", type=str,
                        default="./meta_data/populations.csv",
                        help="Path to population CSV file")

    # Logging
    parser.add_argument("--log-level", type=str, default="INFO",
                        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
                        help="Logging level")

    # Passing None makes argparse fall back to sys.argv[1:].
    return parser.parse_args(argv)


def main():
    """
    Run one training experiment driven by the command line options.

    The steps are, in order: parse the arguments, set up logging under
    ``<output-dir>/logs``, load and validate the configuration, create the
    experiment directory, build a ``GLEAMTrainer``, prepare data and model,
    train in the selected mode, and finally save a training summary.

    Raises:
        ValueError: If ``validate_training_config`` reports a failure.
        Exception: Any other error raised during the run is printed and
            then re-raised unchanged.
    """
    args = parse_arguments()
    use_active_learning = args.mode == "active_learning"

    # Logging is configured before anything else is reported.
    log_dir = Path(args.output_dir).joinpath("logs")
    setup_logging(
        log_dir=log_dir,
        experiment_name=args.experiment_name,
        level=args.log_level,
    )

    banner = (
        f"Starting GLEAM-AI training in {args.mode} mode...",
        f"Configuration: {args.config}",
        f"Experiment: {args.experiment_name}",
        f"Output directory: {args.output_dir}",
    )
    for line in banner:
        print(line)

    # The same four locations go to the experiment record and to setup_data.
    data_locations = {
        "meta_path": args.meta_path,
        "data_path": args.data_path,
        "src_path": args.src_path,
        "population_csv_path": args.population_csv_path,
    }

    try:
        model_cfg, train_cfg, data_cfg, al_cfg = load_training_config(args.config)
        # The active learning config is only handed on in active learning mode.
        mode_al_cfg = al_cfg if use_active_learning else None

        device = setup_training_environment(
            seed=args.seed,
            device=args.device,
            num_threads=args.num_threads,
        )

        config_ok = validate_training_config(
            model_cfg, train_cfg, data_cfg, mode_al_cfg
        )
        if not config_ok:
            raise ValueError("Configuration validation failed")

        experiment_record = {
            "config_path": args.config,
            "mode": args.mode,
            "acquisition_type": args.acquisition_type,
            "device": device,
            "seed": args.seed,
            "num_threads": args.num_threads,
            **data_locations,
        }
        exp_dir = create_experiment_setup(
            base_dir=args.output_dir,
            experiment_name=args.experiment_name,
            config=experiment_record,
        )

        trainer = GLEAMTrainer(
            model_config=model_cfg,
            training_config=train_cfg,
            data_config=data_cfg,
            active_learning_config=mode_al_cfg,
            device=device,
            seed=args.seed,
        )
        trainer.setup_data(**data_locations)
        trainer.setup_model(meta_path=args.meta_path)

        if use_active_learning:
            # The acquisition function has to be configured before training.
            trainer.setup_active_learning(
                acquisition_type=args.acquisition_type
            )
            results = trainer.train_with_active_learning(
                output_dir=exp_dir,
                experiment_name=args.experiment_name,
            )

            print("Active learning training completed successfully!")
            print(f"Training history: {len(results['training_history'])} iterations")
            print(f"Acquisition history: {len(results['acquisition_history'])} acquisitions")

        elif args.mode == "standard":
            results = trainer.train(
                output_dir=exp_dir,
                experiment_name=args.experiment_name,
            )

            print("Standard training completed successfully!")
            print(f"Best model saved at: {results['best_model_path']}")

        # The summary records the loaded active learning config whenever it
        # is truthy, regardless of the selected mode.
        al_summary = None
        if al_cfg:
            al_summary = al_cfg.__dict__

        summary_config = {
            "model_config": model_cfg.__dict__,
            "training_config": train_cfg.__dict__,
            "data_config": data_cfg.__dict__,
            "active_learning_config": al_summary,
            "args": vars(args),
        }
        save_training_summary(
            output_dir=exp_dir,
            experiment_name=args.experiment_name,
            config=summary_config,
            results=results,
            metrics=trainer.get_model_info(),
        )

        print(f"Training summary saved to: {exp_dir}")

    except Exception as e:
        # Report the failure, then let the original exception propagate.
        print(f"Training failed with error: {e}")
        raise


def run_standard_training(
    config_path: str = "config.yaml",
    experiment_name: str = "gleam_experiment",
    output_dir: str = "./experiments",
    device: Optional[str] = None,
    seed: Optional[int] = None
):
    """
    Run standard training with minimal configuration.

    Builds the equivalent command line, installs it as ``sys.argv`` for the
    duration of the call, and delegates to ``main()``. The previous
    ``sys.argv`` is restored afterwards, even if ``main()`` raises.

    Args:
        config_path: Path to configuration file
        experiment_name: Name of the experiment
        output_dir: Output directory for experiments
        device: Device to use for training; omitted from the command line
            when ``None`` or empty
        seed: Random seed for reproducibility; omitted from the command
            line only when ``None`` (a seed of ``0`` is forwarded)

    Raises:
        Exception: Whatever ``main()`` raises is propagated unchanged.
    """
    import sys

    cli_args = [
        "main.py",
        "--config", config_path,
        "--experiment-name", experiment_name,
        "--output-dir", output_dir,
        "--mode", "standard"
    ]

    if device:
        cli_args.extend(["--device", device])
    # Compare against None: 0 is a valid seed and must not be dropped.
    if seed is not None:
        cli_args.extend(["--seed", str(seed)])

    # main() reads its options from sys.argv, so swap it in temporarily and
    # put the caller's value back to avoid leaking process-wide state.
    saved_argv = sys.argv
    sys.argv = cli_args
    try:
        main()
    finally:
        sys.argv = saved_argv


def run_active_learning_training(
    config_path: str = "config.yaml",
    experiment_name: str = "gleam_active_learning",
    output_dir: str = "./experiments",
    acquisition_type: str = "mean_std",
    device: Optional[str] = None,
    seed: Optional[int] = None
):
    """
    Run active learning training with minimal configuration.

    Builds the equivalent command line, installs it as ``sys.argv`` for the
    duration of the call, and delegates to ``main()``. The previous
    ``sys.argv`` is restored afterwards, even if ``main()`` raises.

    Args:
        config_path: Path to configuration file
        experiment_name: Name of the experiment
        output_dir: Output directory for experiments
        acquisition_type: Type of acquisition function
        device: Device to use for training; omitted from the command line
            when ``None`` or empty
        seed: Random seed for reproducibility; omitted from the command
            line only when ``None`` (a seed of ``0`` is forwarded)

    Raises:
        Exception: Whatever ``main()`` raises is propagated unchanged.
    """
    import sys

    cli_args = [
        "main.py",
        "--config", config_path,
        "--experiment-name", experiment_name,
        "--output-dir", output_dir,
        "--mode", "active_learning",
        "--acquisition-type", acquisition_type
    ]

    if device:
        cli_args.extend(["--device", device])
    # Compare against None: 0 is a valid seed and must not be dropped.
    if seed is not None:
        cli_args.extend(["--seed", str(seed)])

    # main() reads its options from sys.argv, so swap it in temporarily and
    # put the caller's value back to avoid leaking process-wide state.
    saved_argv = sys.argv
    sys.argv = cli_args
    try:
        main()
    finally:
        sys.argv = saved_argv


# Start training only when this file is executed as the entry point, not
# when it is imported by other code.
# NOTE(review): the imports at the top of this file are package-relative, so
# this presumably has to be launched as a module (``python -m <package>...``)
# rather than by file path — confirm the intended invocation.
if __name__ == "__main__":
    main()
