"""
This is the single entry point for running prompt optimisation experiments.
It replaces all individual example scripts with a unified, configuration-driven approach
that supports both legacy and new step-based experiment definitions.

Features:
- Dynamic experiment construction from configuration
- Support for both legacy and step-based configurations
- Comprehensive error handling and validation
- Integrated logging system
- Flexible CLI interface
- Automatic dependency resolution for steps

Usage:
    python app.py --config gsm8k.yaml
    python app.py --config aqua.yaml --verbose
    python app.py --config test_step_based.yaml --quiet
"""

import json
import sys
import time
import traceback
from pathlib import Path
from typing import Any, Dict

# Import all implementations to trigger registration
import src.core.registry  # noqa: F401
import src.embeddings  # noqa: F401
import src.evaluation  # noqa: F401
import src.llm  # noqa: F401
import src.prompt_optimisation  # noqa: F401
import src.tasks  # noqa: F401

# Import core components
from src.core.config import config_manager
from src.core.dynamic_experiment import DynamicExperimentRunner
from src.core.experiment import ExperimentRunner
from src.core.registry import (
    llm_registry,
    prompt_optimiser_registry,
    task_registry,
)
from src.utils.cli import configure_logging, default_parse_args
from src.utils.decorator_utils import with_logger
from src.utils.error_tracking import ErrorTracker

# Logging helpers: score/prompt logging for the legacy steps and the per-experiment logger factory
from src.utils.logging_utils import log_evaluation_score, log_prompt_usage, setup_logger

# Module-level logger handle. It is None at import time; main() rebinds it
# (via `global logger`) to the logger returned by setup_logger() once the
# experiment name and output directory are known.
logger = None


class ExperimentError(Exception):
    """Exception type for errors related to an experiment.

    main() reports this type through handle_experiment_error() and then
    exits with status 1.
    """


class ConfigurationError(Exception):
    """Exception type for errors related to the experiment configuration.

    Raised when the configuration cannot be loaded or fails validation.
    """


@with_logger
def validate_configuration(config: Dict[str, Any]) -> None:
    """
    Validate the experiment configuration.

    Every configuration needs an 'experiment' section with a 'name'. If a
    'steps' key is present the step-based validator of the config manager is
    used; otherwise the configuration is treated as legacy and must contain
    'llm' and 'task' sections, each with a 'default' field.

    Args:
        config: The loaded configuration dictionary

    Raises:
        ConfigurationError: If the configuration is invalid
    """
    # Check required top-level keys
    if "experiment" not in config:
        raise ConfigurationError("Configuration missing required 'experiment' section")

    # `or {}` turns an empty section (e.g. a YAML key with no value, loaded as
    # None) into a clean "missing field" error instead of a TypeError.
    if "name" not in (config["experiment"] or {}):
        raise ConfigurationError(
            "Experiment configuration missing required 'name' field"
        )

    # Validate step-based configuration if present
    if "steps" in config:
        # Only the validator call is guarded: unexpected validator failures are
        # translated to ConfigurationError with the original exception chained.
        try:
            is_valid = config_manager.validate_step_based_config(config)
        except Exception as e:
            raise ConfigurationError(
                f"Step-based configuration validation error: {str(e)}"
            ) from e

        # Raised outside the try block so it is not caught and re-wrapped
        # (which previously produced a doubled error message).
        if not is_valid:
            raise ConfigurationError("Step-based configuration validation failed")
    else:
        logger.debug("Using legacy configuration format")
        # Validate legacy configuration requirements
        for section in ("llm", "task"):
            if section not in config:
                raise ConfigurationError(
                    f"Legacy configuration missing required '{section}' section"
                )
            if "default" not in (config[section] or {}):
                raise ConfigurationError(
                    f"Section '{section}' missing required 'default' field"
                )

    logger.info("Configuration validation passed")


@with_logger
def create_experiment_runner(
    config: Dict[str, Any], output_dir: Path
) -> ExperimentRunner:
    """
    Build the runner that matches the shape of the configuration.

    A configuration with a non-empty 'steps' entry gets a
    DynamicExperimentRunner, which is validated before being returned. Any
    other configuration gets a plain ExperimentRunner populated with the
    legacy steps.

    Args:
        config: The loaded configuration dictionary
        output_dir: Directory for experiment outputs (passed on as a string)

    Returns:
        The constructed DynamicExperimentRunner or ExperimentRunner

    Raises:
        ConfigurationError: If the dynamic runner reports validation errors
    """
    name = config["experiment"]["name"]
    out_path = str(output_dir)

    # Legacy path: no 'steps' key, or an empty one.
    if not config.get("steps"):
        logger.debug(f"Creating legacy ExperimentRunner for experiment: {name}")
        legacy_runner = ExperimentRunner(name=name, config=config, output_dir=out_path)
        _add_legacy_steps(legacy_runner, config)
        return legacy_runner

    # Step-based path.
    logger.debug(
        f"Creating DynamicExperimentRunner for step-based experiment: {name}"
    )
    dynamic_runner = DynamicExperimentRunner(
        name=name, config=config, output_dir=out_path
    )

    is_valid, errors = dynamic_runner.validate_configuration()
    if is_valid:
        return dynamic_runner

    # One bullet per reported validation error.
    bullet_lines = [f"  - {error}" for error in errors]
    raise ConfigurationError(
        "Dynamic experiment validation failed:\n" + "\n".join(bullet_lines)
    )


@with_logger
def _add_legacy_steps(runner: ExperimentRunner, config: Dict[str, Any]) -> None:
    """
    Register the fixed legacy pipeline on the runner.

    The steps are added in this order: init_llm, init_task,
    init_prompt_optimiser, optimise_prompt, run_optimised_prompt,
    evaluate_results. The three init steps receive the configuration as a
    keyword argument; the remaining steps are registered without extra
    arguments.

    The step callables are the module-level functions defined in this file.
    They are referenced directly rather than through ``from app import ...``:
    when this file is executed as a script it runs as ``__main__``, so
    importing ``app`` would load a second copy of the module with its own
    module-level state (including a separate ``logger`` global).

    Args:
        runner: The ExperimentRunner instance
        config: The configuration dictionary
    """
    logger.debug("Adding legacy experiment steps")

    # Steps that read the configuration directly.
    runner.add_step("init_llm", init_llm, config=config)
    runner.add_step("init_task", init_task, config=config)
    runner.add_step("init_prompt_optimiser", init_prompt_optimiser, config=config)

    # Steps whose parameters are named after earlier steps; presumably the
    # runner supplies those results by name - confirm against ExperimentRunner.
    runner.add_step("optimise_prompt", optimise_prompt)
    runner.add_step("run_optimised_prompt", run_optimised_prompt)
    runner.add_step("evaluate_results", evaluate_results)

    logger.debug("Legacy steps added successfully")


def setup_output_directory(
    experiment_name: str, base_dir: Path = Path("output")
) -> Path:
    """
    Create and return a timestamped output directory for the experiment.

    The directory is named ``<experiment_name>_<YYMMDD-HHMMSS>`` and is
    created (with any missing parents) underneath ``base_dir``.

    The timestamp has one-second resolution and the directory is created with
    ``exist_ok=True``, so two runs of the same experiment started within the
    same second share one directory.

    Args:
        experiment_name: Name of the experiment
        base_dir: Root directory for all experiment outputs; a string path is
            accepted as well. Defaults to ``output`` relative to the current
            working directory.

    Returns:
        Path to the output directory
    """
    timestamp = time.strftime("%y%m%d-%H%M%S")
    experiment_output_dir = Path(base_dir) / f"{experiment_name}_{timestamp}"
    experiment_output_dir.mkdir(parents=True, exist_ok=True)

    return experiment_output_dir


@with_logger
def log_experiment_summary(runner: ExperimentRunner, results: Dict[str, Any]) -> None:
    """
    Write a banner-framed summary of the finished experiment to the log.

    Always logs the experiment name and the 'duration' metric. For a
    DynamicExperimentRunner it also logs the step counters. If the results
    contain an 'evaluate_results' dictionary, the base/optimised scores (or
    the single 'score') are logged as well.

    Args:
        runner: The experiment runner
        results: The experiment results
    """
    banner = "=" * 60

    logger.info(banner)
    logger.info("EXPERIMENT SUMMARY")
    logger.info(banner)
    logger.info(f"Experiment: {runner.name}")

    metrics = runner.metrics
    logger.info(f"Duration: {metrics.get('duration', 0):.2f} seconds")

    # Step counters are only reported for the dynamic runner.
    if isinstance(runner, DynamicExperimentRunner):
        logger.info(f"Total steps: {metrics.get('total_steps', 0)}")
        logger.info(f"Successful steps: {metrics.get('successful_steps', 0)}")
        failed_optional = metrics.get("failed_optional_steps", 0)
        if failed_optional > 0:
            logger.info(f"Failed optional steps: {failed_optional}")

    # Score lines, when an evaluation step produced a dictionary.
    evaluation = results["evaluate_results"] if "evaluate_results" in results else None
    if isinstance(evaluation, dict):
        has_comparison = "base_score" in evaluation and "optimised_score" in evaluation
        if has_comparison:
            logger.info(f"Base Score: {evaluation['base_score']:.4f}")
            logger.info(f"Optimised Score: {evaluation['optimised_score']:.4f}")
            logger.info(f"Improvement: {evaluation.get('improvement', 0):.4f}")
        elif "score" in evaluation:
            logger.info(f"Final Score: {evaluation['score']:.4f}")

    logger.info(banner)


@with_logger
def handle_experiment_error(error: Exception, config_path: str) -> None:
    """
    Log an experiment error with a message tailored to its type.

    ConfigurationError, ExperimentError and FileNotFoundError get a short,
    user-oriented message. Any other exception is reported as unexpected,
    together with its full traceback.

    Args:
        error: The exception that occurred
        config_path: Path to the configuration file
    """
    if isinstance(error, ConfigurationError):
        logger.error(f"Configuration Error in {config_path}:")
        logger.error(f"  {str(error)}")
        logger.error("Please check your configuration file and try again.")
    elif isinstance(error, ExperimentError):
        logger.error("Experiment Error:")
        logger.error(f"  {str(error)}")
    elif isinstance(error, FileNotFoundError):
        logger.error("File Not Found Error:")
        logger.error(f"  {str(error)}")
        logger.error("Please check that all required files exist.")
    else:
        logger.error(f"Unexpected Error: {type(error).__name__}")
        logger.error(f"  {str(error)}")
        logger.error("Full traceback:")
        # Format the traceback of the exception that was passed in, rather
        # than relying on traceback.format_exc(), which reports whatever
        # exception is currently being handled (or "NoneType: None" when this
        # function is called outside an except block).
        logger.error(
            "".join(
                traceback.format_exception(type(error), error, error.__traceback__)
            )
        )


@with_logger
def _extract_error_statistics(
    results: Dict[str, Any], metrics: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Collect error statistics for the results summary.

    The metrics are consulted first: any of the keys 'failed_llm_calls',
    'error_types' and 'errors' that are present are copied and returned. If
    none is present, the step results are scanned for a tuple of at least
    three items whose third item is an ErrorTracker with a positive error
    count; the summary of the first such tracker is returned.

    Args:
        results: The experiment results dictionary
        metrics: The experiment metrics dictionary

    Returns:
        Dictionary containing error statistics, or empty dict if none found
    """
    # Preferred source: statistics already recorded in the metrics.
    from_metrics = {
        key: metrics[key]
        for key in ("failed_llm_calls", "error_types", "errors")
        if key in metrics
    }
    if from_metrics:
        logger.info(
            f"Found error statistics in metrics: {from_metrics.get('failed_llm_calls', 0)} failed calls"
        )
        return from_metrics

    # Fallback: an ErrorTracker carried inside a step's result tuple.
    for step_name, step_result in results.items():
        if not (isinstance(step_result, tuple) and len(step_result) >= 3):
            continue

        tracker = step_result[2]
        if not isinstance(tracker, ErrorTracker):
            continue

        if tracker.get_error_count() > 0:
            logger.info(
                f"Found error tracker in step '{step_name}' with {tracker.get_error_count()} errors"
            )
            return tracker.get_summary()

    return {}


def main():
    """
    Main entry point for the consolidated experiment runner.

    Parses the CLI arguments, loads the configuration, creates the output
    directory and experiment logger, validates the configuration, runs the
    experiment and writes ``experiment_summary.json`` to the output directory.

    Any failure (including Ctrl-C) is reported and terminates the process
    with exit status 1. Before the experiment logger exists, errors are
    printed to stdout instead of being logged.
    """
    global logger

    # Bound before the try block so the error handlers can always report a
    # config path, even when argument parsing itself is what failed.
    config_path = "unknown"

    try:
        # Parse command-line arguments
        args = default_parse_args(
            description="Run a prompt optimization experiment using dynamic configuration",
        )
        config_path = args.config

        # Configure logging based on CLI arguments
        configure_logging(args)

        # Load configuration; any loader failure is reported as a config error
        try:
            config = config_manager.load_config(args.config, args)
        except Exception as e:
            raise ConfigurationError(f"Failed to load configuration: {str(e)}") from e

        # The experiment name is needed for the output directory and logger,
        # i.e. before validate_configuration() runs, so a missing name must
        # be turned into a ConfigurationError here rather than a bare KeyError.
        try:
            experiment_name = config["experiment"]["name"]
        except (KeyError, TypeError) as e:
            raise ConfigurationError(
                "Configuration missing required 'experiment.name' field"
            ) from e

        # Set up output directory
        output_dir = setup_output_directory(experiment_name)

        # Set up experiment-specific logger
        logger = setup_logger(
            experiment_name=experiment_name, output_dir=output_dir, args=args
        )

        logger.info(f"Loaded configuration: {config}")
        logger.info(f"Output directory: {output_dir}")

        # Validate the configuration
        validate_configuration(config)

        # Create the appropriate experiment runner
        runner = create_experiment_runner(config, output_dir)

        # Log experiment start
        logger.info(f"Starting experiment: {experiment_name}")
        logger.debug(f"Runner type: {type(runner).__name__}")

        # Run the experiment
        start_time = time.time()
        results = runner.run()
        end_time = time.time()

        # Log experiment completion
        duration = end_time - start_time
        logger.info(f"Experiment completed successfully in {duration:.2f} seconds")

        # Log experiment summary
        log_experiment_summary(runner, results)

        # Assemble the results summary
        results_summary = {
            "experiment_name": experiment_name,
            "duration": duration,
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
            "config_file": args.config,
            "results": results,
        }

        # Add error statistics to summary if available
        error_stats = _extract_error_statistics(results, runner.metrics)
        if error_stats:
            results_summary["error_statistics"] = error_stats
            logger.info(
                f"Added error statistics to summary: {error_stats.get('failed_llm_calls', 0)} failed calls"
            )

        # Saving the summary is best-effort: a failure is logged as a warning
        # and does not fail the run. `default=str` stringifies any value that
        # json cannot serialise natively.
        summary_file = output_dir / "experiment_summary.json"
        try:
            with open(summary_file, "w", encoding="utf-8") as f:
                json.dump(results_summary, f, indent=2, default=str)
            logger.info(f"Results summary saved to: {summary_file}")
        except Exception as e:
            logger.warning(f"Failed to save results summary: {str(e)}")

        logger.info("Dynamic Experiment Runner completed successfully")

    except KeyboardInterrupt:
        if logger:
            logger.info("Experiment interrupted by user")
        else:
            print("Experiment interrupted by user")
        sys.exit(1)

    except (ConfigurationError, ExperimentError, FileNotFoundError) as e:
        # Expected failure modes: short message, no traceback when unlogged.
        if logger:
            handle_experiment_error(e, config_path)
        else:
            print(f"Error: {str(e)}")
        sys.exit(1)

    except Exception as e:
        # Anything else is unexpected: include the traceback.
        if logger:
            handle_experiment_error(e, config_path)
        else:
            print(f"Unexpected error: {str(e)}")
            traceback.print_exc()
        sys.exit(1)


# Legacy step functions, kept for backward compatibility.
# They are defined here (carried over from the original app.py) for use with legacy configurations.
@with_logger
def init_llm(config):
    """
    Create the LLM selected by the configuration.

    The name under ``config["llm"]["default"]`` picks both the registry entry
    and the sub-section of ``config["llm"]`` whose items are passed to the
    registry as keyword arguments.

    Args:
        config: The configuration dictionary

    Returns:
        The LLM instance created by the LLM registry
    """
    llm_section = config["llm"]
    llm_name = llm_section["default"]
    llm_config = llm_section[llm_name]

    logger.debug(f"Initialising LLM: {llm_name}")
    logger.info(f"LLM configuration: {llm_config}")

    llm = llm_registry.create(llm_name, **llm_config)

    info = llm.model_info
    logger.debug(f"LLM initialised: {info.get('name')} {info.get('version_name')}")

    return llm


@with_logger
def init_task(config, init_llm=None, **kwargs):
    """
    Create the task selected by the configuration.

    The name under ``config["task"]["default"]`` picks both the registry
    entry and the sub-section of ``config["task"]`` whose items are passed to
    the registry as keyword arguments.

    Args:
        config: The configuration dictionary
        init_llm: Accepted but not used by this step
        **kwargs: Accepted but not used by this step

    Returns:
        The task instance created by the task registry
    """
    task_section = config["task"]
    task_name = task_section["default"]
    logger.debug(f"Initialising task: {task_name}")

    task_config = task_section[task_name]
    logger.info(f"Task configuration: {task_config}")

    task = task_registry.create(task_name, **task_config)
    logger.debug(f"Task initialised: {task_name}")

    return task


@with_logger
def init_prompt_optimiser(config, init_task=None, init_llm=None, **kwargs):
    """
    Create the prompt optimiser selected by the configuration, if any.

    Optimisation is skipped when the 'prompt_optimiser' section is absent or
    empty, or when its 'default' entry is the string "none".

    Args:
        config: The configuration dictionary
        init_task: Accepted but not used by this step
        init_llm: Accepted but not used by this step
        **kwargs: Accepted but not used by this step

    Returns:
        The optimiser created by the prompt optimiser registry, or None when
        no optimiser is configured
    """
    section = config["prompt_optimiser"] if "prompt_optimiser" in config else None

    # Nothing configured, or optimisation explicitly disabled.
    if not section or section["default"] == "none":
        logger.info("No prompt optimiser configured, skipping optimisation")
        return None

    optimiser_name = section["default"]
    logger.debug(f"Initialising prompt optimiser: {optimiser_name}")

    optimiser_config = section[optimiser_name]
    optimiser = prompt_optimiser_registry.create(optimiser_name, **optimiser_config)

    logger.debug(f"Prompt optimiser initialised: {optimiser_name}")

    return optimiser


@with_logger
def optimise_prompt(
    init_llm,
    init_task,
    init_prompt_optimiser,
    **kwargs,
):
    """
    Run the prompt optimiser against the task, when one is available.

    The task's prompt message template is logged as the base prompt, handed
    to the optimiser, and the optimiser is then run with the task and LLM.

    Args:
        init_llm: The LLM instance
        init_task: The task instance
        init_prompt_optimiser: The optimiser instance, or a falsy value to skip
        **kwargs: Accepted but not used by this step

    Returns:
        The same optimiser instance after optimisation, or None when skipped
    """
    optimiser = init_prompt_optimiser
    if not optimiser:
        logger.info("No prompt optimiser provided, skipping optimisation")
        return None

    logger.debug("Optimising prompt")

    # The task's current template is the starting point for the optimiser.
    base_template = init_task.get_prompt_msg_template()

    logger.debug("Starting prompt optimisation process")
    log_prompt_usage(logger, str(base_template), "base")

    optimiser.set_message_template(base_template)
    optimiser.optimise(init_task, init_llm)

    logger.debug("Prompt optimisation complete")

    return optimiser


@with_logger
def run_optimised_prompt(
    init_llm,
    init_task,
    optimise_prompt=None,
    **kwargs,
):
    """
    Run the task with the optimised prompt, or base prompt if no optimisation was done.

    When an optimiser is supplied, its applied message template is installed
    on the task before the run; otherwise the task runs with its current
    template. In both cases the prompt in use and the resulting score are
    logged.

    Args:
        init_llm: The LLM instance the task is run with
        init_task: The task instance
        optimise_prompt: The optimiser returned by the optimisation step, or
            None when no optimisation was performed
        **kwargs: Accepted but not used by this step

    Returns:
        Dictionary with 'results_df' and 'score' from the task run
    """
    # NOTE: log_prompt_usage must come from the module-level import. A
    # function-local import in only one branch would make the name local to
    # the whole function and raise UnboundLocalError in the other branch.
    if optimise_prompt is None:
        logger.debug("Running task with base prompt (no optimisation performed)")
        base_prompt = init_task.get_prompt_msg_template()
        log_prompt_usage(logger, str(base_prompt), "final task")
    else:
        logger.debug("Running task with optimised prompt")
        optimised_msg_template = optimise_prompt.apply()
        log_prompt_usage(logger, str(optimised_msg_template), "optimised task")
        init_task.update_prompt_msg_template(optimised_msg_template)

    # Common to both paths; the third element of the run result is not used.
    results_df, score, _ = init_task.run(init_llm)
    log_evaluation_score(logger, score)

    logger.info(f"Results dataframe shape: {results_df.shape}")

    return {
        "results_df": results_df,
        "score": score,
    }


@with_logger
def evaluate_results(run_optimised_prompt, **kwargs):
    """
    Package the outcome of the task run as the evaluation result.

    No separate baseline is evaluated here: the single score from the run is
    reported as 'score', 'base_score' and 'optimised_score', and both
    improvement figures are fixed at 0.0.

    Args:
        run_optimised_prompt: Dictionary with 'score' and 'results_df'
        **kwargs: Accepted but not used by this step

    Returns:
        Dictionary with the score, the results dataframe and the
        compatibility fields described above
    """
    logger.debug("Evaluating results")

    run_output = run_optimised_prompt
    score = run_output["score"]
    results_df = run_output["results_df"]

    logger.info(f"Final Score: {score:.4f}")

    evaluation = {"score": score, "results_df": results_df}
    # Compatibility fields for consumers that expect a before/after comparison.
    evaluation["base_score"] = score
    evaluation["optimised_score"] = score
    evaluation["improvement"] = 0.0
    evaluation["improvement_percent"] = 0.0
    return evaluation


# Run the experiment only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
