"""
Configuration utilities for loading and processing YAML configuration files.

This module provides utilities for loading YAML configuration files
and converting them to appropriate data structures.
"""

import os
import yaml
from pathlib import Path
from typing import Any, Dict, Optional, Union, List


def load_config(config_path: Union[str, Path]) -> Dict[str, Any]:
    """Load configuration from a YAML file.

    The file is parsed with ``yaml.safe_load``. An empty document (or any
    other falsy top-level value) is returned as an empty dictionary; any
    other parsed value is returned unchanged.

    Args:
        config_path: Path to the YAML configuration file

    Returns:
        Dictionary containing the configuration data

    Raises:
        FileNotFoundError: If the configuration file doesn't exist
        yaml.YAMLError: If the YAML file is malformed. The original parser
            error is attached as ``__cause__``.
    """
    config_path = Path(config_path)

    if not config_path.exists():
        raise FileNotFoundError(f"Configuration file not found: {config_path}")

    with open(config_path, 'r', encoding='utf-8') as f:
        try:
            config_data = yaml.safe_load(f)
        except yaml.YAMLError as e:
            # Chain explicitly so the underlying parser error (with its
            # position details) stays attached to the re-raised exception.
            raise yaml.YAMLError(f"Error parsing YAML file {config_path}: {e}") from e

    return config_data or {}


def save_config(config_data: Dict[str, Any], config_path: Union[str, Path]) -> None:
    """Write a configuration dictionary to disk as YAML.

    Missing parent directories of the destination are created first. The
    data is emitted in block style with keys kept in their insertion order.

    Args:
        config_data: Dictionary containing configuration data
        config_path: Path where to save the YAML file
    """
    destination = Path(config_path)

    # Make sure the target directory exists before opening the file.
    destination.parent.mkdir(parents=True, exist_ok=True)

    with destination.open('w', encoding='utf-8') as stream:
        yaml.safe_dump(
            config_data,
            stream,
            default_flow_style=False,
            sort_keys=False,
        )


def find_config_file(config_name: str, config_type: str = "training") -> Path:
    """Find a configuration file in the configs directory.

    The file is looked up at ``<framework root>/configs/<config_type>/<config_name>.yaml``,
    where the framework root comes from ``get_framework_root()``.

    Args:
        config_name: Name of the config file (with or without .yaml extension)
        config_type: Type of config ("training", "evaluation", "models")

    Returns:
        Path to the configuration file

    Raises:
        FileNotFoundError: If the configuration file doesn't exist. The
            message lists the configs available for ``config_type``.
    """
    # Accept the name with or without the extension; it is re-added below.
    config_name = config_name.removesuffix('.yaml')

    # Use the shared helper instead of re-deriving the root from __file__ here.
    config_dir = get_framework_root() / "configs" / config_type
    config_file = config_dir / f"{config_name}.yaml"

    if not config_file.exists():
        available_configs = list_configs(config_type)
        raise FileNotFoundError(
            f"Configuration file not found: {config_file}\n"
            f"Available {config_type} configs: {available_configs}"
        )

    return config_file


def list_configs(config_type: str = "training") -> list[str]:
    """Return the names of the YAML configs available for one config type.

    Looks in ``configs/<config_type>`` under the directory four levels above
    this file.

    Args:
        config_type: Type of config ("training", "evaluation", "models")

    Returns:
        Sorted list of configuration names (file stems, without the .yaml
        extension); empty if the directory does not exist
    """
    # Walk up four directory levels from this file to reach the framework root.
    root = Path(__file__).resolve()
    for _ in range(4):
        root = root.parent

    directory = root / "configs" / config_type
    if not directory.exists():
        return []

    return sorted(entry.stem for entry in directory.glob("*.yaml"))


def merge_configs(*config_dicts: Dict[str, Any]) -> Dict[str, Any]:
    """Merge multiple configuration dictionaries.

    Later dictionaries override earlier ones for conflicting keys.
    Nested dictionaries are merged recursively; any other value (including
    lists) is replaced wholesale by the later one.

    The result is fully independent of the inputs: values are deep-copied,
    so mutating the merged configuration never alters the source
    dictionaries, and the inputs themselves are never modified.

    Args:
        *config_dicts: Configuration dictionaries to merge

    Returns:
        Merged configuration dictionary (empty if no dictionaries are given)
    """
    import copy

    def _merge_into(target: Dict[str, Any], update: Dict[str, Any]) -> None:
        """Recursively fold ``update`` into ``target`` in place."""
        for key, value in update.items():
            existing = target.get(key)
            if isinstance(existing, dict) and isinstance(value, dict):
                # ``existing`` is already a private copy, so merging in place is safe.
                _merge_into(existing, value)
            else:
                # Copy so the result never shares nested objects with an input.
                target[key] = copy.deepcopy(value)

    merged: Dict[str, Any] = {}
    for config_dict in config_dicts:
        _merge_into(merged, config_dict)

    return merged


def load_and_merge_configs(*config_paths: Union[str, Path]) -> Dict[str, Any]:
    """Load several configuration files and combine them into one dictionary.

    Every path is read with ``load_config`` in the order given, and the
    results are passed to ``merge_configs`` in that same order.

    Args:
        *config_paths: Paths to configuration files to load and merge

    Returns:
        Merged configuration dictionary
    """
    # All files are loaded before the merge starts.
    loaded = [load_config(path) for path in config_paths]
    return merge_configs(*loaded)


def validate_config_structure(
    config_data: Dict[str, Any],
    required_fields: list[str],
) -> list[str]:
    """Check that every required field is present in a configuration.

    Each field is a dot-separated path that is followed through nested
    dictionaries. A field counts as missing when any path segment is absent,
    when an intermediate value is not a dictionary, or when the final value
    is ``None``.

    Args:
        config_data: Configuration dictionary to validate
        required_fields: List of required field paths (e.g., ["model.vlm_model_name", "data.train_data_path"])

    Returns:
        List of validation errors (empty if valid), in the order of
        ``required_fields``
    """
    def _lookup(dotted_path: str) -> Any:
        """Follow a dotted path through nested dicts; None if it cannot be followed."""
        node: Any = config_data
        for part in dotted_path.split('.'):
            if isinstance(node, dict) and part in node:
                node = node[part]
            else:
                return None
        return node

    return [
        f"Required field missing: {name}"
        for name in required_fields
        if _lookup(name) is None
    ]


def substitute_env_vars(config_data: Dict[str, Any]) -> Dict[str, Any]:
    """Expand environment-variable references inside configuration strings.

    Both ``${VAR_NAME}`` and ``$VAR_NAME`` forms are recognised. Strings are
    processed wherever they appear as values inside nested dictionaries and
    lists; dictionary keys and values of other types are left untouched. A
    reference to a variable that is not set is kept verbatim.

    Args:
        config_data: Configuration dictionary

    Returns:
        New configuration structure with environment variables substituted
    """
    import re

    # Group 1 captures the braced form ${VAR}, group 2 the bare form $VAR.
    env_reference = re.compile(r'\$\{([^}]+)\}|\$([A-Za-z_][A-Za-z0-9_]*)')

    def _expand(match):
        """Return the variable's value, or the original text if it is unset."""
        name = match.group(1) or match.group(2)
        return os.environ.get(name, match.group(0))

    def _walk(node: Any) -> Any:
        """Rebuild ``node`` with every contained string expanded."""
        if isinstance(node, str):
            return env_reference.sub(_expand, node)
        if isinstance(node, dict):
            return {key: _walk(value) for key, value in node.items()}
        if isinstance(node, list):
            return [_walk(item) for item in node]
        return node

    return _walk(config_data)


def get_framework_root() -> Path:
    """Return the root directory of the reasoning framework.

    The root is taken to be the directory four levels above this file's
    resolved location (this assumes the file lives in
    src/reasoning_frameworks/utils/).

    Returns:
        Path to the framework root directory
    """
    location = Path(__file__).resolve()
    # Step up four directory levels from this file.
    for _ in range(4):
        location = location.parent
    return location


def resolve_config_path(config_name: str, config_type: str = "training") -> Path:
    """Turn a configuration name or path into a full path.

    Three forms are accepted:
      * an absolute path, returned as-is;
      * a relative path with more than one component, joined onto the
        framework root;
      * a bare name, looked up with ``find_config_file`` in the standard
        directory for ``config_type``.

    Args:
        config_name: Name of the config (can be just name, relative path, or absolute path)
        config_type: Default config type if not specified in path

    Returns:
        Resolved path to the configuration file
    """
    candidate = Path(config_name)

    if candidate.is_absolute():
        return candidate

    # A single path component is a bare config name: use the standard lookup.
    if len(candidate.parts) <= 1:
        return find_config_file(config_name, config_type)

    # Anything with directories in it is interpreted relative to the framework root.
    return get_framework_root() / candidate


def load_logging_config(config_name: str = "evaluation_logging") -> Dict[str, Any]:
    """
    Load logging configuration from YAML file.

    The file is read from ``<framework root>/configs/logging/<config_name>.yaml``.
    If it contains a ``base_config`` key naming another file in the same
    directory and that file exists, the base is loaded first and the current
    config is merged over it (current values take precedence); the
    ``base_config`` key is then removed from the result. A base file that
    does not exist, or a ``base_config`` value that is not a string, is
    ignored and the config is returned as loaded.

    Args:
        config_name: Name of the logging config file (with or without .yaml extension)

    Returns:
        Dictionary containing logging configuration

    Raises:
        FileNotFoundError: If the logging configuration file doesn't exist
    """
    config_dir = get_framework_root() / "configs" / "logging"

    # Accept the name with or without the extension; it is re-added below.
    config_name = config_name.removesuffix('.yaml')
    config_file = config_dir / f"{config_name}.yaml"

    if not config_file.exists():
        available_configs = list_logging_configs()
        raise FileNotFoundError(
            f"Logging configuration file not found: {config_file}\n"
            f"Available logging configs: {available_configs}"
        )

    # Load the config
    config_data = load_config(config_file)

    # Handle base config inheritance
    base_config_name = config_data.get('base_config')
    if isinstance(base_config_name, str):
        # Normalise to "<name>.yaml" whether or not the extension was written.
        # Without re-adding the extension the base file would never be found.
        base_config_name = base_config_name.removesuffix('.yaml')
        base_config_file = config_dir / f"{base_config_name}.yaml"

        if base_config_file.exists():
            base_config_data = load_config(base_config_file)
            # Merge base config with current config (current takes precedence)
            config_data = merge_configs(base_config_data, config_data)
            # Remove the base_config reference from the final config
            config_data.pop('base_config', None)

    return config_data


def list_logging_configs() -> List[str]:
    """
    Return the names of the logging configs that are available.

    Looks in ``configs/logging`` under the directory four levels above this
    file.

    Returns:
        Sorted list of logging configuration names (file stems, without the
        .yaml extension); empty if the directory does not exist
    """
    here = Path(__file__).resolve()
    logging_dir = here.parent.parent.parent.parent / "configs" / "logging"

    if not logging_dir.exists():
        return []

    names = [path.stem for path in logging_dir.glob("*.yaml")]
    names.sort()
    return names


def apply_logging_config_to_args(args, logging_config: Dict[str, Any]):
    """
    Apply logging configuration to command line arguments.

    Command line arguments take precedence over config file values: an
    attribute on ``args`` is only filled in when it is missing or ``None``.
    ``args`` is modified in place.

    Config sections that are absent or empty (for example a YAML key written
    with no value, which loads as ``None``) are treated as empty sections, so
    the built-in defaults apply.

    Args:
        args: Argument namespace from argparse
        logging_config: Logging configuration dictionary
    """
    def _section(parent, key):
        """Return the sub-section ``key`` of ``parent``, or {} if absent/empty."""
        return parent.get(key) or {}

    def _fill(attr, section, key, fallback=None):
        """Set ``args.<attr>`` from the config only if the CLI left it unset."""
        if getattr(args, attr, None) is None:
            setattr(args, attr, section.get(key, fallback))

    logging_config = logging_config or {}

    # HTML reports configuration
    html_config = _section(logging_config, 'html_reports')
    _fill('enable_html_reports', html_config, 'enabled', False)

    # Output directories
    output_config = _section(logging_config, 'output')
    _fill('html_report_dir', output_config, 'html_report_dir',
          '/scratch/<ANONYMIZED>/framework_html_reports')
    _fill('debug_data_dir', output_config, 'debug_data_dir',
          '/scratch/<ANONYMIZED>/framework_debug_data')

    # Experiment tracking
    tracking_config = _section(logging_config, 'tracking')

    # MLflow configuration
    mlflow_config = _section(tracking_config, 'mlflow')
    _fill('enable_mlflow', mlflow_config, 'enabled', False)
    _fill('mlflow_tracking_uri', mlflow_config, 'tracking_uri', './mlruns')

    # Wandb configuration
    wandb_config = _section(tracking_config, 'wandb')
    _fill('enable_wandb', wandb_config, 'enabled', False)
    _fill('wandb_project', wandb_config, 'project')
    _fill('wandb_entity', wandb_config, 'entity')

    # Debug configuration
    debug_config = _section(logging_config, 'debug')
    if getattr(args, 'load_all_debug_sessions', None) is None:
        # Note: inverted logic -- the attribute is the negation of the config value.
        # NOTE(review): inversion is kept as found in the original; confirm it is intended.
        args.load_all_debug_sessions = not debug_config.get('load_all_sessions', False)