import json
import pandas as pd
from typing import List, Optional, Dict, Any
import os
import glob


def find_latest_evaluation_file() -> Optional[str]:
    """
    Locate the evaluation results file that should be loaded.

    Paths are relative to the current working directory. Timestamped files
    (``evaluation_data/evaluation_results_*.json``) take priority; among them
    the one with the largest ``os.path.getctime`` value wins. Note that
    ``getctime`` is platform-dependent (metadata-change time on Unix,
    creation time on Windows).

    Returns:
    - path of the selected file, or None when neither a timestamped file nor
      the un-timestamped default file exists
    """
    candidates = glob.glob("evaluation_data/evaluation_results_*.json")
    if candidates:
        return max(candidates, key=os.path.getctime)

    # No timestamped results: fall back to the default filename, if present.
    fallback = "evaluation_data/evaluation_results.json"
    return fallback if os.path.exists(fallback) else None


def load_evaluation_data(
    file_path: str, filters: Optional[Dict[str, List[str]]] = None
) -> pd.DataFrame:
    """
    Load evaluation data with optional filtering.

    Parameters:
    - file_path: Path to the evaluation JSON file (read as UTF-8)
    - filters: Optional dict mapping a filter name to the accepted values.
               Recognised names and the column each one tests:
               'models' -> model, 'tasks' -> benchmark,
               'question_types' -> question_type, 'targets' -> target,
               'system_prompts' -> system_prompt, 'encodings' -> encoding,
               'include_patterns' -> size_pattern (keep matches),
               'exclude_patterns' -> size_pattern (drop matches).
               A single string is accepted in place of a list. A value of
               None, an unrecognised name, or a name whose column is absent
               from the data is ignored. Filters are combined with AND, so an
               empty list for an inclusive filter removes every row.

    Returns:
    - pandas DataFrame with evaluation results
    """
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    df = pd.DataFrame(data)
    if not filters:
        return df

    # filter name -> (column to test, True to keep matches / False to drop them)
    filter_specs = {
        "models": ("model", True),
        "tasks": ("benchmark", True),
        "question_types": ("question_type", True),
        "targets": ("target", True),
        "system_prompts": ("system_prompt", True),
        "encodings": ("encoding", True),
        "include_patterns": ("size_pattern", True),
        "exclude_patterns": ("size_pattern", False),
    }

    for filter_key, filter_values in filters.items():
        spec = filter_specs.get(filter_key)
        if spec is None or filter_values is None:
            continue

        column, keep_matches = spec
        if column not in df.columns:
            continue

        # Series.isin rejects a bare string, so wrap it as a one-element list.
        if isinstance(filter_values, str):
            filter_values = [filter_values]

        matches = df[column].isin(filter_values)
        df = df[matches if keep_matches else ~matches]

    return df


def split_by_question_type(df: pd.DataFrame) -> tuple:
    """
    Partition rows on whether ``question_type`` equals "full_output".

    Both halves are independent copies, so modifying them does not touch
    the input frame.

    Returns:
    - (full_output_df, question_based_df)
    """
    kinds = df["question_type"]
    return (
        df[kinds.eq("full_output")].copy(),
        df[kinds.ne("full_output")].copy(),
    )


def get_available_values(df: pd.DataFrame) -> Dict[str, List[str]]:
    """
    Get the sorted distinct values of each known dimension.

    Missing entries (None/NaN) are dropped before sorting; they cannot be
    ordered against strings and would otherwise make ``sorted`` raise
    TypeError.

    Parameters:
    - df: evaluation results; any of the dimension columns may be absent

    Returns:
    - dict keyed by 'models', 'tasks', 'question_types', 'targets',
      'system_prompts' and 'encodings'; a dimension whose column is not in
      the frame maps to an empty list
    """
    # result key -> DataFrame column holding that dimension
    dimension_columns = {
        "models": "model",
        "tasks": "benchmark",
        "question_types": "question_type",
        "targets": "target",
        "system_prompts": "system_prompt",
        "encodings": "encoding",
    }

    return {
        dimension: (
            sorted(df[column].dropna().unique()) if column in df.columns else []
        )
        for dimension, column in dimension_columns.items()
    }


def validate_data(df: pd.DataFrame) -> Dict[str, Any]:
    """
    Check that the required columns exist and summarise the frame.

    Parameters:
    - df: evaluation results to validate

    Returns:
    - dict with 'total_records', 'unique_models', 'unique_tasks',
      'unique_question_types', 'overall_accuracy' (mean of the 'correct'
      column) and 'missing_columns' (always empty on a successful return)

    Raises:
    - ValueError: if any of 'model', 'benchmark', 'correct',
      'question_type' or 'target' is not a column of df
    """
    required = ("model", "benchmark", "correct", "question_type", "target")
    missing_columns = [name for name in required if name not in df.columns]
    if missing_columns:
        raise ValueError(f"Missing required columns: {missing_columns}")

    def distinct_count(column: str) -> int:
        # Number of distinct entries in one column.
        return len(df[column].unique())

    summary: Dict[str, Any] = {"total_records": len(df)}
    summary["unique_models"] = distinct_count("model")
    summary["unique_tasks"] = distinct_count("benchmark")
    summary["unique_question_types"] = distinct_count("question_type")
    summary["overall_accuracy"] = df["correct"].mean()
    summary["missing_columns"] = missing_columns
    return summary
