from __future__ import annotations

import glob
import json
from typing import Any, Dict, List, Optional, Tuple

import pandas as pd

from ..inference.parsing import (
    compute_predicted_trend,
    single_sample_p_yes,
)
from .metrics import evaluate_predictions


def _build_default_paths(
    input_path: str,
    parsed_dir: Optional[str] = None,
    eval_prefix: str = "eval_firstonly_computed_",
) -> Tuple[str, str]:
    """Derive the two output file paths that belong to one input file.

    Both file names are built from the input's base name with its last
    extension removed (the "stem"):

    * parsed rows:  ``parsed_firstonly_<stem>.json``
    * eval report:  ``<eval_prefix><stem>.json``

    Args:
        input_path: Path of the input file the outputs are derived from.
        parsed_dir: Directory for the parsed-rows file. When ``None`` or
            empty, the directory of ``input_path`` is used instead.
        eval_prefix: Prefix placed in front of the stem in the eval file name.

    Returns:
        ``(parsed_path, eval_path)``. The eval path is always located in the
        directory of ``input_path``; ``parsed_dir`` does not affect it.
    """
    import os

    source_dir, source_file = os.path.split(input_path)
    stem = os.path.splitext(source_file)[0]

    # `or` (rather than an `is None` test) makes an empty string fall back too.
    parsed_target_dir = parsed_dir or source_dir

    return (
        os.path.join(parsed_target_dir, f"parsed_firstonly_{stem}.json"),
        os.path.join(source_dir, f"{eval_prefix}{stem}.json"),
    )


def process_jsonl_files_firstonly(
    file_pattern: str,
    k_tail: int = 3,
    alpha: float = 0.5,
    threshold: float = 0.05,
    parsed_out_dir: Optional[str] = None,
    verbose: bool = False,
) -> List[Tuple[pd.DataFrame, Dict[str, Any]]]:
    """Process JSONL files using only the first sample of each outputs list.

    For every file matching ``file_pattern``, each non-blank line is parsed as
    a JSON record. The first element of the record's ``outputs_no_news`` and
    ``outputs_with_news`` lists (or ``None`` when a list is missing or empty)
    is passed to ``single_sample_p_yes``; the two results are combined by
    ``compute_predicted_trend``. The per-record rows are written as a JSON
    file and handed to ``evaluate_predictions``.

    Args:
        file_pattern: Glob pattern selecting the input JSONL files.
        k_tail: Forwarded unchanged to ``single_sample_p_yes``.
        alpha: Forwarded unchanged to ``single_sample_p_yes``.
        threshold: Forwarded unchanged to ``compute_predicted_trend``.
        parsed_out_dir: Directory for the parsed-rows JSON files. It is
            created if it does not exist. When ``None`` or empty, each
            parsed file is written next to its input file.
        verbose: Print progress messages to stdout.

    Returns:
        One ``(dataframe, eval_results)`` tuple per matched file, ordered by
        sorted file path. ``eval_results`` is whatever
        ``evaluate_predictions`` returns.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message is prefixed with ``<filename>:<line number>``.
    """
    import os

    # glob.glob yields matches in arbitrary order; sorting makes the order of
    # the returned list (and of the verbose output) reproducible.
    files = sorted(glob.glob(file_pattern))
    if verbose:
        print(f"[firstonly] matched {len(files)} files for pattern: {file_pattern}")

    # Create the output directory up front so that a missing directory does
    # not fail the run only after a whole file has already been parsed.
    if files and parsed_out_dir:
        os.makedirs(parsed_out_dir, exist_ok=True)

    results: List[Tuple[pd.DataFrame, Dict[str, Any]]] = []

    for filename in files:
        if verbose:
            print(f"[firstonly] reading: {filename}")
        rows: List[Dict[str, Any]] = []
        with open(filename, "r", encoding="utf-8") as fh:
            for lineno, line in enumerate(fh, start=1):
                if not line.strip():
                    continue
                try:
                    data = json.loads(line)
                except json.JSONDecodeError as exc:
                    # Same exception type as before, but now says where the
                    # bad record is.
                    raise json.JSONDecodeError(
                        f"{filename}:{lineno}: {exc.msg}", exc.doc, exc.pos
                    ) from exc

                # `or []` covers both a missing key and an explicit null.
                samples_no_news = data.get("outputs_no_news") or []
                samples_with_news = data.get("outputs_with_news") or []
                p_yes_no = single_sample_p_yes(
                    samples_no_news[0] if samples_no_news else None,
                    k_tail=k_tail,
                    alpha=alpha,
                )
                p_yes_with = single_sample_p_yes(
                    samples_with_news[0] if samples_with_news else None,
                    k_tail=k_tail,
                    alpha=alpha,
                )

                rows.append(
                    {
                        "confidence_no_news": p_yes_no,
                        "confidence_with_news": p_yes_with,
                        "predicted_trend_computed": compute_predicted_trend(
                            p_yes_no, p_yes_with, threshold=threshold
                        ),
                        "golden_trend": data.get("golden_trend", "Unknown"),
                        "raw_data": data,
                    }
                )
        df = pd.DataFrame(rows)
        if verbose:
            # Every non-blank line yields exactly one row.
            print(f"[firstonly] parsed {len(rows)} lines -> {len(df)} rows")

        parsed_path, eval_path = _build_default_paths(filename, parsed_dir=parsed_out_dir)
        with open(parsed_path, "w", encoding="utf-8") as f:
            # Round-trip through DataFrame.to_json so values are serialised by
            # pandas, then re-dumped with indentation.
            json.dump(json.loads(df.to_json(orient="records")), f, indent=2)
        # NOTE(review): evaluate_predictions presumably writes the metrics to
        # eval_path (the verbose message below says so) -- confirm in metrics.py.
        eval_results = evaluate_predictions(df, "predicted_trend_computed", "golden_trend", eval_path)
        if verbose:
            print(f"[firstonly] wrote parsed rows: {parsed_path}")
            print(f"[firstonly] wrote eval metrics: {eval_path}")
        results.append((df, eval_results))

    if verbose:
        print("[firstonly] done.")
    return results
