from __future__ import annotations

import glob
import json
import re
from typing import Any, Dict, List, Tuple

import pandas as pd

from .metrics import evaluate_predictions


def _parse_value(text: str, keyword: str) -> str:
    if not text:
        return "Unknown"
    patterns = [
        rf'"{keyword}"\s*:\s*"?(\d+(\.\d+)?)"?',
        rf"{keyword} score is\s*\"?(\d+(\.\d+)?)\"?",
        rf"{keyword}:\s*\"?(\d+(\.\d+)?)\"?",
        rf"\\*\\*{keyword} Score:\\*\\*\s*(\d+(\.\d+)?)",
        rf"\b{keyword}\s*[-:=]\s*(\d+(\.\d+)?)",
    ]
    last_match: str | None = None
    for pattern in patterns:
        matches = list(re.finditer(pattern, text, re.IGNORECASE))
        if matches:
            last_match = matches[-1].group(1)

    if last_match is None and keyword.lower() == "confidence":
        num_matches = list(re.finditer(r"\b([1-9]|10)\b", text))
        if num_matches:
            last_match = num_matches[-1].group(1)

    if last_match is None and keyword.lower() == "trend":
        trend_matches = list(re.finditer(r"\b(Up|Down|Still)\b", text, re.IGNORECASE))
        if trend_matches:
            last_match = trend_matches[-1].group(1).capitalize()

    return last_match if last_match is not None else "Unknown"


def _compute_predicted_trend(conf_no_news: str, conf_with_news: str) -> str:
    if conf_no_news == "Unknown" or conf_with_news == "Unknown":
        return "Unknown"
    try:
        a = float(conf_no_news)
        b = float(conf_with_news)
    except ValueError:
        return "Unknown"
    if b > a:
        return "Up"
    if b < a:
        return "Down"
    return "Still"


def process_jsonl_files(file_pattern: str) -> List[Tuple[pd.DataFrame, Dict[str, Any]]]:
    """Process JSONL files containing raw verbalized outputs and evaluate trends.

    Every file matching *file_pattern* (a ``glob`` pattern) is read line by
    line; blank lines are skipped. Each JSON line is expected to include:
      - raw_output_confidence_no_news
      - raw_output_confidence_with_news
      - raw_output_predicted_trend_direct (optional)
      - golden_trend

    For each input file the following paths, next to the input, are used:
      - ``parsed_verbalized_<stem>.json``: the parsed rows, written here.
      - ``eval_computed_<stem>.json`` / ``eval_direct_<stem>.json``: passed to
        ``evaluate_predictions`` as its output path (presumably where it
        stores its report -- confirm against ``.metrics``).

    Args:
        file_pattern: Glob pattern selecting the JSONL files to process.

    Returns:
        One ``(dataframe, {"computed": ..., "direct": ...})`` tuple per
        matched file, in sorted filename order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Kept function-local (as in the original), but imported once up front
    # rather than inside the per-file loop.
    import os

    # Fixed column set so that a file with no records still yields a frame
    # that has the prediction/golden columns used for evaluation.
    columns = [
        "confidence_no_news",
        "confidence_with_news",
        "predicted_trend_computed",
        "predicted_trend_direct",
        "golden_trend",
        "raw_data",
    ]
    results: List[Tuple[pd.DataFrame, Dict[str, Any]]] = []

    # glob returns matches in arbitrary order; sort for reproducible output.
    for filename in sorted(glob.glob(file_pattern)):
        rows: List[Dict[str, Any]] = []
        with open(filename, "r", encoding="utf-8") as file:
            for line in file:
                if not line.strip():
                    continue
                data = json.loads(line)
                conf_no_news = _parse_value(data.get("raw_output_confidence_no_news", ""), "confidence")
                conf_with_news = _parse_value(data.get("raw_output_confidence_with_news", ""), "confidence")
                rows.append(
                    {
                        "confidence_no_news": conf_no_news,
                        "confidence_with_news": conf_with_news,
                        "predicted_trend_computed": _compute_predicted_trend(conf_no_news, conf_with_news),
                        "predicted_trend_direct": _parse_value(
                            data.get("raw_output_predicted_trend_direct", ""), "trend"
                        ),
                        "golden_trend": data.get("golden_trend", "Unknown"),
                        "raw_data": data,
                    }
                )
        df = pd.DataFrame(rows, columns=columns)

        # Save adjacent JSONs with generic names (based on input stem).
        out_dir = os.path.dirname(filename)
        stem = os.path.splitext(os.path.basename(filename))[0]

        parsed_out = os.path.join(out_dir, f"parsed_verbalized_{stem}.json")
        with open(parsed_out, "w", encoding="utf-8") as f:
            # Round-trip through pandas' serializer so the records are plain
            # JSON values before pretty-printing them.
            json.dump(json.loads(df.to_json(orient="records")), f, indent=2)

        out_computed = os.path.join(out_dir, f"eval_computed_{stem}.json")
        out_direct = os.path.join(out_dir, f"eval_direct_{stem}.json")
        eval_computed = evaluate_predictions(df, "predicted_trend_computed", "golden_trend", out_computed)
        eval_direct = evaluate_predictions(df, "predicted_trend_direct", "golden_trend", out_direct)
        results.append((df, {"computed": eval_computed, "direct": eval_direct}))

    return results
