from __future__ import annotations

import json
from typing import Any, Dict, List

from ..utils.dates import format_date_iso
from ..utils.io import read_json, write_json


def _filter_history(
    history: List[Dict[str, Any]],
    min_forecaster_count: int,
) -> List[Dict[str, Any]]:
    """Return the normalized history records that meet the forecaster threshold.

    A record is kept only when both of its timestamps format to a truthy
    value, it has at least one center, and its `forecaster_count` is at least
    `min_forecaster_count`. Kept records are reduced to four keys:
    `start_time`, `end_time`, `center` (first center rounded to 2 decimals)
    and `forecaster_count`.

    Parameters
    - history: raw history records of one question
    - min_forecaster_count: threshold for history-level `forecaster_count`
    """
    kept: List[Dict[str, Any]] = []
    for record in history:
        # A JSON null inside the history list arrives as None; treat it as an
        # empty record so it is skipped instead of raising AttributeError.
        record = record or {}
        start_time = format_date_iso(record.get("start_time"))
        end_time = format_date_iso(record.get("end_time"))
        centers = record.get("centers") or []
        # `.get(key, 0)` still returns None for an explicit null, and
        # `None >= int` raises TypeError, so coerce null to 0 here.
        forecaster_count = record.get("forecaster_count") or 0

        if not (start_time and end_time and centers):
            continue
        if forecaster_count < min_forecaster_count:
            continue

        kept.append(
            {
                "start_time": start_time,
                "end_time": end_time,
                "center": round(float(centers[0]), 2),
                "forecaster_count": forecaster_count,
            }
        )
    return kept


def filter_questions(
    input_file: str,
    output_file: str,
    min_forecasters: int = 50,
    min_forecaster_count: int = 25,
) -> None:
    """Filter Metaculus questions and their history, keeping only entries meeting thresholds.

    An entry is dropped when its `nr_forecasters` is below `min_forecasters`,
    when its question has no title, no recency-weighted history or no
    formattable `open_time`, or when none of its history records survive the
    `min_forecaster_count` filter. Missing keys and explicit nulls are treated
    the same way (as "no data"), so such entries are skipped rather than
    raising.

    Each kept entry is written as `id`, `title`, `open_time` and `history`.

    Parameters
    - input_file: path to the raw questions JSON
    - output_file: path to write the filtered questions JSON
    - min_forecasters: threshold for question-level `nr_forecasters`
    - min_forecaster_count: threshold for history-level `forecaster_count`

    Raises
    - KeyError: if an entry that passes every filter has no `id` key
    """
    data: List[Dict[str, Any]] = read_json(input_file)

    filtered_questions: List[Dict[str, Any]] = []
    for entry in data:
        entry = entry or {}
        # `or 0` (not a `.get` default) so an explicit null counts as zero
        # forecasters instead of failing the `<` comparison.
        if (entry.get("nr_forecasters") or 0) < min_forecasters:
            continue

        # Walk the nested structure one level at a time; any level may be
        # absent or null, and either case yields an empty history.
        question = entry.get("question") or {}
        aggregations = question.get("aggregations") or {}
        recency_weighted = aggregations.get("recency_weighted") or {}
        history = recency_weighted.get("history") or []

        open_time = format_date_iso(question.get("open_time", ""))
        title = question.get("title")
        if not title or not history or not open_time:
            continue

        filtered_history = _filter_history(history, min_forecaster_count)
        if not filtered_history:
            continue

        filtered_questions.append(
            {
                "id": entry["id"],
                "title": title,
                "open_time": open_time,
                "history": filtered_history,
            }
        )

    write_json(output_file, filtered_questions, indent=2)
