from __future__ import annotations

"""CLI entry to run the minimal evaluation pipeline.

Usage examples:
  python -m Multimodal_logical_reasoning.src.evaluation.eval_pipeline \
      --model Qwen2.5-Omni --task equivalent --max-samples 10
"""

import argparse
import json
from typing import Any, Dict, Optional

from .models import get_model_runner
from .tasks import get_task

import warnings
# Install a blanket "ignore" filter: no category or message restriction is
# given, so it matches every warning raised from this point on (unless a later
# filter overrides it). It is installed after the imports above have executed,
# so warnings emitted while those modules were being imported are not affected.
warnings.filterwarnings("ignore")


def _parse_task_kwargs(raw: Optional[str]) -> Dict[str, Any]:
    """Decode the ``--task-kwargs`` value into keyword arguments for the task.

    Args:
        raw: JSON text supplied on the command line, or ``None``/empty string
            when the option was omitted.

    Returns:
        The decoded JSON object as a dict, or an empty dict when ``raw`` is
        falsy.

    Raises:
        SystemExit: If ``raw`` is not valid JSON, or if it decodes to anything
            other than a JSON object (a list, number, string, ...), since the
            result is later expanded with ``**`` and must be a mapping.
    """
    if not raw:
        return {}
    try:
        parsed = json.loads(raw)
    except ValueError as exc:  # json.JSONDecodeError is a ValueError subclass
        raise SystemExit(f"Failed to parse --task-kwargs JSON: {exc}") from exc
    if not isinstance(parsed, dict):
        raise SystemExit(
            "--task-kwargs must be a JSON object (dict), "
            f"got {type(parsed).__name__}"
        )
    return parsed


def main(argv: Optional[list[str]] = None) -> None:
    """Parse CLI arguments, run the selected task on the selected model, and print the result.

    Args:
        argv: Argument list to parse instead of the process command line.
            ``None`` (the default) makes argparse read ``sys.argv[1:]``, which
            is the behaviour when this module is run as a script.

    Raises:
        SystemExit: On argparse usage errors, or when ``--task-kwargs`` is not
            a valid JSON object.
    """
    parser = argparse.ArgumentParser(description="Run evaluation for a model and task (minimal)")
    parser.add_argument("--model", required=True, help="Registered model name, e.g., 'Qwen2.5-Omni'")
    parser.add_argument("--task", required=True, help="Registered task name, e.g., 'equivalent'")
    parser.add_argument("--max-samples", type=int, default=None, help="Limit number of samples for quick runs")
    parser.add_argument(
        "--task-kwargs",
        type=str,
        default=None,
        help=(
            "JSON dict of extra task init kwargs, e.g. "
            "'{\"modalities\":[\"TIA\",\"TAI\"], \"reasoning_steps\":3}'"
        ),
    )
    args = parser.parse_args(argv)

    # Validate the task kwargs before resolving the model runner so that a
    # malformed --task-kwargs value is reported immediately.
    task_kwargs = _parse_task_kwargs(args.task_kwargs)

    model = get_model_runner(args.model)
    task = get_task(args.task, **task_kwargs)
    result: Any = task.run(model, max_samples=args.max_samples)
    print(result)


# Run the CLI only when this file is executed as a script or via ``python -m``;
# importing the module does not trigger an evaluation run.
if __name__ == "__main__":
    main()
