import click
import pathlib

from utils.logger import get_logger
from datasets import load_dataset, get_dataset_config_names

from evaluation import BaseEvaluator
from evaluation.registry import EVALUATOR_REGISTRY
from common.constants import *


@click.command()
@click.option("--model-name", required=True, type=str, help="Name of the model to evaluate.")
@click.option("--dataset-name", required=True, type=str, help="Name of the dataset.")
@click.option("--dataset-name-from-hf", default=None, type=str, help="Name of the dataset from huggingface.")
@click.option("--raw-results-dir", required=True, type=click.Path(path_type=pathlib.Path, exists=True), help="Directory to save raw results.")
@click.option("--processed-results-dir", required=True, type=click.Path(path_type=pathlib.Path, exists=True), help="Directory to save processed results.")
@click.option("--cot-reasoning", is_flag=True, default=False, help="Enable chain-of-thought reasoning (default: False).")
@click.option("--temperature", default=0.0, type=float, show_default=True, help="Temperature for evaluation.")
@click.option("--top-p", default=1.0, type=float, show_default=True, help="Top-p for evaluation.")
@click.option("--max-tokens", default=8192, type=int, show_default=True, help="Maximum tokens for evaluation (default: 8192).")
@click.option("--thinking-budget", default=8192, type=int, show_default=True, help="Maximum tokens for reasoning (default: 8192).")
@click.option("--enable-intrinsic-reasoning", is_flag=True, default=False, help="Enable intrinsic reasoning for models (default: False).")
@click.option("--huggingface-cache", default=None, type=click.Path(path_type=pathlib.Path, exists=True), help="Hugging Face cache directory to save datasets (default: None).")
@click.option("--log-dir", required=True, type=click.Path(path_type=pathlib.Path, exists=True), help="Directory to save logs.")
@click.option("--num-workers", default=4, type=int, show_default=True, help="Number of workers for data processing.")
def main(
    model_name: str,
    dataset_name: str,
    dataset_name_from_hf,
    raw_results_dir: pathlib.Path,
    processed_results_dir: pathlib.Path,
    cot_reasoning: bool,
    temperature: float,
    top_p: float,
    max_tokens: int,
    thinking_budget: int,
    enable_intrinsic_reasoning: bool,
    huggingface_cache,
    log_dir: pathlib.Path,
    num_workers: int,
):
    """Run the registered evaluator for a dataset against a model.

    \f
    Everything below the form feed is developer documentation; click
    truncates the ``--help`` text at that character.

    Args:
        model_name: Model identifier; also the first component of the
            per-run results sub-directory name.
        dataset_name: Key looked up in ``EVALUATOR_REGISTRY``.
        dataset_name_from_hf: Dataset path handed to the evaluator and, for
            registry entries flagged ``multi_subset``, to
            ``get_dataset_config_names``. May be ``None`` otherwise.
        raw_results_dir: Existing directory under which the per-run raw
            results sub-directory is created.
        processed_results_dir: Existing directory under which the per-run
            processed results sub-directory is created.
        cot_reasoning: Forwarded to the evaluator; adds ``cot-reasoning`` to
            the sub-directory name when set.
        temperature: Sampling temperature; replaced by the string
            ``"Default"`` when the override described below applies.
        top_p: Top-p value; replaced by ``"Default"`` under the same override.
        max_tokens: Token limit; overridden to 28672 (8192 for models in
            ``QWEN_LINEAGE``) under the same override.
        thinking_budget: Reasoning token budget; overridden to 20480 under
            the same override.
        enable_intrinsic_reasoning: Forwarded to the evaluator; also triggers
            the override and is reflected in the sub-directory name.
        huggingface_cache: Optional cache directory forwarded to the evaluator.
        log_dir: Directory passed to ``get_logger``.
        num_workers: Worker count forwarded to the evaluator.

    Raises:
        click.BadParameter: ``dataset_name`` is not in ``EVALUATOR_REGISTRY``.
        click.UsageError: The dataset is ``multi_subset`` but
            ``dataset_name_from_hf`` was not supplied.
    """
    # NOTE(review): the logger receives the CLI-supplied temperature/top-p,
    # i.e. the values *before* the override below — confirm this is intended.
    logger = get_logger(model_name, dataset_name, temperature, top_p, cot_reasoning, log_dir)

    # Some models (and any run with intrinsic reasoning enabled) ignore the
    # sampling options from the command line and use fixed token budgets.
    if model_name in DEFAULT_TEMPERATURE_TOP_P_MODELS or enable_intrinsic_reasoning:
        temperature = "Default"
        top_p = "Default"
        max_tokens = 28672 if model_name not in QWEN_LINEAGE else 8192
        thinking_budget = 20480
        logger.info(f"Using default temperature, top-p, max-tokens, and thinking-budget for {model_name}.")

    # Explicit mapping of the effective settings (after any override), so the
    # logged keys and their order do not depend on ``locals()`` internals.
    settings = {
        "model_name": model_name,
        "dataset_name": dataset_name,
        "dataset_name_from_hf": dataset_name_from_hf,
        "raw_results_dir": raw_results_dir,
        "processed_results_dir": processed_results_dir,
        "cot_reasoning": cot_reasoning,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "thinking_budget": thinking_budget,
        "enable_intrinsic_reasoning": enable_intrinsic_reasoning,
        "huggingface_cache": huggingface_cache,
        "log_dir": log_dir,
        "num_workers": num_workers,
    }
    logger.info("Settings:")
    for key, value in settings.items():
        logger.info(f"{key}: {value}")

    # Turn an unknown dataset into a normal CLI error instead of a traceback.
    try:
        config = EVALUATOR_REGISTRY[dataset_name]
    except KeyError as err:
        raise click.BadParameter(
            f"{dataset_name!r} is not registered in EVALUATOR_REGISTRY.",
            param_hint="--dataset-name",
        ) from err
    evaluator_cls = config['evaluator_cls']
    question_type = config['question_type']
    answer_type = config['answer_type']
    multi_subset = config.get('multi_subset', False)

    if multi_subset:
        # Subset names can only be listed when the dataset path is known.
        if dataset_name_from_hf is None:
            raise click.UsageError(
                f"--dataset-name-from-hf is required for multi-subset dataset {dataset_name!r}."
            )
        config_names = get_dataset_config_names(dataset_name_from_hf)
    else:
        config_names = None

    # One sub-directory per (model, reasoning mode, sampling settings) combination.
    subdir_parts = [model_name]
    if cot_reasoning:
        subdir_parts.append('cot-reasoning')
    subdir_parts.append(f'temperature-{temperature}_top-p-{top_p}')
    if enable_intrinsic_reasoning:
        subdir_parts.append('enable-intrinsic-reasoning')
    subdir_name = '_'.join(subdir_parts)

    raw_dir = raw_results_dir.joinpath(subdir_name)
    processed_dir = processed_results_dir.joinpath(subdir_name)
    raw_dir.mkdir(parents=True, exist_ok=True)
    processed_dir.mkdir(parents=True, exist_ok=True)

    evaluator: BaseEvaluator = evaluator_cls(
        dataset_name_from_hf, config_names, huggingface_cache, raw_dir, processed_dir, model_name,
        temperature, top_p, num_workers, cot_reasoning, question_type, answer_type, max_tokens=max_tokens,
        enable_intrinsic_reasoning=enable_intrinsic_reasoning, thinking_budget=thinking_budget,
    )
    evaluator.run_all()
    

if __name__ == '__main__':
    # Called without arguments: the click command decorating ``main`` parses
    # the command line itself and supplies the option values.
    main()