import os
import re
import json
import multiprocessing

from tqdm import tqdm

from .eval import BaseEvaluator
from utils.analyze_answer import compare_answer_golden_of_simpleqa
from utils import build_prompt, generate_hash, check_empty_result, get_client, get_response
from common.constants import REASONING_PROCESS_REGS, ANSWER_REGS
from datasets import load_dataset


class SimpleQAEvaluator(BaseEvaluator):
    def package_params(self):
        """Collect the query parameters for every test question still to be answered.

        Loads the dataset named by ``self.dataset_name_from_hf`` and walks its
        ``test`` split. For each row a prompt is built and a tuple
        ``(prompt, question, golden, question_type, question_hash,
        model_name, temperature, top_p)`` is appended to ``self.params``
        unless ``self.raw_results_dir`` already holds ``<question_hash>.json``
        for which ``check_empty_result`` returns a falsy value.

        A cached file that cannot be parsed as JSON is treated like a missing
        result, so the question is queued again instead of aborting the run.
        """
        dataset = load_dataset(self.dataset_name_from_hf, cache_dir=self.huggingface_cache)
        # Size the progress bar from the split that is actually iterated.
        # The ['test'] lookup implies ``dataset`` is a mapping of splits, so
        # ``len(dataset)`` would count splits rather than rows.
        test_split = dataset['test']
        self.logger.info("Packaging Params")
        with tqdm(total=len(test_split), desc='Packaging Params') as progress_bar:
            for data in test_split:
                question = data['problem']
                # The topic is pulled out of the metadata text with a regex;
                # rows without a match are labelled "Unknown".
                topic_match = re.search(r"'topic':\s*'([^']+)'", data['metadata'])
                question_type = topic_match.group(1) if topic_match else "Unknown"
                # NOTE: the prompt is built with the evaluator-level
                # ``self.question_type``, not the per-row topic extracted above.
                full_prompt = build_prompt(
                    question,
                    include_cot=self.cot_reasoning,
                    question_type=self.question_type,
                    answer_type=self.answer_type,
                )
                prompt = full_prompt['prompt']
                golden = data['answer']
                question_hash = generate_hash(question)
                result_path = self.raw_results_dir.joinpath(f"{question_hash}.json")

                needs_query = True
                if result_path.exists():
                    try:
                        with open(result_path, "r") as f:
                            existing_data = json.load(f)
                    except json.JSONDecodeError:
                        # E.g. a file left half-written by an interrupted run.
                        self.logger.warning(
                            f"Packaging-{question_hash}: unreadable cached result, re-querying"
                        )
                    else:
                        needs_query = bool(check_empty_result(existing_data))

                if needs_query:
                    self.params.append((prompt, question, golden, question_type, question_hash,
                                        self.model_name, self.temperature, self.top_p))
                progress_bar.update(1)


    def run_query(self):
        """Run ``query_pipeline`` over ``self.params`` in a worker pool and persist each outcome.

        Every finished item is written to ``<question_hash>.json`` inside
        ``self.raw_results_dir`` as ``{question_hash: {"result": ...,
        "evaluation": ...}}``, whether or not the query succeeded. The query
        settings of the first finished item are logged once, and the
        success/failure counts are logged at the end.
        """
        outcome_counts = {True: 0, False: 0}
        settings_logged = False
        self.logger.info("Getting Model's Responses")
        with multiprocessing.Pool(self.num_workers) as pool, \
                tqdm(total=len(self.params), desc='Querying Models') as progress_bar:
            # Results arrive in completion order, not submission order.
            finished = pool.imap_unordered(self.query_pipeline, self.params)
            for question_hash, result, evaluation, query_settings, query_flag in finished:
                payload = {
                    str(question_hash): {
                        "result": result,
                        "evaluation": evaluation,
                    }
                }
                output_path = self.raw_results_dir.joinpath(f"{question_hash}.json")
                with open(output_path, "w") as f:
                    json.dump(payload, f, indent=4)

                outcome_counts[bool(query_flag)] += 1
                progress_bar.set_description(f'Querying - {question_hash}')
                progress_bar.update(1)

                # Log the settings only for the first item that comes back.
                if not settings_logged:
                    self.logger.info(f'query_settings: {query_settings}')
                    settings_logged = True
        success = outcome_counts[True]
        failure = outcome_counts[False]
        self.logger.info("Getting Responses Complete")
        self.logger.info(f"Success: {success}, Failure: {failure}, Total: {len(self.params)}")


    def query_pipeline(self, params):
        """Query the model for one question and have its answer graded.

        Args:
            params: Tuple ``(prompt, question, golden, question_type,
                question_hash, model_name, temperature, top_p)``, the shape
                appended to ``self.params`` by ``package_params``.

        Returns:
            Tuple ``(question_hash, result, evaluation, query_settings,
            query_flag)``. On success ``result`` holds the prompt, raw
            response, extracted reasoning/answer and sampling settings,
            ``evaluation`` holds the grader's decision, full response and
            usage, and ``query_flag`` is ``True``. If anything raises while
            querying or grading, the error is logged, ``result`` and
            ``evaluation`` are empty dicts, ``query_flag`` is ``False`` and
            ``query_settings`` is whatever ``get_response`` returned, or
            ``{}`` if it never returned.
        """
        # Unpack before the try block so ``question_hash`` is always bound
        # when the error handler logs and the function returns.
        prompt, question, golden, question_type, question_hash, model_name, temperature, top_p = params
        query_flag = False
        query_settings = None
        try:
            client = get_client(
                model_name=model_name,
            )
            model_response, model_intrinsic_reasoning, usage, query_settings = get_response(
                client=client,
                model_name=model_name,
                prompt=prompt,
                temperature=temperature,
                top_p=top_p,
                max_tokens=self.max_tokens,
                thinking_budget=self.thinking_budget,
                enable_intrinsic_reasoning=self.enable_intrinsic_reasoning,
            )

            # Extract the first capture group of each pattern from the raw
            # response; either may be absent, in which case None is kept.
            reasoning_process_match = re.search(REASONING_PROCESS_REGS, model_response, re.DOTALL)
            answer_match = re.search(ANSWER_REGS, model_response, re.DOTALL)
            reasoning_process_text = reasoning_process_match.group(1) if reasoning_process_match else None
            answer_text = answer_match.group(1) if answer_match else None

            # The grader model is fixed here, independent of the model under test.
            decision, full_evaluation_response, llm_evaluation_usage = compare_answer_golden_of_simpleqa(
                question=question,
                model_answer=answer_text,
                golden_answer=golden,
                model_name="gpt-4.1-2025-04-14",
            )
            result = {
                "prompt": prompt,
                "question": question,
                "model_response": model_response,
                "reasoning_process": reasoning_process_text.strip() if reasoning_process_text else None,
                "intrinsic_reasoning": model_intrinsic_reasoning.strip() if model_intrinsic_reasoning else None,
                "enable_intrinsic_reasoning": self.enable_intrinsic_reasoning,
                "cot_reasoning": self.cot_reasoning,
                "model_answer": answer_text.strip() if answer_text else None,
                "golden": golden,
                "question_type": question_type,
                "model_name": model_name,
                "task": "simple qa",
                "temperature": temperature,
                "top_p": top_p,
                "usage": usage,
            }
            evaluation = {
                "llm_evaluation_decision": decision,
                "llm_evaluation_full_response": full_evaluation_response,
                "llm_evaluation_usage": llm_evaluation_usage,
            }
            query_flag = True
        except Exception as e:
            # Best-effort: one failed question must not abort the whole run.
            evaluation = {}
            result = {}
            # Fall back to an empty dict so the caller never receives None.
            if query_settings is None:
                query_settings = {}
            self.logger.error(f"Querying-{question_hash}: Error: {e}")
        return question_hash, result, evaluation, query_settings, query_flag
    
    
    def process_results(self):
        """Score every saved raw result and write the aggregated output files.

        Reads each ``*.json`` file in ``self.raw_results_dir``, marks its
        evaluation as ``correct`` when the stored ``llm_evaluation_decision``
        is ``"A"`` (after stripping whitespace), and writes two files into
        ``self.processed_results_dir``: ``processed_results.json`` with every
        entry and ``processed_error_results.json`` with the incorrect ones.
        The accuracy (correct / number of JSON result files) is logged.

        Entries whose evaluation has no decision (for example a failed query
        saved with an empty evaluation) are counted as incorrect. With no
        result files at all the logged accuracy is ``0.0``.
        """
        self.logger.info("Processing Results")
        # List the directory once and keep only the files that get scored,
        # so the accuracy denominator and the progress total match the loop.
        # Sorting makes the output order deterministic (iterdir order is arbitrary).
        result_files = sorted(
            path for path in self.raw_results_dir.iterdir()
            if path.is_file() and path.suffix == '.json'
        )
        total = len(result_files)
        acc = 0
        processed_results_list = []
        processed_error_results_list = []
        with tqdm(total=total, desc='Processing Results') as progress_bar:
            for result_file in result_files:
                # Each file is keyed by its own stem (the question hash).
                question_hash = result_file.stem
                with open(result_file, "r") as f:
                    result_data = json.load(f)
                evaluation = result_data[question_hash]['evaluation']
                # .get(): a missing decision yields None, which is scored as incorrect.
                decision = evaluation.get('llm_evaluation_decision')
                is_correct = str(decision).strip() == "A"
                evaluation['correct'] = is_correct
                if is_correct:
                    acc += 1
                else:
                    processed_error_results_list.append(result_data)
                processed_results_list.append(result_data)
                progress_bar.update(1)
        with open(self.processed_results_dir.joinpath("processed_results.json"), "w") as f:
            json.dump(processed_results_list, f, indent=4)
        with open(self.processed_results_dir.joinpath("processed_error_results.json"), "w") as f:
            json.dump(processed_error_results_list, f, indent=4)
        # Guard the division so an empty results directory does not raise.
        accuracy = acc / total if total else 0.0
        self.logger.info(f"Accuracy: {accuracy}")
        self.logger.info("Processing Results Complete")