import os
from transformers import AutoTokenizer, AutoModelForCausalLM, StoppingCriteria, StoppingCriteriaList
import torch
from config_bbh import *
DATA_PATH = '../data/BIG-Bench-Hard'
from tqdm import tqdm

import json

class BBHDataLoader:
    """
    A class for reading BBH format data.

    Data directory structure:
      - JSON data is located at: DATA_PATH/bbh
    """

    def __init__(self, data_path=DATA_PATH):
        self.data_path = data_path
        self.bbh_dir = os.path.join(self.data_path, 'bbh')
        self.task_type_mapping = {
                    "salient_translation_error_detection": "option",
                    "dyck_languages": "dyck",
                    "object_counting": "math",
                    "sports_understanding": "yes_or_no",
                    "navigate": "option",
                    "reasoning_about_colored_objects": "option",
                    "logical_deduction_seven_objects": "option",
                    "tracking_shuffled_objects_three_objects": "option",
                    "geometric_shapes": "option",
                    "movie_recommendation": "option",
                    "date_understanding": "option",
                    "disambiguation_qa": "option",
                    "logical_deduction_five_objects": "option",
                    "word_sorting": "sorting",
                    "boolean_expressions": "bool",
                    "snarks": "option",
                    "temporal_sequences": "option",
                    "logical_deduction_three_objects": "option",
                    "multistep_arithmetic_two": "math",
                    "hyperbaton": "option",
                    "formal_fallacies": "option",
                    "web_of_lies": "yes_or_no",
                    "ruin_names": "option",
                    "tracking_shuffled_objects_seven_objects": "option",
                    "tracking_shuffled_objects_five_objects": "option",
                    "causal_judgement": "yes_or_no",
                    "penguins_in_a_table": "option",
                    "repeat_copy_logic": "open",
                    "rephrase":"open",
                    "auto_categorization": "open",
                    "processed_code_line_description": "open",
                    "processed_bridging_anaphora_resolution_barqa": "open",
                    "processed_disfl_qa": "open",
                    "processed_freetext": "open",
                    "processed_squaddev": "open",
                }

    def load_json_file(self, file_path):
        """
        Read a single JSON file and return the parsed data.
        :param file_path: Path to the JSON file
        :return: Parsed data dictionary, or None if failed
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            return data
        except Exception as e:
            print(f"Error loading JSON file {file_path}: {e}")
            return None

    def list_files(self, directory, extension=None):
        """
        Traverse all files in the specified directory and return a list of file paths.
        :param directory: Directory path
        :param extension: If specified, only return files with this extension
        :return: List of file paths
        """
        file_list = []
        for root, dirs, files in os.walk(directory):
            for file in files:
                if extension is None or file.endswith(extension):
                    file_list.append(os.path.join(root, file))
        return file_list

    def load_all_jsons(self):
        """
        Load all JSON files in the DATA_PATH/bbh directory,
        return a dictionary with keys as file names (including extension) and values as JSON data dictionaries.
        """
        json_dict = {}
        files = self.list_files(self.bbh_dir, extension='.json')
        for file in files:
            base_name = os.path.basename(file)
            data = self.load_json_file(file)
            if data is not None:
                json_dict[base_name] = data
        return json_dict

    def load_matched_data(self):
        """
        Load JSON files from DATA_PATH/bbh,
        return a dictionary: key: file name (without extension), value: JSON data
        """
        json_dict = self.load_all_jsons()

        # Match using the file name without extension as the key
        json_map = {}
        for file_name, data in json_dict.items():
            stem, _ = os.path.splitext(file_name)
            json_map[stem] = data

        matched_data = json_map
        return matched_data

    def get_json_file_names(self):
        """
        Get a list of all JSON file names (without extension) in the DATA_PATH/bbh directory.
        :return: List of file names, e.g., ['example1', 'example2', ...]
        """
        files = self.list_files(self.bbh_dir, extension='.json')
        names = [os.path.splitext(os.path.basename(file))[0] for file in files]
        return names

    def get_by_json_name(self, json_name):
        """
        Get the corresponding JSON data based on the given JSON file name.
        Supports file names with or without extensions, the returned data corresponds to the key "json".
        :param json_name: JSON file name, e.g., "example1" or "example1.json"
        :return: If matched, return a dictionary {"json": ...}, otherwise return None
        """
        stem, _ = os.path.splitext(json_name)
        key = stem
        matched_data = self.load_matched_data()
        if key in matched_data:
            return {"json": matched_data[key]}
        else:
            print(f"Warning: No corresponding record found: {json_name}")
            return None

# Example usage
if __name__ == "__main__":
    bbh_loader = BBHDataLoader()
    # Get a list of JSON file names (without extension)
    task_names = bbh_loader.get_json_file_names()
    i = 1

    # for current_task in task_names:
    for current_task in ['multistep_arithmetic_two','web_of_lies']:
        task_data = bbh_loader.get_by_json_name(current_task)
        task_type = bbh_loader.task_type_mapping.get(current_task, '')
        print(f'''=======\nCurrent task: {current_task}, task type: {task_type},\nCurrent task example input: {task_data["json"].get('examples', '')[0]['input']},\nCurrent task example answer: {task_data["json"].get('examples', '')[0]['target']}''')
        i += 1

        test_examples = bbh_loader.get_by_json_name(current_task)['json']['examples']

        for ex in test_examples:
            ex.update(question=ex["input"] + "\n")
            ex.update(answer=ex["target"] + "")

        for example in tqdm(test_examples[:1], desc="Processing examples"):
            print(example)
            q = example["question"]
            a = example["answer"]
            ground_truth = a

            ins = task_instructions[task_type]
            query = TEMPLATE.format(question=q, instruction=ins)
            print(query)
