from collections import defaultdict
import json
from mmengine import load

from teval.utils.template import parse_string
from teval.utils.format_load import format_load
from teval.schema import ResponseDataSample
import ast
import numpy as np

class InstructEvaluator:
    """Instruct Following Evaluation

    Loads a dataset of model predictions, parses every prediction according
    to the response format named in its meta data (``'json'`` or anything
    else, which is treated as the string format), and reports a format
    metric and an argument exact-match metric averaged per format.

    Args:
        dataset_path(str): File path of evaluation dataset.

    """

    def __init__(
        self,
        dataset_path: str,
        **kwargs,
    ) -> None:
        # Extra keyword arguments are accepted but not used by this class.
        self.dataset_path = dataset_path

    def _load_dataset(self):
        """Load the dataset file and populate ``self.dataset``.

        The object returned by ``load`` is iterated as a mapping; each value
        must provide ``origin_prompt`` plus the keys read by
        ``_process_response``. Also sets ``self.num_samples``.
        """
        self.dataset = []
        dataset = load(self.dataset_path)

        for datum in dataset.values():
            self.dataset.append(
                dict(
                    origin_prompt=datum["origin_prompt"],
                    response_data_sample=self._process_response(datum)))
        self.num_samples = len(self.dataset)

    def _process_response(
        self,
        datum: dict,
    ) -> "ResponseDataSample":
        """Process the response to needed format.

        Args:
            datum(dict): inputs. Must contain the keys ``template``,
                ``prediction``, ``ground_truth`` and ``meta_data``.

        Returns:
            ResponseDataSample: Processed response data sample.
        """
        return ResponseDataSample(
            # Dict describing how the response is laid out.
            template=datum['template'],
            # Generated response.
            pred=datum['prediction'],
            # Response of ground truth.
            gt=datum['ground_truth'],
            meta_data=datum['meta_data'])

    def _evaluate(self, data_sample: "ResponseDataSample") -> dict:
        """Compute the metrics of a single sample.

        Args:
            data_sample(ResponseDataSample): Sample whose
                ``meta_data['response_format']`` selects the parser and
                whose ``gt`` holds the reference ``action`` and ``args``.

        Returns:
            dict: ``{fmt}_format_metric`` (1 if the prediction could be
            parsed, else 0) and ``{fmt}_args_em_metric``, where ``fmt`` is
            the sample's response format.
        """
        response_format = data_sample.meta_data['response_format']
        format_key = f'{response_format}_format_metric'
        args_em_key = f'{response_format}_args_em_metric'

        if response_format == 'json':
            pred_data = self.json_format_parse(data_sample)
        else:
            pred_data = self.string_format_parse(data_sample)

        if pred_data is None:
            # Unparsable prediction: directly set to 0 for all metrics.
            return {format_key: 0, args_em_key: 0}

        # Exact matching
        return {
            format_key: 1,
            args_em_key: self.compute_args_em_metric(
                gt_action=data_sample.gt['action'],
                pred_action=pred_data['action'],
                gt_args=data_sample.gt['args'],
                pred_args=pred_data['args'],
            ),
        }

    def compute_args_em_metric(self, gt_action, pred_action, gt_args, pred_args):
        """Return the fraction of exactly matched items (action + arguments).

        The action name counts as one item and every ground-truth argument
        as one more. Extra predicted arguments are ignored.

        Args:
            gt_action: Ground-truth action name.
            pred_action: Predicted action name.
            gt_args(dict): Ground-truth arguments.
            pred_args(dict): Predicted arguments.

        Returns:
            float: Number of matches divided by ``len(gt_args) + 1``.
        """
        # Unique sentinel so that an argument missing from the prediction
        # never compares equal to a ground-truth value (an empty-string
        # default would wrongly match a ground-truth value of "").
        missing = object()

        cnt = 1. if gt_action == pred_action else 0.
        for gt_key, gt_val in gt_args.items():
            if pred_args.get(gt_key, missing) == gt_val:
                cnt += 1.
        num_args = len(gt_args) + 1     # 1 means action name match
        return cnt / num_args

    def string_format_parse(self, data_sample):
        """Parse a string-format prediction into thought / action / args.

        The parse template is built by wrapping the ``{thought}``,
        ``{action}`` and ``{args}`` fields with the start / end markers
        taken from ``data_sample.template``.

        Args:
            data_sample: Sample providing ``pred`` and ``template``.

        Returns:
            dict | None: ``None`` when ``parse_string`` returns ``None``
            (presumably: the prediction does not fit the template).
            Otherwise a dict with the stripped ``action`` and ``args`` as a
            dict; ``args`` is empty when the args text is not a Python
            literal or does not evaluate to a dict.
        """
        template = data_sample.template
        parse_template = (
            template['thought_start'] + "{thought}" + template['thought_end']
            + template['action_start'] + "{action}" + template['action_end']
            + template['args_start'] + "{args}" + template['args_end']
        )
        res = parse_string(parse_template, data_sample.pred, allow_newline=True)
        if res is None:
            return None

        try:
            args = ast.literal_eval(res['args'].strip())
        except Exception:
            # Best effort: the format matched but the args are not a valid
            # literal, so keep thought / action and fall back to no args.
            # ``Exception`` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            return dict(
                thought=res['thought'],
                action=res['action'].strip(),
                args=dict())

        res['args'] = args if isinstance(args, dict) else {}
        res['action'] = res['action'].strip()
        return res

    def json_format_parse(self, data_sample):
        """Parse a json-format prediction into thought / action / args.

        ``data_sample.template`` maps the logical names ``thought``,
        ``action`` and ``args`` to the keys used in the predicted object.

        Args:
            data_sample: Sample providing ``pred`` and ``template``.

        Returns:
            dict | None: Dict with ``thought``, ``action`` and ``args``
            (an empty dict when the predicted args are not a dict), or
            ``None`` when loading the prediction or looking up any of the
            expected keys fails.
        """
        try:
            pred_data = format_load(data_sample.pred)
            template = data_sample.template
            args = pred_data[template['args']]
            new_data = dict(
                thought=pred_data[template['thought']],
                action=pred_data[template['action']],
                args=args if isinstance(args, dict) else {},
            )
        except Exception:
            # Any failure here is treated as a format error by the caller.
            return None

        return new_data

    def evaluate(self):
        """Load the dataset, score every sample and aggregate the metrics.

        Returns:
            dict: Aggregated metrics, see ``_post_process``.
        """
        self._load_dataset()
        results_list = [
            self._evaluate(data_sample['response_data_sample'])
            for data_sample in self.dataset
        ]
        return self._post_process(results_list)

    def _post_process(self, results_list):
        """Average per-sample metrics into the final result.

        Args:
            results_list(list[dict]): Per-sample metric dicts.

        Returns:
            dict: Maps each of the four json / string metrics to its mean
            rounded to 4 decimals, or to ``nan`` when no sample reported
            that metric. Any other key found in the inputs keeps its list
            of collected values.
        """
        # list of dict to dict of list
        results_dict = defaultdict(list)
        for sub in results_list:
            for key, value in sub.items():
                results_dict[key].append(value)

        metric_list = ['json_format_metric', 'json_args_em_metric',
                       'string_format_metric', 'string_args_em_metric']
        for metric in metric_list:
            values = results_dict[metric]
            if values:
                results_dict[metric] = np.round(np.mean(values), decimals=4)
            else:
                # np.mean([]) would also give nan but emits a RuntimeWarning.
                results_dict[metric] = np.nan
        return results_dict