# Copyright (c) Alibaba, Inc. and its affiliates.
from contextlib import nullcontext
from typing import Any, Dict, List, Union

import numpy as np
import os
import torch.distributed as dist
from datasets import Dataset as HfDataset

from swift.llm import InferArguments, InferRequest, SwiftPipeline, load_dataset, prepare_model_template, sample_dataset
from swift.plugin import InferStats, MeanMetric, compute_rouge_bleu
from swift.plugin.orm import extract_xml_answer, cal_rouge, calculate_sari
from swift.utils import JsonlWriter, get_logger, is_master, read_from_jsonl
from .infer_engine import AdapterRequest, PtEngine
from .protocol import RequestConfig
from .utils import InferCliState
from tqdm import tqdm
from easse.sari import corpus_sari

# Module-level logger used by the pipeline below for status and metric output.
logger = get_logger()




class SwiftInfer(SwiftPipeline):
    """Inference pipeline configured through ``InferArguments``.

    The pipeline builds an inference engine for the configured backend, runs it
    over a validation dataset (or interactively from the command line), feeds
    batch responses through a frozen ``Qwen/Qwen2.5-7B-Instruct`` engine, and
    logs an evaluation metric computed from the saved results.
    """
    args_class = InferArguments
    args: args_class

    

    def __init__(self, args: Union[List[str], InferArguments, None] = None) -> None:
        from swift.llm import merge_lora
        super().__init__(args)
        args = self.args
        if args.merge_lora:
            merge_lora(args, device_map='cpu')
        self.infer_kwargs = {}
        if args.infer_backend == 'vllm' and args.adapters:
            self.infer_kwargs['adapter_request'] = AdapterRequest('_lora', args.adapters[0])

        if args.infer_backend == 'pt':
            model, self.template = prepare_model_template(args)
            self.infer_engine = PtEngine.from_model_template(model, self.template, max_batch_size=args.max_batch_size)
            logger.info(f'model: {self.infer_engine.model}')
        else:
            self.infer_engine = self.get_infer_engine(args)
            self.template = args.get_template(self.processor)
        self.random_state = np.random.RandomState(args.data_seed)

        self.frozen_model = PtEngine("Qwen/Qwen2.5-7B-Instruct", model_type="qwen2_5", device_map='auto')
        self.frozen_model.model.eval()
        for param in self.frozen_model.model.parameters():
            param.requires_grad = False


    def __getattr__(self, key: str):
        try:
            return super().__getattr__(key)
        except AttributeError:
            if 'infer_engine' in self.__dict__:
                return getattr(self.infer_engine, key)
            raise

    @staticmethod
    def get_infer_engine(args: InferArguments, **kwargs):
        """Instantiate the inference engine for the selected backend.

        Args:
            args: Inference arguments providing the model identity and the
                backend-specific engine keyword arguments.
            **kwargs: Extra engine keyword arguments. An optional
                ``infer_backend`` entry takes precedence over
                ``args.infer_backend``; the model identity keys are always
                overwritten with the values from ``args``.

        Returns:
            A ``PtEngine`` for ``'pt'``, a ``VllmEngine`` for ``'vllm'`` and a
            ``LmdeployEngine`` for any other backend name.
        """
        kwargs['model_id_or_path'] = args.model
        kwargs['model_type'] = args.model_type
        kwargs['revision'] = args.model_revision
        kwargs['torch_dtype'] = args.torch_dtype

        backend = kwargs.pop('infer_backend', None) or args.infer_backend
        if backend == 'pt':
            from .infer_engine import PtEngine as engine_cls
            kwargs.update(args.get_model_kwargs())
            if hasattr(args, 'max_batch_size'):
                kwargs['max_batch_size'] = args.max_batch_size
        elif backend == 'vllm':
            from .infer_engine import VllmEngine as engine_cls
            kwargs.update(args.get_vllm_engine_kwargs())
        else:
            from .infer_engine import LmdeployEngine as engine_cls
            kwargs.update(args.get_lmdeploy_engine_kwargs())
        return engine_cls(**kwargs)

    def run(self) -> List[Dict[str, Any]]:
        args = self.args
        self.jsonl_writer = JsonlWriter(args.result_path) if args.result_path else None
        result = self.infer_dataset()
        if args.result_path:
            logger.info(f'The inference results have been saved to result_path: `{args.result_path}`.')
        return result

    def infer_single(self, infer_request: Union[InferRequest, Dict[str, Any]], request_config: RequestConfig) -> str:
        res_or_gen = self.infer([infer_request],
                                request_config,
                                template=self.template,
                                use_tqdm=False,
                                **self.infer_kwargs)[0]
        if request_config and request_config.stream:
            response = ''
            for res in res_or_gen:
                delta = res.choices[0].delta.content
                print(delta, end='', flush=True)
                response += delta
            print()
        else:
            response = res_or_gen.choices[0].message.content
            print(response)
        print('-' * 50)
        return response

    def infer_cli(self) -> List[Dict[str, Any]]:
        args = self.args
        template = self.template
        request_config = args.get_request_config()
        logger.info(f'request_config: {request_config}')

        logger.info('Input `exit` or `quit` to exit the conversation.')
        logger.info('Input `multi-line` to switch to multi-line input mode.')
        logger.info('Input `reset-system` to reset the system and clear the history.')
        support_multi_round = template.template_meta.support_multi_round
        if support_multi_round:
            logger.info('Input `clear` to clear the history.')
        else:
            logger.info('The current template only supports single-round dialogues.')

        infer_state = InferCliState()
        result_list = []
        while True:
            if not support_multi_round:
                infer_state.clear()
            query = infer_state.input_text()
            if query.strip().lower() in {'exit', 'quit'}:
                break
            query = infer_state.check_query(query)
            if query is None:
                continue
            infer_state.add_query(query)
            if args.model_meta.is_multimodal:
                infer_state.input_mm_data()
            if args.task_type == 'seq_cls' and args.num_labels == 1:
                # reward model
                response = infer_state.input_text()
                infer_state.add_response(response)
                data = infer_state.to_dict()
                response = self.infer_single(data, request_config)
                data = {'response': response, **data}
            else:
                data = infer_state.to_dict()
                response = self.infer_single(data, request_config)
                infer_state.add_response(response)
                data['messages'].append({'role': 'assistant', 'content': response})
                data = {'response': response, **data}
            result_list.append(data)
            if self.jsonl_writer:
                self.jsonl_writer.append(data)

        return result_list

    def _prepare_val_dataset(self) -> HfDataset:
        args = self.args
        dataset_kwargs = args.get_dataset_kwargs()
        if len(args.val_dataset) > 0:
            _, val_dataset = load_dataset(args.val_dataset, split_dataset_ratio=1.0, **dataset_kwargs)
        else:
            _, val_dataset = load_dataset(args.dataset, split_dataset_ratio=args.split_dataset_ratio, **dataset_kwargs)
        assert val_dataset is not None
        val_dataset = sample_dataset(val_dataset, args.val_dataset_sample, self.random_state)
        return val_dataset

    def _calc_metric(self):
        args = self.args
        if not is_master():
            return
        data_list = read_from_jsonl(self.jsonl_writer.fpath)
        preds, labels, orig = [], [], []
        for data in data_list:
            #prediction = extract_xml_answer(data['response'])
            prediction = data["response"] 
            try:
                if os.environ["DATASET"] in ["mr", "sst-2", "cr"]:
                    if data["labels"] == "0" or data["labels"] == 0:
                        label = "negative"
                    elif data["labels"] == "1" or data["labels"] == 1:
                        label = "positive"
                    else:
                        label = data["labels"]

                elif os.environ["DATASET"] == "sum":
                    args.metric = "rouge"
                    label = data["labels"]

                elif os.environ["DATASET"] == "sim":
                    args.metric = "sari"
                    label = data["labels"]

                
                elif os.environ["DATASET"] == "news":
                    if data["labels"] == "1":
                        label = "World"
                    elif data["labels"] == "2":
                        label = "Sports"
                    elif data["labels"] == "3":
                        label = "Business"
                    elif data["labels"] == "4":
                        label = "Tech"

                elif os.environ["DATASET"] == "trec":
                    if data["labels"] == "0":
                        label = "Description"
                    elif data["labels"] == "1":
                        label = "Entity"
                    elif data["labels"] == "2":
                        label = "Expression"
                    elif data["labels"] == "3":
                        label = "Human"
                    elif data["labels"] == "4":
                        label = "Location"
                    elif data["labels"] == "5":
                        label = "Number"


                elif os.environ["DATASET"] == "sst5":
                    if data["labels"] == "0":
                        label = "terrible"
                    elif data["labels"] == "1":
                        label = "bad"
                    elif data["labels"] == "2":
                        label = "okay"
                    elif data["labels"] == "3":
                        label = "good"
                    elif data["labels"] == "4":
                        label = "great"
                    else:
                        label = data["labels"]

                elif os.environ["DATASET"] == "subj":

                    if data["labels"] == "0":
                        label = "subjective"
                    elif data["labels"] == "1":
                        label = "objective"
                    else:
                        label = data["labels"]
            except:
                import pdb; pdb.set_trace()
            preds.append(prediction)
            labels.append(label)
            orig.append(data["messages"][1]["content"].split("\n")[-1])
        if args.metric == 'acc':
            mean_metric = MeanMetric()
            for pred, label in zip(preds, labels):
                mean_metric.update(pred.lower() == label.lower())
            res = {'acc': mean_metric.compute()['value']}
        elif args.metric == 'rouge':
            rouge1 = []
            rouge2 = []
            rougel = []
            for (x,y) in zip(preds, labels):
                try:
                    metrics = cal_rouge([x], [y])
                    rouge1.append(metrics[0])
                    rouge2.append(metrics[1])
                    rougel.append(metrics[2])
                except:
                    rouge1.append(0)
                    rouge2.append(0)
                    rougel.append(0)
            res = {'rouge1': round(np.mean(rouge1) * 100, 2), 'rouge2': round(np.mean(rouge2) * 100, 2),
                    'rougel': round(np.mean(rougel) * 100, 2)}
            print(res)
        elif args.metric == "sari":
            sari = []
            for i in range(len(orig)):
                #try:
                   #pass
                    metrics = calculate_sari(labels[i].replace("\n", ""), preds[i].replace("\n", ""), orig[i].replace("\n", ""))
                #import pdb; pdb.set_trace()
                #metrics = corpus_sari(labels[i], preds[i], orig[i])
                #except:
                #   metrics = 0
            sari.append(metrics)
            #refs_sents = [labels]              # 1 reference ⇒ shape (1, n_samples)
            #import pdb; pdb.set_trace()
            # metrics = corpus_sari(
            #     orig_sents=labels,
            #     sys_sents =preds,
            #     refs_sents=orig
            # )
            #res = {'sari': round(metrics, 2)}
            res = {'sari': round(np.mean(sari), 2)}
        logger.info(res)

    def infer_dataset(self) -> List[Dict[str, Any]]:
        """Run inference over the validation dataset, save the results and score them.

        In streaming mode every sample is answered by the main engine one at a
        time and written out immediately. In batch mode the main engine answers
        the whole (optionally sharded) dataset first; each answer is then
        combined with the text following the last ``OBSERVATION:`` marker of
        the message at index 1 and sent to the frozen model, whose answer is
        the one stored in the result records.

        Returns:
            One record per sample, containing ``response``, ``labels`` and the
            fields of the original sample.
        """
        args = self.args
        request_config = args.get_request_config()
        logger.info(f'request_config: {request_config}')

        val_dataset = self._prepare_val_dataset()
        logger.info(f'val_dataset: {val_dataset}')
        result_list = []

        self.infer_kwargs['metrics'] = [InferStats()]
        if request_config and request_config.stream:
            for data in val_dataset:
                labels = InferRequest.remove_response(data['messages'])
                query = data['messages'][-1]['content']
                print(f'[QUERY] {query}')
                if labels:
                    print(f'[LABELS] {labels}')
                print('[RESPONSE] ', end='')
                response = self.infer_single(data, request_config)
                data['messages'].append({'role': 'assistant', 'content': response})
                data = {'response': response, 'labels': labels, **data}
                result_list.append(data)
                if self.jsonl_writer:
                    self.jsonl_writer.append(data)
        else:
            if args.rank >= 0 and args.global_world_size > 1:
                val_dataset = val_dataset.shard(args.global_world_size, args.rank, contiguous=True)
            val_dataset = list(val_dataset)
            labels_list = []
            for data in val_dataset:
                if args.task_type == 'causal_lm':
                    labels = InferRequest.remove_response(data['messages'])
                else:
                    labels = data.pop('label', None)
                    if labels is not None:
                        labels = str(int(labels))
                labels_list.append(labels)

            resp_list = self.infer(
                val_dataset, request_config, template=self.template, use_tqdm=True, **self.infer_kwargs)

            # Build the second-stage requests. The pieces are extracted into
            # local variables first: reusing the enclosing quote character or a
            # backslash inside an f-string replacement field is a SyntaxError
            # before Python 3.12.
            new_requests = []
            for data, resp in tqdm(zip(val_dataset, resp_list), total=len(val_dataset)):
                response = resp.choices[0].message.content
                observation = data['messages'][1]['content'].split('OBSERVATION: \n\n ')[-1]
                new_requests.append({
                    'messages': [
                        {'role': 'system', 'content': 'You are a helpful assistant.'},
                        {'role': 'user', 'content': f'{response} \n {observation}'},
                    ]
                })

            responses = self.frozen_model.infer(new_requests, request_config)
            responses = [x.choices[0].message.content for x in responses]

            for data, response, labels in zip(val_dataset, responses, labels_list):
                data['messages'].append({'role': 'assistant', 'content': response})
                data = {'response': response, 'labels': labels, **data}
                result_list.append(data)

            if self.jsonl_writer:
                self.jsonl_writer.append(result_list, gather_obj=True)
        metrics = self.infer_kwargs.pop('metrics')
        print(f'[rank{args.rank}] {metrics[0].compute()}')
        # Accuracy is the default; `_calc_metric` is called with it selected.
        args.metric = "acc"
        self._calc_metric()
        return result_list


def infer_main(args: Union[List[str], InferArguments, None] = None):
    """Build a ``SwiftInfer`` pipeline from ``args`` and return the result of its ``main()``."""
    pipeline = SwiftInfer(args)
    return pipeline.main()
