# coding: utf-8
import ast
import json
import os
import sys

sys.path.append('./')

from dataset import *
from evaluate import *
from prompts.focus.prompt import *
from tools.utils import *
from tools.retriever import *
from app.sample_resp_cralwer import SampleRespCrawler
from app.chatgpt_resp_cralwer import ChatgptRespCrawler
from app.huggingface_resp_crawler import HuggingFaceRespCrawlerBase

class CoT:
    """Fixed-order retrieval pipeline evaluated over a dialogue dataset.

    For every sample the pipeline runs persona retrieval, knowledge retrieval
    and answer generation (in that order), scores the generated response with
    ``f1``, ``rl`` and ``avg_bleu``, and finally writes the aggregated metrics
    and the raw generations to two JSON files.

    Per-sample intermediate state lives in ``self.cache``, a plain dict that
    ``predict_responses`` rebuilds for every sample.
    """

    # Datasets for which ``__init__`` knows how to build a prompt constructor.
    SUPPORTED_DATASETS = ("hotpotqa", "focus")
    # Model names routed to ChatgptRespCrawler; every other name is handed to
    # HuggingFaceRespCrawlerBase.
    OPENAI_MODEL_TYPES = ("gpt-3.5-turbo", "gpt-3.5-turbo-0613", "text-davinci-003", "gpt-4-0613")
    # Pipeline stages, in execution order. Also the whitelist for dispatch.
    MODULES = ("persona_retrieval", "knowledge_retrieval", "answer_generator")
    # Only the first MAX_SAMPLES examples are evaluated.
    MAX_SAMPLES = 200

    def __init__(self, key_path, dataset, model_type="chatgpt", model_path="", language="", setting="zero-shot",
            temperature=0.7, persona="", top_p=0.95):
        """Build the dataset reader, the retriever and the response crawler.

        Args:
            key_path: Forwarded to ``ChatgptRespCrawler`` when an OpenAI model
                type is selected.
            dataset: ``"hotpotqa"`` or ``"focus"``.
            model_type: Model identifier; selects the crawler and the token
                unit price.
            model_path: Forwarded to ``HuggingFaceRespCrawlerBase``.
            language: Stored on the instance; not otherwise used here.
            setting: Stored on the instance; not otherwise used here.
            temperature: Sampling temperature forwarded to the crawler.
            persona: Forwarded to ``ChatgptRespCrawler``.
            top_p: Nucleus-sampling value forwarded to the crawler.

        Raises:
            ValueError: If ``dataset`` is not one of ``SUPPORTED_DATASETS``.
        """
        # Fail fast: previously an unknown dataset left ``prompt_constructor``
        # undefined and only surfaced later as an AttributeError.
        if dataset not in self.SUPPORTED_DATASETS:
            raise ValueError(
                f"Unsupported dataset {dataset!r}; expected one of {self.SUPPORTED_DATASETS}"
            )

        self.key_path = key_path
        self.dataset = dataset
        self.model_type = model_type
        self.model_path = model_path
        self.language = language
        # Misspelled attribute kept as an alias so existing readers keep working.
        self.langugae = language
        self.token_unit_price = get_token_unit_price(model_type)

        self.setting = setting
        if self.dataset == "hotpotqa":
            data_paths = ["./dataset/hotpotQA/hotpot_dev_distractor_v1.json"]
            demo_path = "./cot_retrieval/config/prompt_en.json"
            self.prompt_constructor = HotpotQA(data_paths, demo_path=demo_path, mode="dev")
        else:  # "focus"
            data_paths = ["./dataset/FoCus/valid_focus.json"]
            self.prompt_constructor = FoCus(data_paths, mode="dev")

        self.retriever = Retriever("bm25")
        self.call_retrieval_times, self.persona_right_count, self.knowledge_right_count = 0, 0, 0
        self.init_model_type(temperature, persona, top_p, model_type)

    def init_model_type(self, temperature, persona, top_p, model):
        """Create ``self.sample_crawler`` for the configured ``self.model_type``.

        NOTE(review): the constructor default ``model_type="chatgpt"`` is not
        in ``OPENAI_MODEL_TYPES`` and therefore takes the HuggingFace branch;
        confirm that this is intended.
        """
        if self.model_type in self.OPENAI_MODEL_TYPES:
            self.sample_crawler = ChatgptRespCrawler(self.key_path, temperature, persona=persona, top_p=top_p, model=model)
        else:
            self.sample_crawler = HuggingFaceRespCrawlerBase(self.model_type, self.model_path, top_p=top_p, temperature=temperature)

    def retrieve_external_knowledge(self, query, knowledge_bases, number_results):
        """Query the retriever and count the call.

        Returns:
            The ``(top_indexes, retrieved_res)`` pair produced by
            ``self.retriever.retrieve_top_n``.
        """
        top_indexes, retrieved_res = self.retriever.retrieve_top_n(query, knowledge_bases, number=number_results)
        self.call_retrieval_times += 1
        return top_indexes, retrieved_res

    def persona_retrieval(self):
        """Retrieve the top persona candidate for the cached dialogue.

        The query is the dialogue, extended with the retrieved knowledge when
        knowledge retrieval has already run for this sample. The joined result
        is stored under ``"persona_retriever:output"`` in the cache, and
        ``persona_right_count`` is incremented when a retrieved index is one
        of the grounded persona indexes.

        Returns:
            The retrieved persona entries as returned by the retriever.
        """
        query = self.cache["dialogue"]
        persona_cands = self.cache["persona_cands"]
        persona_indexes = self.cache["persona_grounding_indexes"]

        if "knowledge_retriever:output" in self.cache:
            query = query + " " + self.cache["knowledge_retriever:output"]
        retrieved_p_index, retrieved_persona = self.retrieve_external_knowledge(query, persona_cands, 1)

        # A hit is any overlap between retrieved and grounded indexes.
        if set(retrieved_p_index) & set(persona_indexes):
            self.persona_right_count += 1

        # update the cache
        self.cache["persona_retriever:output"] = " ".join(retrieved_persona)
        return retrieved_persona

    def knowledge_retrieval(self):
        """Retrieve the top knowledge candidate for the cached dialogue.

        The query is the dialogue, extended with the retrieved persona when
        persona retrieval has already run for this sample. The joined result
        is stored under ``"knowledge_retriever:output"`` in the cache, and
        ``knowledge_right_count`` is incremented when the first retrieved
        index equals the cached grounding index.

        Returns:
            The retrieved knowledge entries as returned by the retriever.
        """
        query = self.cache["dialogue"]
        knowledge_cands = self.cache["knowledge_cands"]
        knowledge_grounding_indexes = self.cache["knowledge_grounding_indexes"]

        if "persona_retriever:output" in self.cache:
            query = query + " " + self.cache["persona_retriever:output"]
        retrieved_k_index, retrieved_knowledge = self.retrieve_external_knowledge(query, knowledge_cands, 1)

        # ``.tolist()`` means the retriever is expected to return an
        # array-like object exposing that method for the indexes.
        if retrieved_k_index.tolist()[0] == knowledge_grounding_indexes:
            self.knowledge_right_count += 1

        # update the cache
        self.cache["knowledge_retriever:output"] = " ".join(retrieved_knowledge)
        return retrieved_knowledge

    def answer_generator(self):
        """Generate the response from the dialogue and the retrieved context.

        Requires ``"knowledge_retriever:output"`` in the cache; the persona
        section is included only when persona retrieval has run. Stores the
        generation under ``"generations"`` and adds the cost of this call
        (``total_tokens * token_unit_price``) to ``"prices"``.

        Returns:
            The generation extracted by ``get_response_according_to_model_type``.
        """
        dialogue = self.cache["dialogue"]
        knowledge = self.cache["knowledge_retriever:output"]
        if "persona_retriever:output" in self.cache:
            persona = self.cache["persona_retriever:output"]
            retrieved_knowledge = f"PERSONA: {persona}\nKNOWLEDGE: {knowledge}"
        else:
            retrieved_knowledge = f"KNOWLEDGE: {knowledge}"

        test_prompt = f"Dialogue: {dialogue}\n\nRetrieved knowledge from different sources:\n {retrieved_knowledge}\n\nResponse: "
        full_prompt = chameleon_response_prompt + "\n\n" + test_prompt # full prompt

        generations = self.sample_crawler.call_openai_each(full_prompt)
        total_tokens, generations = get_response_according_to_model_type(generations, self.model_type)

        # update the cache; accumulate instead of overwriting so that any cost
        # already recorded for this sample is not lost.
        self.cache["generations"] = generations
        self.cache["prices"] = self.cache.get("prices", 0) + total_tokens * self.token_unit_price
        return generations

    def _init_cache(self, sample):
        """Reset ``self.cache`` with the fields of one dataset sample."""
        grounding = sample["persona_grounding"]
        self.cache = {
            "prices": 0,
            "dialogue": sample["context"],
            "response": sample["response"],
            # persona and knowledge cands
            "persona_cands": sample["persona_cands"],
            "knowledge_cands": sample["knowledge_cands"],
            # Positions flagged as grounded. ``== True`` (rather than plain
            # truthiness) is kept to preserve the original selection rule.
            "persona_grounding_indexes": [i for i, flag in enumerate(grounding) if flag == True],  # noqa: E712
            "knowledge_grounding_indexes": sample["knowledge_index"],
        }

    def _run_modules(self, module_names, verbose=False):
        """Run the named pipeline stages in order and return their outputs.

        Names are resolved with ``getattr`` against the ``MODULES`` whitelist
        instead of being evaluated as code.

        Raises:
            ValueError: If a name is not listed in ``MODULES``.
        """
        outputs = []
        for name in module_names:
            if name not in self.MODULES:
                raise ValueError(f"Unknown module {name!r}; expected one of {self.MODULES}")
            output = getattr(self, name)()  # each stage updates the cache
            if verbose:
                print(f"======== [Module]: self.{name} ========\n")
                print(f"# [Output]\n{output}\n")
            outputs.append(output)
        return outputs

    @staticmethod
    def _mean(values):
        """Return the mean of ``values`` as a plain float (0.0 when empty)."""
        return float(np.mean(values)) if values else 0.0

    @staticmethod
    def _write_json(path, payload):
        """Write ``payload`` as indented UTF-8 JSON, creating parent directories."""
        directory = os.path.dirname(path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(path, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=4, ensure_ascii=False)

    def predict_responses(self):
        """Run the pipeline on the first ``MAX_SAMPLES`` examples and save results.

        Writes the aggregated metrics to
        ``./exp_output/<dataset>/<model_type>_cot_result.json`` and the
        per-sample generations to
        ``./response_output/<dataset>/<model_type>_cot_result.json``.
        Stage outputs are printed for the first three samples.
        """
        rl_scores, f1_scores, avg_bleu_scores = [], [], []
        prices, evaluation_elo = [], []

        examples = self.prompt_constructor.examples[:self.MAX_SAMPLES]
        for count, sample in enumerate(examples, start=1):
            self._init_cache(sample)

            # [1]/[2] The module plan is fixed for this baseline; execute it.
            self._run_modules(self.MODULES, verbose=count < 4)

            # [3] Evaluate the results
            generations = self.cache["generations"]
            reference = self.cache["response"]
            prices.append(self.cache["prices"])
            f1_scores.append(f1(generations, reference))
            rl_scores.append(rl(generations, reference))
            avg_bleu_scores.append(avg_bleu(generations, reference))

            evaluation_elo.append({
                "context": sample["context"],
                "generation": generations
            })

        # save the final evaluation results
        result = {
            "method": "cot",
            "model_type": self.model_type,
            "retriever_type": self.retriever.model_name,
            "persona_right_count": self.persona_right_count,
            "knowledge_right_count": self.knowledge_right_count,
            "call_retrieval_times": self.call_retrieval_times,
            "f1_score": self._mean(f1_scores),
            "rl_score": self._mean(rl_scores),
            "avg_bleu_score": self._mean(avg_bleu_scores),
            "price": sum(prices)
        }

        # Experiment notes kept from the original source:
        # dialogue as query: 2 30
        # change order of prompts: 2 33
        file_name = self.model_type + "_cot_result.json"
        self._write_json("./exp_output/" + self.dataset + "/" + file_name, result)
        self._write_json("./response_output/" + self.dataset + "/" + file_name, evaluation_elo)

class Chameleon:
    """Retrieval pipeline whose module order is planned by the language model.

    For every sample the model is first asked which modules to run (see
    ``predict_modules``); the planned modules are then executed in order, the
    generated response is scored with ``f1``, ``rl`` and ``avg_bleu``, and the
    aggregated metrics and raw generations are written to two JSON files.

    Per-sample intermediate state lives in ``self.cache``, a plain dict that
    ``predict_responses`` rebuilds for every sample.
    """

    # Datasets for which ``__init__`` knows how to build a prompt constructor.
    SUPPORTED_DATASETS = ("hotpotqa", "focus")
    # Model names routed to ChatgptRespCrawler; every other name is handed to
    # HuggingFaceRespCrawlerBase. "gpt-4-0613" is included to match CoT.
    OPENAI_MODEL_TYPES = ("gpt-3.5-turbo", "gpt-3.5-turbo-0613", "text-davinci-003", "gpt-4-0613")
    # Default module plan. Also the whitelist of names a plan may contain.
    MODULES = ("persona_retrieval", "knowledge_retrieval", "answer_generator")
    # Only the first MAX_SAMPLES examples are evaluated.
    MAX_SAMPLES = 200

    def __init__(self, key_path, dataset, model_type="chatgpt", model_path="", language="", setting="zero-shot",
            temperature=0.7, persona="", top_p=0.95):
        """Build the dataset reader, the retriever and the response crawler.

        Args:
            key_path: Forwarded to ``ChatgptRespCrawler`` when an OpenAI model
                type is selected.
            dataset: ``"hotpotqa"`` or ``"focus"``.
            model_type: Model identifier; selects the crawler and the token
                unit price.
            model_path: Forwarded to ``HuggingFaceRespCrawlerBase``.
            language: Stored on the instance; not otherwise used here.
            setting: Stored on the instance; not otherwise used here.
            temperature: Sampling temperature forwarded to the crawler.
            persona: Forwarded to ``ChatgptRespCrawler``.
            top_p: Nucleus-sampling value forwarded to the crawler.

        Raises:
            ValueError: If ``dataset`` is not one of ``SUPPORTED_DATASETS``.
        """
        # Fail fast: previously an unknown dataset left ``prompt_constructor``
        # undefined and only surfaced later as an AttributeError.
        if dataset not in self.SUPPORTED_DATASETS:
            raise ValueError(
                f"Unsupported dataset {dataset!r}; expected one of {self.SUPPORTED_DATASETS}"
            )

        self.key_path = key_path
        self.dataset = dataset
        self.model_type = model_type
        self.model_path = model_path
        self.language = language
        # Misspelled attribute kept as an alias so existing readers keep working.
        self.langugae = language
        self.token_unit_price = get_token_unit_price(model_type)

        self.setting = setting
        if self.dataset == "hotpotqa":
            data_paths = ["./dataset/hotpotQA/hotpot_dev_distractor_v1.json"]
            demo_path = "./cot_retrieval/config/prompt_en.json"
            self.prompt_constructor = HotpotQA(data_paths, demo_path=demo_path, mode="dev")
        else:  # "focus"
            data_paths = ["./dataset/FoCus/valid_focus.json"]
            self.prompt_constructor = FoCus(data_paths, mode="dev")

        self.retriever = Retriever("bm25")
        self.call_retrieval_times, self.persona_right_count, self.knowledge_right_count = 0, 0, 0
        self.init_model_type(temperature, persona, top_p, model_type)

    def init_model_type(self, temperature, persona, top_p, model):
        """Create ``self.sample_crawler`` for the configured ``self.model_type``.

        NOTE(review): the constructor default ``model_type="chatgpt"`` is not
        in ``OPENAI_MODEL_TYPES`` and therefore takes the HuggingFace branch;
        confirm that this is intended.
        """
        if self.model_type in self.OPENAI_MODEL_TYPES:
            self.sample_crawler = ChatgptRespCrawler(self.key_path, temperature, persona=persona, top_p=top_p, model=model)
        else:
            self.sample_crawler = HuggingFaceRespCrawlerBase(self.model_type, self.model_path, top_p=top_p, temperature=temperature)

    def retrieve_external_knowledge(self, query, knowledge_bases, number_results):
        """Query the retriever and count the call.

        Returns:
            The ``(top_indexes, retrieved_res)`` pair produced by
            ``self.retriever.retrieve_top_n``.
        """
        top_indexes, retrieved_res = self.retriever.retrieve_top_n(query, knowledge_bases, number=number_results)
        self.call_retrieval_times += 1
        return top_indexes, retrieved_res

    def persona_retrieval(self):
        """Retrieve the top persona candidate for the cached dialogue.

        The query is the dialogue, extended with the retrieved knowledge when
        knowledge retrieval has already run for this sample. The joined result
        is stored under ``"persona_retriever:output"`` in the cache, and
        ``persona_right_count`` is incremented when a retrieved index is one
        of the grounded persona indexes.

        Returns:
            The retrieved persona entries as returned by the retriever.
        """
        query = self.cache["dialogue"]
        persona_cands = self.cache["persona_cands"]
        persona_indexes = self.cache["persona_grounding_indexes"]

        if "knowledge_retriever:output" in self.cache:
            query = query + " " + self.cache["knowledge_retriever:output"]
        retrieved_p_index, retrieved_persona = self.retrieve_external_knowledge(query, persona_cands, 1)

        # A hit is any overlap between retrieved and grounded indexes.
        if set(retrieved_p_index) & set(persona_indexes):
            self.persona_right_count += 1

        # update the cache
        self.cache["persona_retriever:output"] = " ".join(retrieved_persona)
        return retrieved_persona

    def knowledge_retrieval(self):
        """Retrieve the top knowledge candidate for the cached dialogue.

        The query is the dialogue, extended with the retrieved persona when
        persona retrieval has already run for this sample. The joined result
        is stored under ``"knowledge_retriever:output"`` in the cache, and
        ``knowledge_right_count`` is incremented when the first retrieved
        index equals the cached grounding index.

        Returns:
            The retrieved knowledge entries as returned by the retriever.
        """
        query = self.cache["dialogue"]
        knowledge_cands = self.cache["knowledge_cands"]
        knowledge_grounding_indexes = self.cache["knowledge_grounding_indexes"]

        if "persona_retriever:output" in self.cache:
            query = query + " " + self.cache["persona_retriever:output"]
        retrieved_k_index, retrieved_knowledge = self.retrieve_external_knowledge(query, knowledge_cands, 1)

        # ``.tolist()`` means the retriever is expected to return an
        # array-like object exposing that method for the indexes.
        if retrieved_k_index.tolist()[0] == knowledge_grounding_indexes:
            self.knowledge_right_count += 1

        # update the cache
        self.cache["knowledge_retriever:output"] = " ".join(retrieved_knowledge)
        return retrieved_knowledge

    def answer_generator(self):
        """Generate the response from the dialogue and the retrieved context.

        Requires ``"knowledge_retriever:output"`` in the cache; the persona
        section is included only when persona retrieval has run. Stores the
        generation under ``"generations"`` and adds the cost of this call
        (``total_tokens * token_unit_price``) to ``"prices"``.

        Returns:
            The generation extracted by ``get_response_according_to_model_type``.
        """
        dialogue = self.cache["dialogue"]
        knowledge = self.cache["knowledge_retriever:output"]
        if "persona_retriever:output" in self.cache:
            persona = self.cache["persona_retriever:output"]
            retrieved_knowledge = f"PERSONA: {persona}\nKNOWLEDGE: {knowledge}"
        else:
            retrieved_knowledge = f"KNOWLEDGE: {knowledge}"

        test_prompt = f"Dialogue: {dialogue}\n\nRetrieved knowledge from different sources:\n {retrieved_knowledge}\n\nResponse: "
        full_prompt = chameleon_response_prompt + "\n\n" + test_prompt # full prompt

        generations = self.sample_crawler.call_openai_each(full_prompt)
        total_tokens, generations = get_response_according_to_model_type(generations, self.model_type)

        # update the cache
        self.cache["generations"] = generations
        self.cache["prices"] = self.cache.get("prices", 0) + total_tokens * self.token_unit_price
        return generations

    @classmethod
    def _parse_modules(cls, raw):
        """Turn the model's textual module plan into a validated list of names.

        The text is lower-cased, stripped and parsed with
        ``ast.literal_eval``, so only Python literals are accepted and model
        output is never executed. The plan is accepted only if it is a list
        or tuple of names from ``MODULES``, ends with ``"answer_generator"``
        and runs ``"knowledge_retrieval"`` before the first
        ``"answer_generator"`` (the generator reads the knowledge output from
        the cache). Anything else yields the default plan.

        Returns:
            A new list of module names.
        """
        default_modules = list(cls.MODULES)
        if not isinstance(raw, str):
            return default_modules
        try:
            parsed = ast.literal_eval(raw.lower().strip())
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            return default_modules

        if not isinstance(parsed, (list, tuple)) or not parsed:
            return default_modules
        if any(not isinstance(name, str) or name not in cls.MODULES for name in parsed):
            return default_modules
        plan = list(parsed)
        if plan[-1] != "answer_generator" or "knowledge_retrieval" not in plan:
            return default_modules
        if plan.index("knowledge_retrieval") > plan.index("answer_generator"):
            return default_modules
        return plan

    def predict_modules(self, prompt):
        """Ask the model for a module plan and add the call's cost to the cache.

        SECURITY: the model response is untrusted text. It used to be passed
        to ``eval``; it is now parsed as a literal and checked against the
        ``MODULES`` whitelist (see ``_parse_modules``).

        Returns:
            The validated plan, or the default plan when the response cannot
            be used.
        """
        response = self.sample_crawler.call_openai_each(prompt)
        total_tokens, response = get_response_according_to_model_type(response, self.model_type)
        modules = self._parse_modules(response)

        self.cache["prices"] += total_tokens * self.token_unit_price
        return modules

    def _init_cache(self, sample):
        """Reset ``self.cache`` with the fields of one dataset sample."""
        grounding = sample["persona_grounding"]
        self.cache = {
            "prices": 0,
            "dialogue": sample["context"],
            "response": sample["response"],
            # persona and knowledge cands
            "persona_cands": sample["persona_cands"],
            "knowledge_cands": sample["knowledge_cands"],
            # Positions flagged as grounded. ``== True`` (rather than plain
            # truthiness) is kept to preserve the original selection rule.
            "persona_grounding_indexes": [i for i, flag in enumerate(grounding) if flag == True],  # noqa: E712
            "knowledge_grounding_indexes": sample["knowledge_index"],
        }

    def _run_modules(self, module_names, verbose=False):
        """Run the named pipeline stages in order and return their outputs.

        Names are resolved with ``getattr`` against the ``MODULES`` whitelist
        instead of being evaluated as code.

        Raises:
            ValueError: If a name is not listed in ``MODULES``.
        """
        outputs = []
        for name in module_names:
            if name not in self.MODULES:
                raise ValueError(f"Unknown module {name!r}; expected one of {self.MODULES}")
            output = getattr(self, name)()  # each stage updates the cache
            if verbose:
                print(f"======== [Module]: self.{name} ========\n")
                print(f"# [Output]\n{output}\n")
            outputs.append(output)
        return outputs

    @staticmethod
    def _mean(values):
        """Return the mean of ``values`` as a plain float (0.0 when empty)."""
        return float(np.mean(values)) if values else 0.0

    @staticmethod
    def _write_json(path, payload):
        """Write ``payload`` as indented UTF-8 JSON, creating parent directories."""
        directory = os.path.dirname(path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(path, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=4, ensure_ascii=False)

    def predict_responses(self):
        """Run the planned pipeline on the first ``MAX_SAMPLES`` examples and save results.

        Writes the aggregated metrics to
        ``./exp_output/<dataset>/<model_type>_chameleon_result.json`` and the
        per-sample generations to
        ``./response_output/<dataset>/<model_type>_chameleon_result.json``.
        The metrics file used to be the fixed path
        ``./exp_output/focus/chameleon_result.json``, which ignored the
        dataset and was overwritten by every model. Stage outputs are printed
        for the first three samples.
        """
        rl_scores, f1_scores, avg_bleu_scores = [], [], []
        prices, evaluation_elo = [], []

        examples = self.prompt_constructor.examples[:self.MAX_SAMPLES]
        for count, sample in enumerate(examples, start=1):
            dialogue = sample["context"]
            self._init_cache(sample)

            # [1] Predict the modules
            test_prompt = f"Dialogue: {dialogue}\n\nModules: "
            full_prompt = chameleon_modules_prompt + "\n\n" + test_prompt # full prompt
            modules = self.predict_modules(full_prompt)

            # [2] Execute the modules
            self._run_modules(modules, verbose=count < 4)

            # [3] Evaluate the results
            generations = self.cache["generations"]
            reference = self.cache["response"]
            prices.append(self.cache["prices"])
            f1_scores.append(f1(generations, reference))
            rl_scores.append(rl(generations, reference))
            avg_bleu_scores.append(avg_bleu(generations, reference))

            evaluation_elo.append({
                "context": sample["context"],
                "generation": generations
            })

        # save the final evaluation results
        result = {
            "method": "chameleon",
            "model_type": self.model_type,
            "retriever_type": self.retriever.model_name,
            "persona_right_count": self.persona_right_count,
            "knowledge_right_count": self.knowledge_right_count,
            "call_retrieval_times": self.call_retrieval_times,
            "f1_score": self._mean(f1_scores),
            "rl_score": self._mean(rl_scores),
            "avg_bleu_score": self._mean(avg_bleu_scores),
            "price": sum(prices)
        }

        # Experiment notes kept from the original source:
        # dialogue as query: 2 30
        # change order of prompts: 2 33
        file_name = self.model_type + "_chameleon_result.json"
        self._write_json("./exp_output/" + self.dataset + "/" + file_name, result)
        self._write_json("./response_output/" + self.dataset + "/" + file_name, evaluation_elo)