import logging
import os
import random
import time

import openai
import backoff 
from transformers import GPT2Tokenizer
import warnings



# Running token totals for this module. gpt3() and chatgpt() add the
# "usage" figures of every successful response; gpt_usage() reports them.
completion_tokens = prompt_tokens = 0
# NOTE(review): not referenced anywhere in the visible part of this module.
MAX_TOKENS = 4000

# tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')


# openai.api_base = "http://localhost:8000/v1"
# openai.api_base = "http:///v1"
# openai.api_base = "https:///v1"
# openai.api_key = ""


# Route every openai client call in this process to a fixed endpoint.
# NOTE(review): the address is hard-coded; presumably a self-hosted
# OpenAI-compatible server. The commented-out OPENAI_API_BASE /
# OPENAI_API_KEY environment lookup in this file is the configurable
# alternative — confirm which one is intended.
openai.api_base = "http://113.55.4.83:17712/v1"
# Empty key; presumably the endpoint above does not validate it — confirm.
openai.api_key = ""



# from transformers import BertTokenizer
#
# # Specify the cache directory
# cache_dir = "/usr/local/share/pretrained-llm/BertTokenizer"
#
# # Load the tokenizer from the local cache directory
# tokenizer = BertTokenizer.from_pretrained(cache_dir)

def tokens_in_text(text):
    """
    Return a fixed placeholder token count.

    The GPT-2 tokenizer code path is commented out, so the argument is
    ignored and every call yields the same constant.

    :param text: The input text (currently unused).
    :return: Always ``712``.
    """
    # Disabled implementation, kept for reference:
    #     with warnings.catch_warnings():
    #         warnings.filterwarnings("ignore", category=UserWarning)
    #         tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    #         tokens = tokenizer.encode(text)
    placeholder_token_count = 712
    return placeholder_token_count

# api_key = os.getenv("OPENAI_API_KEY", "")
# if api_key != "":
#     openai.api_key = api_key
# else:
#     print("Warning: OPENAI_API_KEY is not set")
    
# api_base = os.getenv("OPENAI_API_BASE", "")

#
# if api_base != "":
#     print("Warning: OPENAI_API_BASE is set to {}".format(api_base))
#     openai.api_base = api_base

def gpt3(prompt, model="gpt-3.5-turbo-16k", temperature=0, max_tokens=100, n=1, stop=None) -> list:
    """
    Collect ``n`` chat completions for ``prompt``, one request per sample.

    The model name for every request is obtained from
    ``llm_manager.call_llm()``. A request that raises
    ``openai.error.OpenAIError`` (after ``completions_with_backoff`` has
    given up) is retried up to three times with a 5 second pause in between.
    If all attempts fail the sample is skipped, so the result may contain
    fewer than ``n`` entries. Token usage of each successful response is
    added to the module-level ``completion_tokens`` / ``prompt_tokens``.

    NOTE(review): ``model`` and ``max_tokens`` are accepted but not used.
    The model comes from ``llm_manager`` and each request asks for a fixed
    200 tokens. Honouring ``max_tokens`` would lower the effective default
    from 200 to 100, so the fixed value is kept — confirm the intent.

    :param prompt: Text sent as the single user message.
    :param model: Unused (see note above).
    :param temperature: Sampling temperature forwarded to the API.
    :param max_tokens: Unused (see note above).
    :param n: Number of completions to collect.
    :param stop: Stop sequence(s) forwarded to the API.
    :return: List of completion strings in request order.
    """
    global completion_tokens, prompt_tokens
    # Function-scope import; presumably avoids a circular import with
    # ``run`` — confirm before moving it to module level.
    from run import llm_manager

    print("evaluation reflection")
    messages = [{"role": "user", "content": prompt}]
    max_attempts = 3  # attempts per sample before the sample is skipped
    outputs = []

    for _ in range(n):
        for _attempt in range(max_attempts):
            try:
                selected_model = llm_manager.call_llm()
                print(f"model：{selected_model}")
                res = completions_with_backoff(model=selected_model, messages=messages, temperature=temperature,
                                               max_tokens=200, n=1, stop=stop)
            except openai.error.OpenAIError as e:
                print(f"API failed: {e}")
                time.sleep(5)  # cool down before the next attempt
                continue

            text_temp = [choice["message"]["content"] for choice in res["choices"]]
            outputs.extend(text_temp)

            # Accumulate the usage reported by the API.
            completion_tokens += res["usage"]["completion_tokens"]
            prompt_tokens += res["usage"]["prompt_tokens"]
            logging.info(f"now, model is {selected_model}, response:{text_temp}")
            logging.info(f"completion_tokens：{completion_tokens}，prompt_tokens：{prompt_tokens}")
            break  # sample obtained; move on to the next one
    return outputs


#  "Qwen2.5-14B-Instruct","Qwen2___5-7B-Instruct"


@backoff.on_exception(backoff.expo, openai.error.OpenAIError,max_tries=5,max_time=10)
def completions_with_backoff(**kwargs):
    """
    Call ``openai.ChatCompletion.create`` with retries on OpenAI errors.

    The ``backoff`` decorator re-invokes the call with exponentially growing
    waits whenever ``openai.error.OpenAIError`` is raised. It stops after
    5 tries or 10 seconds in total, whichever is reached first, and the last
    error then propagates to the caller.

    :param kwargs: Keyword arguments passed unchanged to
        ``openai.ChatCompletion.create``.
    :return: Whatever ``openai.ChatCompletion.create`` returns.
    :raises openai.error.OpenAIError: When the retry budget is exhausted.
    """
    return openai.ChatCompletion.create(**kwargs)

def gpt(prompt, model="Qwen2.5-7B-Instruct-1M", temperature=0.2, max_tokens=200, n=1, stop=None) -> list:
    """
    Send ``prompt`` as a single user message through ``chatgpt``.

    :param prompt: Text placed in the ``content`` of one ``user`` message.
    :param model: Forwarded to ``chatgpt``.
    :param temperature: Forwarded to ``chatgpt``.
    :param max_tokens: Forwarded to ``chatgpt``.
    :param n: Forwarded to ``chatgpt``.
    :param stop: Forwarded to ``chatgpt``.
    :return: The list returned by ``chatgpt``.
    """
    # Model names noted next to the default: Qwen2___5-7B-Instruct,
    # Qwen2.5-7B-Instruct-1M, Satori-7B-Round2,
    # deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
    return chatgpt(
        [{"role": "user", "content": prompt}],
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
        n=n,
        stop=stop,
    )
def chatgpt(messages, model="Qwen2.5-14B-Instruct", temperature=0.2, max_tokens=200, n=1, stop=None) -> list:
    """
    Collect ``n`` chat completions for ``messages``, one request per sample.

    The model name for every request is obtained from
    ``llm_manager.call_llm()``. A request that raises
    ``openai.error.OpenAIError`` (after ``completions_with_backoff`` has
    given up) is retried up to three times with a 5 second pause in between.
    If all attempts fail the sample is skipped, so the result may contain
    fewer than ``n`` entries. Token usage of each successful response is
    added to the module-level ``completion_tokens`` / ``prompt_tokens``, and
    ``llm_manager.llm_stats`` is logged and printed before returning.

    NOTE(review): ``model`` is accepted but not used; the model is always
    the one returned by ``llm_manager.call_llm()``.

    :param messages: Chat messages forwarded to the API.
    :param model: Unused (see note above).
    :param temperature: Sampling temperature forwarded to the API.
    :param max_tokens: Completion token limit forwarded to the API.
    :param n: Number of completions to collect.
    :param stop: Stop sequence(s) forwarded to the API.
    :return: List with one ``[contents, model_name]`` entry per successful
        request, where ``contents`` is the list of (unstripped) message
        texts of that response.
    """
    global completion_tokens, prompt_tokens
    # Function-scope import; presumably avoids a circular import with
    # ``run`` — confirm before moving it to module level.
    from run import llm_manager

    max_attempts = 3  # attempts per sample before the sample is skipped
    outputs = []

    for _ in range(n):
        for _attempt in range(max_attempts):
            try:
                selected_model = llm_manager.call_llm()
                print(f"now model：{selected_model}")
                res = completions_with_backoff(model=selected_model, messages=messages, temperature=temperature,
                                               max_tokens=max_tokens, n=1, stop=stop)
            except openai.error.OpenAIError as e:
                print(f"API failed: {e}")
                time.sleep(5)  # pause 5 seconds before retrying
                continue

            contents = [choice["message"]["content"] for choice in res["choices"]]
            outputs.append([contents, selected_model])

            # The log shows stripped text; the returned text is left untouched.
            text_temp = [content.strip() for content in contents]
            logging.info(f"model:{selected_model},response：{text_temp}")

            # Accumulate the usage reported by the API.
            completion_tokens += res["usage"]["completion_tokens"]
            prompt_tokens += res["usage"]["prompt_tokens"]

            logging.info(f"completion_tokens：{completion_tokens}，prompt_tokens：{prompt_tokens}")
            break  # sample obtained; move on to the next one

    logging.info(f"{llm_manager.llm_stats}")
    print(f"llm_manager status：{llm_manager.llm_stats}")
    return outputs
    
def gpt_usage():
    """
    Report the token totals accumulated by this module so far.

    :return: Dict with the keys ``"completion_tokens"`` and
        ``"prompt_tokens"`` holding the current module-level counters.
    """
    # Cost estimation is disabled; the per-1K-token rates it used were:
    #     gpt-4:             completion 0.06,  prompt 0.03
    #     gpt-3.5-turbo:     completion 0.002, prompt 0.0015
    #     gpt-3.5-turbo-16k: completion 0.004, prompt 0.003
    #     anything else:     flat cost of 1
    usage = {}
    usage["completion_tokens"] = completion_tokens
    usage["prompt_tokens"] = prompt_tokens
    return usage


# print(gpt("who are you",model="Mistral-7B-Instruct-v0.3",max_tokens=20))
# print(gpt("please help me to make a plan, I want go to the yunnan?",model="Satori-7B-Round2",max_tokens=2048))
# # time.sleep(3)
# print(gpt("who are you?",model="Qwen2.5-7B-Instruct-1M",max_tokens=2048))
# print(gpt("please help me to make a plan, I want go to the yunnan??",model="Qwen2.5-7B-Instruct-1M",max_tokens=2048))
# print(gpt("Please search Wikipedia for information pertaining to the keywords I will provide. Return the relevant search results without including any extra details. Keywords:eastern sector"))
# # time.sleep(3)
# print(gpt("who are you?",model="DeepSeek-R1-Distill-Qwen-7B"))
# print(gpt("please help me to make a plan, I want go to the yunnan??",model="DeepSeek-R1-Distill-Qwen-7B",max_tokens=2048))
# for i in range(10):
#     models = ["Qwen2.5-7B-Instruct-1M", "Satori-7B-Round2", "deepseek-ai/DeepSee"]
#     selected_model = random.choice(models)
#     print(selected_model)Please search Wikipedia for information pertaining to the keywords I will provide. Return the relevant search results without including any extra details. Keywords:eastern sector

# messages = [{"role": "user", "content": "Please search Wikipedia for information pertaining to the keywords I will provide. Return the relevant search results without including any extra details. Keywords:eastern sector"}]
# res = completions_with_backoff(model="DeepSeek-R1-Distill-Qwen-7B", messages=messages, temperature=0.6,
#                                                max_tokens=2000, n=1, stop=None)
# print([choice["message"]["content"].strip() for choice in res["choices"]])