import logging
import os
import random
import time

import openai
import backoff 
from transformers import GPT2Tokenizer


# Running token totals for the whole process. The request helpers in this
# module add to them after every successful API response, and gpt_usage()
# reports them.
completion_tokens = 0
prompt_tokens = 0

# Token ceiling constant; not referenced by the code visible in this module.
MAX_TOKENS = 15000

# Loading the GPT-2 tokenizer is disabled; an empty string stands in for it.
# tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
tokenizer = ""


# api_key = os.getenv("OPENAI_API_KEY", "")
# if api_key != "":
#     openai.api_key = api_key
# else:
#     print("Warning: OPENAI_API_KEY is not set")
#
# api_base = os.getenv("OPENAI_API_BASE", "")
# if api_base != "":
#     print("Warning: OPENAI_API_BASE is set to {}".format(api_base))
#     openai.api_base = api_base

# Endpoint and credentials for the `openai` client used by the request helpers
# in this module. Both values are hard-coded placeholders (the environment-
# variable based setup above is commented out) and must be filled in before
# any request is made.
openai.api_base = ""  # TODO: replace with your API base URL
openai.api_key = "no"  # TODO: replace with your API key



@backoff.on_exception(
    backoff.expo,
    openai.error.OpenAIError,
    max_tries=5,
    max_time=10,
)
def completions_with_backoff(**kwargs):
    """Call ``openai.ChatCompletion.create`` with automatic retries.

    The ``backoff`` decorator re-invokes this function with exponentially
    growing waits (``backoff.expo``) whenever it raises
    ``openai.error.OpenAIError``. Retrying is capped by ``max_tries=5`` and
    ``max_time=10`` (seconds, per the backoff documentation); once a cap is
    reached the last error propagates to the caller.

    Args:
        **kwargs: Forwarded unchanged to ``openai.ChatCompletion.create``.

    Returns:
        Whatever ``openai.ChatCompletion.create`` returns.
    """
    response = openai.ChatCompletion.create(**kwargs)
    return response


#evaluation reflection
def gpt3(prompt, model="Meta-Llama-3.1-8B-Instruct", temperature=0, max_tokens=100, n=1, stop=None) -> list:
    """Request ``n`` single-turn chat completions and return their texts.

    ``prompt`` is sent as a single user message. One API request (with
    ``n=1``) is issued per requested sample, and the model for each request is
    chosen by ``llm_manager.call_llm()``.

    NOTE(review): ``model`` and ``max_tokens`` are accepted but not used. The
    model always comes from ``llm_manager`` and the request hard-codes
    ``max_tokens=200``.

    Args:
        prompt: Text placed in the ``content`` of the user message.
        model: Ignored (see note above).
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Ignored (see note above).
        n: Number of samples to request; values <= 0 produce no requests.
        stop: Stop sequence(s) forwarded to the API.

    Returns:
        A flat list with the message content of every returned choice. When a
        request fails with ``openai.error.OpenAIError`` the error is printed,
        the function sleeps 5 seconds and that sample is skipped, so the list
        can hold fewer than ``n`` items.

    Side effects:
        Adds each response's usage figures to the module-level
        ``completion_tokens`` / ``prompt_tokens`` counters, prints the chosen
        model, and logs the response texts and the running totals.
    """
    # Imported here rather than at module level (presumably to avoid a
    # circular import with ``run`` -- confirm).
    from run import llm_manager
    global completion_tokens, prompt_tokens

    messages = [{"role": "user", "content": prompt}]
    outputs = []

    # One request per sample: every call below asks the API for a single choice.
    for _ in range(n):
        try:
            selected_model = llm_manager.call_llm()
            print(f"current model is :{selected_model}")

            res = completions_with_backoff(
                model=selected_model,
                messages=messages,
                temperature=temperature,
                max_tokens=200,
                n=1,
                stop=stop,
            )
            texts = [choice["message"]["content"] for choice in res["choices"]]
            outputs.extend(texts)

            # Keep the running token totals up to date.
            usage = res["usage"]
            completion_tokens += usage["completion_tokens"]
            prompt_tokens += usage["prompt_tokens"]
            logging.info(f"model : {selected_model},response ：{texts}")
            logging.info(f"completion_tokens：{completion_tokens}，prompt_tokens：{prompt_tokens}")
        except openai.error.OpenAIError as e:
            # Best effort: report, pause, and move on to the next sample.
            print(f"API failed: {e}")
            time.sleep(5)

    return outputs
#
def gpt(prompt, model="gpt-4", temperature=0.2, max_tokens=100, n=1, stop=None) -> list:
    """Send ``prompt`` as a single user message through ``chatgpt``.

    The former special case that routed "test-davinci-002" to ``gpt3`` is
    disabled, so every call is delegated to ``chatgpt``.

    Args:
        prompt: Text placed in the ``content`` of the user message.
        model: Forwarded to ``chatgpt``.
        temperature: Forwarded to ``chatgpt``.
        max_tokens: Forwarded to ``chatgpt``.
        n: Forwarded to ``chatgpt``.
        stop: Forwarded to ``chatgpt``.

    Returns:
        The list returned by ``chatgpt``.
    """
    user_message = {"role": "user", "content": prompt}
    return chatgpt(
        [user_message],
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
        n=n,
        stop=stop,
    )
#
def gpt4(prompt, model="gpt-4", temperature=0.2, max_tokens=100, n=1, stop=None) -> list:
    """Send ``prompt`` as a single user message through ``chatgpt``.

    The former special case that routed "test-davinci-002" to ``gpt3`` is
    disabled, so every call is delegated to ``chatgpt``.

    Args:
        prompt: Text placed in the ``content`` of the user message.
        model: Forwarded to ``chatgpt``.
        temperature: Forwarded to ``chatgpt``.
        max_tokens: Forwarded to ``chatgpt``.
        n: Forwarded to ``chatgpt``.
        stop: Forwarded to ``chatgpt``.

    Returns:
        The list returned by ``chatgpt``.
    """
    conversation = [{"role": "user", "content": prompt}]
    request_options = {
        "model": model,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "n": n,
        "stop": stop,
    }
    return chatgpt(conversation, **request_options)
#
def chatgpt(messages, model="gpt-3.5-turbo-16k", temperature=0.2, max_tokens=100, n=1, stop=None) -> list:
    """Request ``n`` chat completions for ``messages``, tagged with their model.

    One API request (with ``n=1``) is issued per requested sample, and the
    model for each request is chosen by ``llm_manager.call_llm()``.

    NOTE(review): ``model`` is accepted but not used; the model always comes
    from ``llm_manager``.

    Args:
        messages: Chat messages forwarded unchanged to the API.
        model: Ignored (see note above).
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        n: Number of samples to request; values <= 0 produce no requests.
        stop: Stop sequence(s) forwarded to the API.

    Returns:
        A list with one ``[texts, selected_model]`` entry per successful
        request, where ``texts`` is the list of message contents of the
        returned choices. When a request fails with
        ``openai.error.OpenAIError`` the error is printed, the function sleeps
        5 seconds and that sample is skipped, so the list can hold fewer than
        ``n`` entries.

    Side effects:
        Adds each response's usage figures to the module-level
        ``completion_tokens`` / ``prompt_tokens`` counters, prints the chosen
        model, logs the response texts and the running totals, and finally
        logs and prints ``llm_manager.llm_stats``.
    """
    # Imported here rather than at module level (presumably to avoid a
    # circular import with ``run`` -- confirm).
    from run import llm_manager
    global completion_tokens, prompt_tokens

    outputs = []

    # One request per sample: every call below asks the API for a single choice.
    for _ in range(n):
        try:
            selected_model = llm_manager.call_llm()
            print(f"cuurently model is{selected_model}")

            res = completions_with_backoff(
                model=selected_model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                n=1,
                stop=stop,
            )
            texts = [choice["message"]["content"] for choice in res["choices"]]
            # Each entry pairs the generated texts with the model that produced them.
            outputs.append([texts, selected_model])

            # Keep the running token totals up to date.
            usage = res["usage"]
            completion_tokens += usage["completion_tokens"]
            prompt_tokens += usage["prompt_tokens"]
            logging.info(f"cuurent model {selected_model},response：{texts}")
            logging.info(f"completion_tokens：{completion_tokens}，prompt_tokens：{prompt_tokens}")
        except openai.error.OpenAIError as e:
            # Best effort: report, pause, and move on to the next sample.
            print(f"API failed: {e}")
            time.sleep(5)

    stats = llm_manager.llm_stats
    logging.info(f"{stats}")
    print(f"llm_manager status：{stats}")
    return outputs
    
def gpt_usage(backend="gpt-4"):
    """Report the token totals accumulated by this module so far.

    Per-backend cost estimation used to be computed here but is disabled, so
    only the raw counts are returned.

    Args:
        backend: Not used by the current implementation.

    Returns:
        A dict with the keys ``"completion_tokens"`` and ``"prompt_tokens"``
        holding the current values of the module-level counters.
    """
    usage = {}
    usage["completion_tokens"] = completion_tokens
    usage["prompt_tokens"] = prompt_tokens
    return usage
