from openai import OpenAI
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import os

# Runtime configuration, read once from environment variables at import time.

# Endpoint settings; each is None when its variable is unset.
api_key = os.getenv("API_KEY")
base_url = os.getenv("BASE_URL")
model = os.getenv("MODEL")

# Per-request generation cap and thread-pool size for batched requests.
max_tokens = int(os.getenv("MAX_TOKENS", "8192"))
max_workers = int(os.getenv("MAX_WORKERS", "8"))

def get_response(params):
    """Send one chat-completion request and return the reply text.

    Args:
        params: A 5-tuple ``(message, api_key, base_url, model,
            enable_thinking)``. ``message`` is passed as the ``messages``
            argument of the chat-completions call, and ``enable_thinking``
            is forwarded to the server under ``chat_template_kwargs`` in the
            request's ``extra_body``.

    Returns:
        ``reasoning_content + "</think>" + content`` when the reply carries
        a non-empty ``reasoning_content`` field; otherwise the reply's
        ``content`` unchanged (which may be ``None``).
    """
    messages, api_key, base_url, model, enable_thinking = params
    # Scope the client so its HTTP connection pool is released right after
    # the request instead of lingering until garbage collection.
    with OpenAI(api_key=api_key, base_url=base_url) as client:
        completion = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=0.0,
            max_tokens=max_tokens,
            top_p=1,
            extra_body={"chat_template_kwargs": {"enable_thinking": enable_thinking}},
            timeout=10 * 60 * 60,  # seconds (10 hours)
        )
    reply = completion.choices[0].message
    # reasoning_content is an optional extra field on the reply; read it
    # directly instead of serialising the whole message via the deprecated
    # pydantic `.dict()`.
    reasoning = getattr(reply, "reasoning_content", None)
    if reasoning in ("", None):
        return reply.content
    # A reply can carry reasoning but no final content; append nothing in
    # that case rather than the literal string "None".
    content = "" if reply.content is None else str(reply.content)
    return str(reasoning) + "</think>" + content

def get_response_list(messages, enable_thinking=False):
    """Run ``get_response`` over many conversations using a thread pool.

    Args:
        messages: Iterable of per-request message payloads; each element is
            sent as one separate request.
        enable_thinking: Flag forwarded unchanged to every request.

    Returns:
        A list of responses in the same order as ``messages``.
    """
    # Bundle the module-level endpoint settings with each payload, since
    # get_response takes a single tuple argument.
    jobs = [
        (conversation, api_key, base_url, model, enable_thinking)
        for conversation in messages
    ]
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # map() yields results in submission order; tqdm only adds a
        # progress bar as they are consumed.
        progress = tqdm(pool.map(get_response, jobs), total=len(jobs))
        return list(progress)