import re
import os
import sys
import time

import torch
from openai import OpenAI

from openai_keys import OPENAI_API_KEY, OPENAI_BASE_URL
from utils.apply_template import apply_template_vllm
from vllm import SamplingParams

# Ordered (substring of args.model_name, API model id) pairs; the first match wins.
_API_MODEL_IDS = (
    ("gpt-4o", "gpt-4o-2024-08-06"),
    ("deepseek-v3.1", "deepseek-v3.1-think-250821"),
    ("gemini", "gemini-2.5-flash-thinking"),
)


def _resolve_api_model_id(model_name):
    """Return the API model id whose marker occurs in ``model_name``.

    Raises:
        ValueError: if no known marker is a substring of ``model_name``.
    """
    for marker, model_id in _API_MODEL_IDS:
        if marker in model_name:
            return model_id
    raise ValueError(f"Model {model_name} not supported.")


def gen_model_responses(model, tokenizer, args, sysprompt="You are a helpful assistant.", prompt=''):
    """Generate a response to ``prompt`` with a remote API model or a local model.

    When ``model`` or ``tokenizer`` is ``None`` the request goes to the
    OpenAI-compatible endpoint configured by ``OPENAI_API_KEY`` /
    ``OPENAI_BASE_URL``; otherwise ``model.generate`` is called locally.

    Args:
        model: Local model object exposing ``generate(inputs, sampling_params)``
            (presumably a vLLM engine -- confirm against the caller), or
            ``None`` to use the API.
        tokenizer: Tokenizer handed to ``apply_template_vllm``, or ``None`` to
            use the API.
        args: Object providing ``model_name``, ``max_new_tokens``,
            ``temperature``, ``top_p`` and ``seed``.
        sysprompt: System message content.
        prompt: User message content.

    Returns:
        API path: the message content of the first returned choice.
        Local path: a list with the first candidate text of every output
        returned by ``model.generate``.

    Raises:
        ValueError: on the API path, when ``args.model_name`` matches none of
            the supported API models.
    """
    if model is None or tokenizer is None:
        # Resolve the model id *outside* the retry loop: an unsupported name is
        # a caller error that no retry can fix, so it must propagate instead of
        # being swallowed by the broad ``except`` below (which used to turn it
        # into an endless loop).
        api_model_id = _resolve_api_model_id(args.model_name)
        # One client is enough for all attempts; building it here also lets
        # client construction errors surface instead of being retried forever.
        client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL)
        messages = [
            {"role": "system", "content": sysprompt},
            {"role": "user", "content": f"{prompt}"},
        ]

        while True:
            try:
                completion = client.chat.completions.create(
                    model=api_model_id,
                    messages=messages,
                    max_tokens=args.max_new_tokens,
                    temperature=args.temperature,
                    top_p=args.top_p,
                    seed=args.seed,
                )
                return completion.choices[0].message.content
            except Exception as e:
                # Best-effort: report the failure and try again.
                # NOTE(review): retries are immediate and unbounded; consider
                # adding a delay/backoff and a retry cap.
                print(e, file=sys.stderr)

    # Local (open-source) model path.
    sampling_params = SamplingParams(
        temperature=args.temperature,
        top_p=args.top_p,
        max_tokens=args.max_new_tokens,
        seed=args.seed,
    )
    input_text = apply_template_vllm(tokenizer, sysprompt, prompt)
    outputs = model.generate(input_text, sampling_params)
    # Keep only the first candidate of each request output.
    return [output.outputs[0].text for output in outputs]
        