import time
from pathlib import Path

import pandas as pd
from tqdm import tqdm
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

# Checkpoint identifier: the tokenizer is loaded from it here, and the same
# value is used again when the vLLM engine is constructed.
model_path = "qwen2.5-14B"

# Benchmark CSV; its "prompt" column supplies one question per row.
file_path = "mmlu.csv"
df = pd.read_csv(file_path)
prompt_column = df["prompt"]
# Cast the column to str before turning it into a plain Python list.
questions = prompt_column.astype(str).tolist()

# NOTE(review): trust_remote_code=True allows transformers to execute Python
# code shipped with the checkpoint -- confirm the checkpoint source is trusted.
tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    trust_remote_code=True,
)

def apply_prompt_template(prompt):
    """Render a raw question as chat-formatted prompt text.

    The question becomes the user turn of a two-message conversation that
    opens with a fixed system message. The conversation is rendered by the
    module-level ``tokenizer`` through its chat template.

    Args:
        prompt: Question text to place in the user turn.

    Returns:
        The result of ``tokenizer.apply_chat_template`` called with
        ``tokenize=False`` and ``add_generation_prompt=True``.
    """
    system_turn = {"role": "system", "content": "You are a helpful assistant."}
    user_turn = {"role": "user", "content": prompt}
    conversation = [system_turn, user_turn]

    rendered = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,  # ask for text instead of token ids
        add_generation_prompt=True,  # end with the opening of the assistant turn
        # Forwarded to the chat template as an extra variable.
        # NOTE(review): whether this checkpoint's template reads it is not
        # visible here -- confirm.
        enable_thinking=False,
    )
    return rendered

# Render every question through the chat template up front so the whole
# benchmark is submitted to vLLM in a single generate() call.
prompts = [apply_prompt_template(q) for q in questions]

llm = LLM(model=model_path)

# temperature=0.0 selects greedy decoding. Leaving the temperature at its
# default samples randomly, so benchmark answers would differ between runs.
# max_tokens=10 keeps each completion short.
sampling_params = SamplingParams(temperature=0.0, max_tokens=10)

# Time only the generation call and report it; the start timestamp was
# previously recorded but never read.
t0 = time.time()
outputs = llm.generate(prompts, sampling_params)
elapsed = time.time() - t0
print(f"Generated {len(outputs)} completions in {elapsed:.1f}s")

# Keep the first (only requested) completion per prompt, trimmed of
# surrounding whitespace.
answers = [out.outputs[0].text.strip() for out in outputs]

# Derive the column name and output-file suffix from model_path instead of
# repeating the literal, so they cannot drift apart. Taking the final path
# component keeps the name usable in a filename if model_path ever contains
# directory separators.
model_name = Path(model_path).name
df[model_name] = answers

# Build the output name from the input's stem. The previous str.replace()
# rewrote every ".csv" occurrence in the path and, when the path did not
# contain ".csv" at all, left it unchanged and overwrote the input file.
input_path = Path(file_path)
output_path = input_path.with_name(f"{input_path.stem}_{model_name}.csv")
df.to_csv(output_path, index=False)

