import openai
import asyncio
import requests
import argparse
from openai import OpenAI

async def call_qwen(args, query, instruction="", max_retries=3):
    """Send one chat-completion request and return the reply text.

    Args:
        args: Namespace providing ``openai_api_base``, ``openai_api_key``,
            ``model_name``, ``temperature`` and ``max_new_tokens``.
        query: Content of the user message.
        instruction: System prompt. An empty value falls back to
            ``"You are a helpful assistant."``.
        max_retries: How many additional attempts to make after the first
            failure. Negative values are treated as ``0``.

    Returns:
        The message content of the first choice, or ``""`` when every
        attempt raised.
    """
    client = OpenAI(
        base_url=args.openai_api_base,
        api_key=args.openai_api_key,
    )

    messages = [
        {"role": "system", "content": instruction or "You are a helpful assistant."},
        {"role": "user", "content": query},
    ]

    total_attempts = max(0, max_retries) + 1
    for attempt in range(1, total_attempts + 1):
        try:
            # NOTE: ``OpenAI`` is the synchronous client, so this call blocks
            # the event loop until the server answers.
            res = client.chat.completions.create(
                model=args.model_name,
                messages=messages,
                temperature=args.temperature,
                max_tokens=args.max_new_tokens,
            )
            return res.choices[0].message.content
        except Exception as e:
            # Best-effort helper: report the failure and retry with the SAME
            # messages. The retry is bounded so a permanently failing
            # endpoint cannot recurse or loop forever.
            print(e)
            if attempt < total_attempts:
                # Short capped backoff before the next attempt.
                await asyncio.sleep(min(2 ** (attempt - 1), 8))

    return ""

async def get_args(argv=None):
    """Parse the command-line options for the inference script.

    Declared ``async`` only because callers ``await`` it; nothing here
    actually suspends.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with ``openai_api_key``,
        ``openai_api_base``, ``model_name``, ``max_new_tokens`` and
        ``temperature``.
    """
    # The first positional argument of ArgumentParser is ``prog``; pass the
    # title as ``description`` so the usage line shows the real program name.
    parser = argparse.ArgumentParser(description="Inference Arguments")
    parser.add_argument(
        "--openai_api_key", type=str, default="123abc",
        help="API key sent to the endpoint",
    )
    # The OpenAI client appends "/chat/completions" itself, so the base URL
    # must stop at the API root ("/v1").
    parser.add_argument(
        "--openai_api_base", type=str, default="http://localhost:8000/v1",
        help="base URL of the OpenAI-compatible API (ending in /v1)",
    )
    parser.add_argument(
        "--model_name", type=str,
        default="checkpoints/searchqa/superclue-grpo-qwen2.5-14b-instruct-react-entity-bridging/global_step_35/actor/huggingface",
        help="deploy model name",
    )
    parser.add_argument(
        "--max_new_tokens", type=int, default=4096,
        help="maximum number of tokens to generate",
    )
    parser.add_argument(
        "--temperature", type=float, default=0.6,
        help="sampling temperature",
    )
    return parser.parse_args(argv)

async def main():
    """Parse the CLI options, send one demo query, and print query and reply."""
    # Imported here (as in the original) but aliased so the builtin ``print``
    # is not shadowed inside this function; importing first also fails fast
    # if ``rich`` is unavailable, before any request is made.
    from rich import print as rich_print

    args = await get_args()

    query = """将以下事件按发生的时间顺序排列：\nA.小米su7 ultra正式上市；\nB.小鹏MONA M03正式上市；\nC.2025款吉利银河E8正式上市。"""
    # Empty instruction: call_qwen substitutes its default system prompt.
    instruction = ""
    reply = await call_qwen(args, query, instruction=instruction)

    rich_print("#"*100)
    rich_print(f"query:{query}")
    rich_print(f"reply:{reply}")
    rich_print("\n"*2)


# Script entry point: run the async main() to completion on a fresh event
# loop when this file is executed directly (not when it is imported).
if __name__ == "__main__":
    asyncio.run(main())