import requests
import os

from sglang import assistant_begin, assistant_end
from sglang import assistant, function, gen, system, user
from sglang import image
from sglang import RuntimeEndpoint, set_default_backend
from sglang.srt.utils import load_image
from sglang.test.test_utils import is_in_ci
from sglang.utils import print_highlight, terminate_process, wait_for_server


# Port of the local server that req() posts to (http://localhost:{port}/generate).
# NOTE(review): `xxx` is not defined anywhere in the visible source, so this line
# raises NameError as written; it looks like a placeholder for the real port.
port=xxx


import json
# Load the input records: one JSON object per line (JSON Lines). Each record
# is later read through its 'messages' entry by the generation loop.
# The encoding is pinned to UTF-8 so decoding does not depend on the locale.
with open('xxx.jsonl', 'r', encoding='utf-8') as f:
    # Blank lines (e.g. a trailing empty line) are skipped; json.loads would
    # otherwise raise on them.
    data = [json.loads(line) for line in f if line.strip()]


import requests
import json

def req(text, timeout=None):
    """Post ``text`` to the local server's ``/generate`` endpoint.

    The request asks for a single greedy token (``temperature`` 0,
    ``max_new_tokens`` 1) and enables log-probability reporting
    (``return_logprob``) with ``logprob_start_len`` 0, one top candidate
    per position, and token text included in the log-probability entries.

    Example input: ``"The capital of France is "``.

    Args:
        text: Prompt string sent as the ``"text"`` field of the payload.
        timeout: Optional timeout in seconds forwarded to ``requests.post``.
            ``None`` (the default) waits indefinitely, as before.

    Returns:
        The decoded JSON body of the server's reply.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    payload = {
        "text": text,
        "sampling_params": {
            "temperature": 0,
            "max_new_tokens": 1,
        },
        "return_logprob": True,
        "top_logprobs_num": 1,
        "logprob_start_len": 0,
        "return_text_in_logprobs": True,
    }
    response = requests.post(
        f"http://localhost:{port}/generate",
        json=payload,
        timeout=timeout,
    )
    # Fail loudly on an error status instead of handing an error payload
    # (or a confusing JSON decode failure) back to the caller.
    response.raise_for_status()
    return response.json()
# Example: inspecting the log-probabilities in the reply returned by req():
#   result = req("The capital of France is ")
#   print("Prompt Logprobs: ", result["meta_info"]["input_token_logprobs"])
#   print("Output Logprobs: ", result["meta_info"]["output_token_logprobs"])
#   print(result)

from transformers import AutoTokenizer
# Tokenizer whose chat template renders each record's messages into prompt text
# in the generation loop.
# NOTE(review): "Qwen3-8B" is passed as-is to from_pretrained; confirm it
# resolves to the intended model location in this environment.
tokenizer = AutoTokenizer.from_pretrained("Qwen3-8B")


# Render every conversation with the chat template, send it to the server
# through req(), and write one JSON reply per line.
# NOTE(review): the records were read from 'xxx.jsonl' and this block opens
# 'xxx.jsonl' for writing; if both really name the same file, the input is
# truncated as soon as it is opened here. Confirm the intended output path.
# The encoding is explicit because ensure_ascii=False writes non-ASCII
# characters verbatim, which fails under a non-UTF-8 default locale.
with open('xxx.jsonl', 'w', encoding='utf-8') as f:
    for x in data:
        text = tokenizer.apply_chat_template(
            conversation=x['messages'],
            tokenize=False,  # req() sends a prompt string, not token ids
            add_generation_prompt=False,
            enable_thinking=True,
        )

        response_json = req(text)
        f.write(json.dumps(response_json, ensure_ascii=False) + '\n')