# from .utils import Beam, generate_k_steps
from vllm import LLM, SamplingParams

# Build the vLLM engine once at module load and take the tokenizer from it.
MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
llm = LLM(model=MODEL_NAME)
tokenizer = llm.get_tokenizer()

# Number of "\n\n"-separated segments of a generated text that are kept when
# the text is truncated further down in this script.
g: int = 3

def count_tokens(text, tokenizer=tokenizer):
    """Return how many token ids ``tokenizer`` produces for ``text``.

    The tokenizer is called with its default settings apart from
    ``return_tensors="pt"``; the result is the size of the second dimension
    of the returned ``input_ids``.

    Args:
        text: The string to tokenize.
        tokenizer: Callable tokenizer. Defaults to the module-level
            ``tokenizer`` as it was bound when this function was defined.

    Returns:
        ``input_ids.shape[1]`` of the tokenized ``text``.
    """
    # NOTE(review): any special tokens the tokenizer adds by default (e.g. a
    # BOS token) would be included in this count -- confirm that is intended.
    encoded = tokenizer(text, return_tensors="pt")
    input_ids = encoded.input_ids
    return input_ids.shape[1]

# Sampling configuration: a single completion per prompt, sampled at
# temperature 0.8 with top_p=1.0.
sampling_params = SamplingParams(
    n=1,
    temperature=0.8,
    top_p=1.0,
    # Keep max_tokens small so the generations do not get too long.
    max_tokens=100,
    # stop=["\n\n"] is left disabled here.
    include_stop_str_in_output=True,
)

# One math word problem; the literal is split across lines only for
# readability and concatenates to a single prompt string.
prompts = [
    "Convert the point $(0,3)$ in rectangular coordinates to polar coordinates. "
    "Enter your answer in the form $(r,\\theta),$ where $r > 0$ and "
    "$0 \\le \\theta < 2 \\pi.$ Do it step by step."
]

# Run generation for every prompt with the configuration above.
outputs = llm.generate(prompts, sampling_params=sampling_params)

# Report each completion twice: in full, and cut down to its first g
# "\n\n"-separated segments, printing a token count for both versions.
for output in outputs:
    # Only one completion is requested per prompt (n=1), so index 0 is it.
    text = output.outputs[0].text
    print("=== Prompt ===")
    # Show the prompt that produced this output rather than the whole prompt
    # list, which would be repeated verbatim for every output.
    print(output.prompt)
    print("=== Full Generated Text ===")
    print(text)
    print(f"=== Number of tokens: {count_tokens(text)} ===")

    print("=== Truncated after g '\\n\\n' ===")
    # Slicing past the end of the list is harmless, so a text with fewer than
    # g segments is rejoined unchanged and needs no special case.
    truncated_text = "\n\n".join(text.split("\n\n")[:g])
    print(truncated_text)
    print(f"=== Number of tokens: {count_tokens(truncated_text)} ===")
    print("=== End ===")
