import torch 
from transformers import AutoModelForCausalLM ,AutoTokenizer 

# Local directory of the converted checkpoint; it is passed to both the
# tokenizer and the model `from_pretrained` calls later in this script.
ckpt ="/path/to/home/converted_checkpoint/Qwen2.5-7B-Instruct/LLTM-all-numeric-depth-p80_lr_2e-5-minlr_4e-6_GB_64_3epoch/iter_0005916"


# System message: asks the model to estimate the output of Python code and to
# wrap its reasoning in <think> tags and its final answer in <answer> tags.
SYSTEM_PROMPT ="You are a highly capable assistant. Your task is to estimate the output of the given Python code. The reasoning process and output are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> output here </answer>"

# User message: one probe sample made of a <code> section (the function `f`)
# followed by an <input> section (the argument list to evaluate it on).
USER_PROMPT ="<code>\ndef f(lst):\n    i = 0\n    new_list = []\n    while i < len(lst):\n        if lst[i] in lst[i+1:]:\n            new_list.append(lst[i])\n            if len(new_list) == 3:\n                return new_list\n        i += 1\n    return new_list\n</code>\n<input>\n[0, 2, 1, 2, 6, 2, 6, 3, 0]\n</input>"


# Use bfloat16 when CUDA is available; otherwise fall back to float32.
if torch.cuda.is_available():
    dtype = torch.bfloat16
else:
    dtype = torch.float32

# Tokenizer and weights are both loaded from the same checkpoint directory.
# NOTE(review): trust_remote_code=True allows custom code shipped with the
# checkpoint to run — only point `ckpt` at directories you trust.
tok = AutoTokenizer.from_pretrained(
    ckpt,
    use_fast=True,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    ckpt,
    torch_dtype=dtype,
    device_map="auto",
    trust_remote_code=True,
)
# Switch to evaluation mode; this script only runs a forward pass.
model.eval()


# Assemble the conversation and render it to plain text with the tokenizer's
# chat template, ending with the generation prompt for the assistant turn.
messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": USER_PROMPT},
]
prefix = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Force the start of the assistant reply so the model is queried for the token
# that follows this exact prefix.
# NOTE(review): the trailing "." after "<think>\n" is kept as-is — confirm it
# is an intentional part of the probe.
text = prefix + "<think>\n."

# The rendered template text already contains the special tokens, so the
# tokenizer must not add them a second time (this would duplicate BOS/EOS on
# tokenizers that insert them by default).
enc = tok(text, return_tensors="pt", add_special_tokens=False).to(model.device)

# Single forward pass without gradient tracking; no KV cache is needed because
# nothing is generated afterwards.
with torch.no_grad():
    out = model(**enc, use_cache=False, return_dict=True)

logits = out.logits

# Keep only the logits at the final position, upcast to float32 before the
# softmax, and normalise over the last (vocabulary) axis.
last_logits = logits[:, -1, :].float()
probs = last_logits.softmax(dim=-1)


# Number of highest-probability next-token candidates to report.
topk = 10
top_probs, top_ids = torch.topk(probs, k=topk, dim=-1)

# Only one prompt was encoded, so row 0 holds the candidates of interest.
candidate_ids = top_ids[0].tolist()
candidate_probs = top_probs[0].tolist()

# Decode each id on its own so every candidate maps to exactly one string;
# these strings are what the report below prints.
top_tokens = [tok.decode([tid]) for tid in candidate_ids]

# Header follows `topk` so it stays correct if the constant is changed.
print(f"=== Next-token Top-{topk} ===")
for rank, (tid, p, token) in enumerate(
    zip(candidate_ids, candidate_probs, top_tokens), start=1
):
    # repr() (via !r) makes whitespace and newline tokens visible.
    print(f"{rank:2d}: id={tid:<7d} p={p:.4f}  token={token!r}")
