# python3 -m inference_rlhf.code.plot_token_level \
#     policy=qwen-25-7b \
#     user=anonymouspli \
#     task=math \
#     sampling.top_k=128 \
#     plot.tag=qwen-25-3b-hard-100 \
#     'plot.betas=[0.0,1.0]' \
#     'plot.seeds=[0,1,2,3,4,5,6,7,8,9,10,11,12]'

# python3 -m inference_rlhf.code.plot_token_level \
#     policy=qwen-25-7b \
#     user=anonymouspli \
#     task=math \
#     sampling.top_k=128 \
#     plot.tag=gpt-4o-mini-hard \
#     'plot.betas=[0.0,0.5]' \
#     'plot.seeds=[0,1,2,3,4]'

# Run the inference_rlhf.code.plot_token_level module with the key=value
# overrides listed below (tag gpt-4o-mini-hard, three beta values, seeds 0-4).
#
# The list-valued overrides are single-quoted on purpose: an unquoted [...]
# is a shell glob bracket expression, so it could be replaced by a matching
# filename in the current directory, dropped under `nullglob`, or rejected
# outright under zsh / bash `failglob`. Quoting passes the text through
# verbatim, so the program receives exactly plot.betas=[0.0,0.5,1.0] etc.
python3 -m inference_rlhf.code.plot_token_level \
    policy=qwen-25-7b \
    user=anonymouspli \
    task=math \
    sampling.top_k=128 \
    plot.tag=gpt-4o-mini-hard \
    'plot.betas=[0.0,0.5,1.0]' \
    'plot.seeds=[0,1,2,3,4]'