# python eval.py flores-latin deepseek-v3-0324;
# python eval.py flores-latin doubao-1-6;
# python eval.py flores-latin grok-4;
# python eval.py flores-latin qwen3-coder;
# python eval.py flores-latin gpt-5-mini;
# python eval.py flores-latin glm-4.5;
# python eval.py flores-latin glm-4.5-air;

# python eval.py humaneval gpt-5-nano;
# python eval.py humaneval deepseek-v3-0324;
# python eval.py humaneval doubao-1-6;
# python eval.py humaneval grok-4;

# TOK_PATH='./cs_gate_train/models/OLMo-2-0325-32B-Instruct' python eval.py flores-latin olmo-32b-oss-norm;
# TOK_PATH='./cs_gate_train/models/OLMo-2-0325-32B-Instruct' python eval.py flores-latin olmo-32b-oss-nonorm;
# TOK_PATH='/cpfs01/user/jiawei.lyt/ckpt/verl_checkpoints/lyt-rl-gen/qwen3-tpp-thinking-fh0723-mkd035-distilled-data0706-recitex1-bothtrans-mixlangx2-GenRM-32B-sentcs-GSPO-ref-turbopp-THINK-FLIP1-2.4-LENGTH_FLIP_THRESHOLD1.3-LENGTH_FLIP_PROB0.75-REF_ANSWER_POSITION-A-expert-12k_bs512_minibs128_n8/global_step_90/actor_hf' python eval.py humaneval 30b-think-nogate;

# TOK_PATH='/cpfs01/user/jiawei.lyt/ckpt/verl_checkpoints/lyt-rl-gen/qwen3-tpp-thinking-fh0723-mkd035-distilled-data0706-recitex1-bothtrans-mixlangx2-GenRM-32B-sentcs-GSPO-ref-turbopp-THINK-FLIP1-2.4-LENGTH_FLIP_THRESHOLD1.3-LENGTH_FLIP_PROB0.75-REF_ANSWER_POSITION-A-expert-12k_bs512_minibs128_n8/global_step_90/actor_hf' python eval.py flores-latin 30b-gate-oss-norm





# USE_INTERVENTION_PROMPT='true' TOK_PATH='./cs_gate_train/models/llama3-8b' python eval.py flores-latin llama-8b-nogate
# USE_INTERVENTION_PROMPT='true' TOK_PATH='./cs_gate_train/models/gate-gemma3-12b-20k_95p_2025-08-26-02:18:32_plugged' python eval.py flores-latin gemma-12b-nogate


# # Set these for the gpt-oss eval
# export VLLM_USE_V1=1
# export HF_ENDPOINT=https://hf-mirror.com

# TOK_PATH='./cs_gate_train/models/gpt-oss-20b' python eval.py humaneval gpt-oss-20b-norm > ./logs/gpt-oss-20b-norm-humaneval.log
# TOK_PATH='./cs_gate_train/models/gpt-oss-20b' python eval.py humaneval gpt-oss-20b-nogate-local > ./logs/gpt-oss-20b-nogate-local-humaneval.log
# TOK_PATH='./cs_gate_train/models/gpt-oss-20b' python eval.py humaneval gpt-oss-20b-nonorm > ./logs/gpt-oss-20b-nonorm-humaneval.log

# TOK_PATH='./cs_gate_train/models/gpt-oss-20b' python eval.py humaneval gpt-oss-20b-norm > ./logs/gpt-oss-20b-norm-humaneval.log
# TOK_PATH='./cs_gate_train/models/gpt-oss-20b' python eval.py humaneval gpt-oss-20b-nogate-local > ./logs/gpt-oss-20b-nogate-local-humaneval.log
# TOK_PATH='./cs_gate_train/models/gpt-oss-20b' python eval.py humaneval gpt-oss-20b-nonorm > ./logs/gpt-oss-20b-nonorm-humaneval.log

# TOK_PATH='./cs_gate_train/models/gpt-oss-20b' python eval.py humaneval gpt-oss-20b-norm > ./logs/gpt-oss-20b-norm-humaneval.log
# TOK_PATH='./cs_gate_train/models/gpt-oss-20b' python eval.py humaneval gpt-oss-20b-nogate-local > ./logs/gpt-oss-20b-nogate-local-humaneval.log
# TOK_PATH='./cs_gate_train/models/gpt-oss-20b' python eval.py humaneval gpt-oss-20b-nonorm > ./logs/gpt-oss-20b-nonorm-humaneval.log


# TOK_PATH='./models/qwen3-8b' python eval.py include qwen3-8b-nogate;
# TOK_PATH='./models/qwen3-8b' python eval.py include qwen3-8b-nonorm;
# TOK_PATH='./models/qwen3-8b' python eval.py include qwen3-8b-norm;

# TOK_PATH='/cpfs01/user/jiawei.lyt/ckpt/verl_checkpoints/lyt-rl-gen/qwen3-tpp-nothink-0721-distilled-data0706-recitex1-bothtrans-mixlangx2-GenRM-32B-sentcs-GSPO-ref-turbopp-LENGTH_FLIP_THRESHOLD1.3-LENGTH_FLIP_PROB0.75-REF_ANSWER_POSITION-A-expert-12k_bs512_minibs128_n8/global_step_60/actor_hf' python eval.py include 30b-nogate;
# Invoke eval.py with task argument `include` once per model name listed in
# the loop, handing the same checkpoint directory to every run via TOK_PATH.
# TOK_PATH is set as a per-command environment assignment, so it is visible
# only to that python process and is not exported into this shell.
# The runs are sequential and not chained with `&&`: the second one starts
# even if the first exits non-zero.
# NOTE(review): TOK_PATH presumably points eval.py at the tokenizer/model
# files for this checkpoint -- confirm against eval.py.
ckpt_dir='/cpfs01/user/jiawei.lyt/ckpt/verl_checkpoints/lyt-rl-gen/qwen3-tpp-nothink-0721-distilled-data0706-recitex1-bothtrans-mixlangx2-GenRM-32B-sentcs-GSPO-ref-turbopp-LENGTH_FLIP_THRESHOLD1.3-LENGTH_FLIP_PROB0.75-REF_ANSWER_POSITION-A-expert-12k_bs512_minibs128_n8/global_step_60/actor_hf'
for model_name in 30b-gate-oss 30b-gate-oss-norm; do
  TOK_PATH="$ckpt_dir" python eval.py include "$model_name"
done

# TOK_PATH='./cs_gate_train/models/llama3-8b' python eval.py include llama-8b-nogate
# TOK_PATH='./cs_gate_train/models/llama3-8b' python eval.py include llama-8b-oss
# TOK_PATH='./cs_gate_train/models/llama3-8b' python eval.py include llama-8b-oss_nonorm

# TOK_PATH='./cs_gate_train/models/gemma3-12b' python eval.py include gemma-12b-nogate
# TOK_PATH='./cs_gate_train/models/gemma3-12b' python eval.py include gemma-12b-oss
# TOK_PATH='./cs_gate_train/models/gemma3-12b' python eval.py include gemma-12b-oss-nonorm