#!/usr/bin/env bash
#
# Runs plot_eos_prob.py once per checkpoint (Qwen3-4B-Base, GRPO-Baseline,
# GSPO-Baseline) with the "stop_by_t" metric and writes one PDF per run.
# NOTE(review): judging by the script/output names this plots EOS-probability
# curves — confirm against plot_eos_prob.py.
#
# Usage: bash <this-script>   (execute it; it calls `exit` on failure)
#
# `set -u` is deliberately NOT enabled: environment activation scripts may
# reference unset variables. Critical steps are checked explicitly instead.

readonly SHARED_ROOT=/mnt/shared-storage-user/p1-shared/wangfuting
readonly PROJECT_DIR="${SHARED_ROOT}/codes/project_tts_extrapolation"
readonly MODEL_ROOT="${SHARED_ROOT}/shared/models/verl-041-result/verl-qwen3-4b-oct"

# All relative paths below (plot_eos_prob.py, results/, plots/) are resolved
# against this directory, so abort if we cannot enter it.
cd "${PROJECT_DIR}/eval_scripts/analysis" || {
  printf 'error: cannot cd to %s\n' "${PROJECT_DIR}/eval_scripts/analysis" >&2
  exit 1
}

# Abort if the environment cannot be activated rather than silently running
# with whichever `python` is first on PATH.
# shellcheck disable=SC1091
source "${SHARED_ROOT}/miniconda3/bin/activate" verl041-test || {
  printf 'error: failed to activate environment verl041-test\n' >&2
  exit 1
}

#######################################
# Invoke plot_eos_prob.py for one result file.
# Arguments:
#   $1 - path to the input .jsonl file   (--inputs)
#   $2 - display name                    (--name)
#   $3 - cache directory                 (--cache)
#   $4 - output PDF path                 (--out)
#   $5 - optional model path             (--model); the flag is omitted when
#        empty, leaving the Python script's own default in effect
# Returns: exit status of the python process.
#######################################
run_plot() {
  local inputs=$1 name=$2 cache=$3 out=$4 model=${5:-}
  local -a args=(
    --inputs "$inputs"
    --name "$name"
    --cache "$cache"
    --out "$out"
    --batch-size 4
  )
  if [[ -n "$model" ]]; then
    args+=(--model "$model")
  fi
  args+=(--metric stop_by_t)
  python plot_eos_prob.py "${args[@]}"
}

# The three runs are independent: a failure in one does not stop the others,
# but it is remembered so the script exits non-zero overall.
status=0

run_plot \
  "${PROJECT_DIR}/results_nov/Qwen3-4B-Base-valid-all_32768_test.jsonl" \
  Qwen3-4B-Base \
  results/qwen3-4b-base \
  plots/qwen3_4b_base_prob_curve.pdf \
  || status=1

run_plot \
  "${PROJECT_DIR}/results_dec/grpo-step430-valid-all_32768_test.jsonl" \
  GRPO-Baseline \
  results/grpo-step430 \
  plots/grpo-430_prob_curve.pdf \
  "${MODEL_ROOT}/baseline-grpo-dapo-math-minibsz32/best_model_four_sets/actor/huggingface" \
  || status=1

run_plot \
  "${PROJECT_DIR}/results_dec/gspo-step500-valid-all_32768_test.jsonl" \
  GSPO-Baseline \
  results/gspo-step500 \
  plots/gspo-500_prob_curve.pdf \
  "${MODEL_ROOT}/baseline-gspo-dapo-math-minibsz32/best_model/actor/huggingface" \
  || status=1

exit "$status"