# First batch: eight background inference jobs, one per GPU.
# math_500 occupies GPUs 0-3 and omni_math occupies GPUs 4-7. Each dataset
# gets the same four runs, in this order:
#   +0  type1 checkpoint on the information_missing_level1 input
#   +1  type1 checkpoint on the parsed_reassemble input, with --use_raw
#   +2  type2 checkpoint on the parsed_reassemble input, with --use_raw
#   +3  type3 checkpoint on the information_missing_level1 input
# stdout and stderr of every job are discarded.
first_gpu=0
for prefix in math_500 omni_math; do
    missing_in=eval_data/${prefix}_information_missing_level1_verified_pp.jsonl
    reassembled_in=eval_data/${prefix}_parsed_reassemble_verified_pp.jsonl

    CUDA_VISIBLE_DEVICES=$first_gpu PYTHONPATH=$PYTHONPATH:$(pwd):$(pwd) \
        nohup python Task_3/offline_inference.py \
        --model_path sft_ckpt/qwen38b_type1_sft \
        --input_file ${missing_in} \
        --output_file Task_3/${prefix}_information_missing_level1_qwen38b_type1_sft.jsonl \
        > /dev/null 2>&1 &

    CUDA_VISIBLE_DEVICES=$((first_gpu + 1)) PYTHONPATH=$PYTHONPATH:$(pwd):$(pwd) \
        nohup python Task_3/offline_inference.py \
        --model_path sft_ckpt/qwen38b_type1_sft \
        --input_file ${reassembled_in} \
        --output_file Task_3/${prefix}_qwen38b_type1_sft.jsonl \
        --use_raw \
        > /dev/null 2>&1 &

    CUDA_VISIBLE_DEVICES=$((first_gpu + 2)) PYTHONPATH=$PYTHONPATH:$(pwd):$(pwd) \
        nohup python Task_3/offline_inference.py \
        --model_path sft_ckpt/qwen38b_type2_sft \
        --input_file ${reassembled_in} \
        --output_file Task_3/${prefix}_qwen38b_type2_sft.jsonl \
        --use_raw \
        > /dev/null 2>&1 &

    CUDA_VISIBLE_DEVICES=$((first_gpu + 3)) PYTHONPATH=$PYTHONPATH:$(pwd):$(pwd) \
        nohup python Task_3/offline_inference.py \
        --model_path sft_ckpt/qwen38b_type3_sft \
        --input_file ${missing_in} \
        --output_file Task_3/${prefix}_information_missing_level1_qwen38b_type3_sft.jsonl \
        > /dev/null 2>&1 &

    # The next dataset takes the following group of four GPUs.
    first_gpu=$((first_gpu + 4))
done

# Second batch: the type4/type5/type6 checkpoints on the same inputs as the
# first batch, again one background job per GPU (math_500 on 0-3, omni_math
# on 4-7).
#
# This batch reuses GPU indices 0-7, so block until every background job
# already started by this shell has exited; otherwise two jobs would be
# running on each device at the same time.
wait

first_gpu=0
for prefix in math_500 omni_math; do
    missing_in="eval_data/${prefix}_information_missing_level1_verified_pp.jsonl"
    reassembled_in="eval_data/${prefix}_parsed_reassemble_verified_pp.jsonl"

    # +0: type4 on the information_missing_level1 input.
    CUDA_VISIBLE_DEVICES=$first_gpu PYTHONPATH=${PYTHONPATH:+$PYTHONPATH:}$(pwd) \
        nohup python Task_3/offline_inference.py \
        --model_path sft_ckpt/qwen38b_type4_sft \
        --input_file "$missing_in" \
        --output_file "Task_3/${prefix}_information_missing_level1_qwen38b_type4_sft.jsonl" \
        > /dev/null 2>&1 &

    # +1: type4 on the parsed_reassemble input, with --use_raw.
    CUDA_VISIBLE_DEVICES=$((first_gpu + 1)) PYTHONPATH=${PYTHONPATH:+$PYTHONPATH:}$(pwd) \
        nohup python Task_3/offline_inference.py \
        --model_path sft_ckpt/qwen38b_type4_sft \
        --input_file "$reassembled_in" \
        --output_file "Task_3/${prefix}_qwen38b_type4_sft.jsonl" \
        --use_raw \
        > /dev/null 2>&1 &

    # +2: type5 on the parsed_reassemble input, with --use_raw.
    CUDA_VISIBLE_DEVICES=$((first_gpu + 2)) PYTHONPATH=${PYTHONPATH:+$PYTHONPATH:}$(pwd) \
        nohup python Task_3/offline_inference.py \
        --model_path sft_ckpt/qwen38b_type5_sft \
        --input_file "$reassembled_in" \
        --output_file "Task_3/${prefix}_qwen38b_type5_sft.jsonl" \
        --use_raw \
        > /dev/null 2>&1 &

    # +3: type6 on the information_missing_level1 input.
    CUDA_VISIBLE_DEVICES=$((first_gpu + 3)) PYTHONPATH=${PYTHONPATH:+$PYTHONPATH:}$(pwd) \
        nohup python Task_3/offline_inference.py \
        --model_path sft_ckpt/qwen38b_type6_sft \
        --input_file "$missing_in" \
        --output_file "Task_3/${prefix}_information_missing_level1_qwen38b_type6_sft.jsonl" \
        > /dev/null 2>&1 &

    first_gpu=$((first_gpu + 4))
done

# Third batch: the purpose_missing input for both datasets, run with the
# type1, type3, type4 and type6 checkpoints (eight jobs in total).
#
# Block until every background job already started by this shell has exited,
# so all of GPUs 0-7 are free before they are handed out again.
wait

# One job per GPU. The previous hand-written assignment put two of these jobs
# on GPU 0 and left GPU 7 unused; a running counter gives each job its own
# device (math_500 on 0-3, omni_math on 4-7).
gpu=0
for prefix in math_500 omni_math; do
    for sft_type in type1 type3 type4 type6; do
        CUDA_VISIBLE_DEVICES=$gpu PYTHONPATH=${PYTHONPATH:+$PYTHONPATH:}$(pwd) \
            nohup python Task_3/offline_inference.py \
            --model_path "sft_ckpt/qwen38b_${sft_type}_sft" \
            --input_file "eval_data/${prefix}_purpose_missing_verified_pp.jsonl" \
            --output_file "Task_3/${prefix}_purpose_missing_qwen38b_${sft_type}_sft.jsonl" \
            > /dev/null 2>&1 &
        gpu=$((gpu + 1))
    done
done


# Fourth batch: the remaining checkpoint/input combinations (seven jobs).
#
# Block until every background job already started by this shell has exited,
# so the GPUs handed out below are free.
wait

# Each job gets its own GPU (0-6). The earlier version stacked several of
# these jobs on GPUs 5, 6 and 7, and launched the omni_math type3 --use_raw run
# twice, so two processes wrote Task_3/omni_math_qwen38b_type3_sft.jsonl at
# the same time. It is launched once here.

prefix='math_500'
# type6 on the parsed_reassemble input, with --use_raw.
CUDA_VISIBLE_DEVICES=0 PYTHONPATH=${PYTHONPATH:+$PYTHONPATH:}$(pwd) \
    nohup python Task_3/offline_inference.py \
    --model_path sft_ckpt/qwen38b_type6_sft \
    --input_file "eval_data/${prefix}_parsed_reassemble_verified_pp.jsonl" \
    --output_file "Task_3/${prefix}_qwen38b_type6_sft.jsonl" \
    --use_raw \
    > /dev/null 2>&1 &

prefix='omni_math'
# type6 on the parsed_reassemble input, with --use_raw.
CUDA_VISIBLE_DEVICES=1 PYTHONPATH=${PYTHONPATH:+$PYTHONPATH:}$(pwd) \
    nohup python Task_3/offline_inference.py \
    --model_path sft_ckpt/qwen38b_type6_sft \
    --input_file "eval_data/${prefix}_parsed_reassemble_verified_pp.jsonl" \
    --output_file "Task_3/${prefix}_qwen38b_type6_sft.jsonl" \
    --use_raw \
    > /dev/null 2>&1 &

# type2 on the information_missing_level1 input.
CUDA_VISIBLE_DEVICES=2 PYTHONPATH=${PYTHONPATH:+$PYTHONPATH:}$(pwd) \
    nohup python Task_3/offline_inference.py \
    --model_path sft_ckpt/qwen38b_type2_sft \
    --input_file "eval_data/${prefix}_information_missing_level1_verified_pp.jsonl" \
    --output_file "Task_3/${prefix}_information_missing_level1_qwen38b_type2_sft.jsonl" \
    > /dev/null 2>&1 &

# type2 on the purpose_missing input.
CUDA_VISIBLE_DEVICES=3 PYTHONPATH=${PYTHONPATH:+$PYTHONPATH:}$(pwd) \
    nohup python Task_3/offline_inference.py \
    --model_path sft_ckpt/qwen38b_type2_sft \
    --input_file "eval_data/${prefix}_purpose_missing_verified_pp.jsonl" \
    --output_file "Task_3/${prefix}_purpose_missing_qwen38b_type2_sft.jsonl" \
    > /dev/null 2>&1 &

# type3 on the parsed_reassemble input, with --use_raw (launched once).
CUDA_VISIBLE_DEVICES=4 PYTHONPATH=${PYTHONPATH:+$PYTHONPATH:}$(pwd) \
    nohup python Task_3/offline_inference.py \
    --model_path sft_ckpt/qwen38b_type3_sft \
    --input_file "eval_data/${prefix}_parsed_reassemble_verified_pp.jsonl" \
    --output_file "Task_3/${prefix}_qwen38b_type3_sft.jsonl" \
    --use_raw \
    > /dev/null 2>&1 &

# type5 on the information_missing_level1 input.
CUDA_VISIBLE_DEVICES=5 PYTHONPATH=${PYTHONPATH:+$PYTHONPATH:}$(pwd) \
    nohup python Task_3/offline_inference.py \
    --model_path sft_ckpt/qwen38b_type5_sft \
    --input_file "eval_data/${prefix}_information_missing_level1_verified_pp.jsonl" \
    --output_file "Task_3/${prefix}_information_missing_level1_qwen38b_type5_sft.jsonl" \
    > /dev/null 2>&1 &

# type5 on the purpose_missing input.
CUDA_VISIBLE_DEVICES=6 PYTHONPATH=${PYTHONPATH:+$PYTHONPATH:}$(pwd) \
    nohup python Task_3/offline_inference.py \
    --model_path sft_ckpt/qwen38b_type5_sft \
    --input_file "eval_data/${prefix}_purpose_missing_verified_pp.jsonl" \
    --output_file "Task_3/${prefix}_purpose_missing_qwen38b_type5_sft.jsonl" \
    > /dev/null 2>&1 &

# Evaluation and case study for one dataset.
#
# The files read below are the --output_file targets of the background
# inference jobs, so block until every background job started by this shell
# has exited before reading them.
wait

# Dataset to evaluate. A preceding prefix='math_500' assignment was dead code
# (immediately overwritten), so only omni_math has ever been evaluated here.
# Change this value to evaluate another dataset.
prefix='omni_math'

# Each entry is "<raw checkpoint type>:<stem of the --input_path_cls file>".
# Types 1-3 are compared against the type1 --use_raw output and types 4-6
# against the type4 --use_raw output. The order matches the original
# hand-written sequence of calls.
for spec in \
    type1:qwen38b_type1_sft \
    type1:information_missing_level1_qwen38b_type1_sft \
    type1:purpose_missing_qwen38b_type1_sft \
    type1:qwen38b_type2_sft \
    type1:information_missing_level1_qwen38b_type2_sft \
    type1:purpose_missing_qwen38b_type2_sft \
    type1:information_missing_level1_qwen38b_type3_sft \
    type1:purpose_missing_qwen38b_type3_sft \
    type1:qwen38b_type3_sft \
    type4:qwen38b_type4_sft \
    type4:information_missing_level1_qwen38b_type4_sft \
    type4:purpose_missing_qwen38b_type4_sft \
    type4:qwen38b_type5_sft \
    type4:information_missing_level1_qwen38b_type5_sft \
    type4:purpose_missing_qwen38b_type5_sft \
    type4:information_missing_level1_qwen38b_type6_sft \
    type4:purpose_missing_qwen38b_type6_sft \
    type4:qwen38b_type6_sft
do
    raw_type=${spec%%:*}   # text before the first ':'
    cls_stem=${spec#*:}    # text after the first ':'
    PYTHONPATH=${PYTHONPATH:+$PYTHONPATH:}$(pwd) python Task_3/evaluation.py \
        --input_path_cls "Task_3/${prefix}_${cls_stem}.jsonl" \
        --input_path_raw "Task_3/${prefix}_qwen38b_${raw_type}_sft.jsonl"
done

# Case study on the type6 information_missing_level1 output.
PYTHONPATH=${PYTHONPATH:+$PYTHONPATH:}$(pwd) python Task_3/case_studies.py \
    --input_file "Task_3/${prefix}_information_missing_level1_qwen38b_type6_sft.jsonl"