#!/usr/bin/env bash
#
# Runs the math benchmark-construction pipeline for one raw dataset:
# decomposition, decomposition check, then the "information missing"
# (levels 1-3) and "purpose missing" variants, each followed by stage-3
# verification and post-processing.
#
# Usage: bash <this script> [prefix]
#   prefix  Dataset name. It selects
#           benchmark_construction/raw_datasets/get_<prefix>.py and names
#           every intermediate file as critic_bench/<prefix>*.jsonl.
#           Defaults to 'omni_math'; 'math_500' is the other prefix this
#           script has been used with.
#
# Run it from the directory that contains benchmark_construction/: all
# script paths are relative and the working directory is added to
# PYTHONPATH.
#
# Only POSIX sh syntax is used, so the script also works when started
# with `sh`.

# Abort on the first failing step or unset variable. Every step consumes
# the file written by the previous one, so continuing after a failure
# would only run later (remote, highly concurrent) steps on missing or
# stale inputs.
set -eu

prefix="${1:-omni_math}"
data="critic_bench/${prefix}"
repo_root="$(pwd)"

# Settings shared by every verification step.
model_name='deepseek-r1'
client_name='aliyun'
num_concurrent=300

# Runs `python "$@"` with the working directory appended to PYTHONPATH.
# Arguments: script path followed by that script's own arguments.
run_py() {
  # ${PYTHONPATH:+...} avoids an empty leading entry when PYTHONPATH is
  # unset and is safe under `set -u`.
  PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${repo_root}" python "$@"
}

# Runs stage-3 verification on critic_bench/<prefix>_<name>_pp.jsonl and
# then stage-3 post-processing on the resulting *_verified.jsonl file.
# Arguments: $1 - variant name, e.g. 'purpose_missing'.
verify_and_post_process() {
  run_py benchmark_construction/stage_3/verification.py \
    --input_file "${data}_$1_pp.jsonl" \
    --output_file "${data}_$1_verified.jsonl" \
    --model_name "${model_name}" \
    --client_name "${client_name}" \
    --num_concurrent "${num_concurrent}"
  run_py benchmark_construction/stage_3/post_process.py \
    --input_file "${data}_$1_verified.jsonl"
}

# Builds, post-processes and verifies the "information missing" variant
# for one task level.
# Arguments: $1 - task level (this script uses 1, 2 and 3).
build_information_missing() {
  run_py benchmark_construction/stage_2/math/information_missing.py \
    --input_file "${data}_stage1.jsonl" \
    --output_file "${data}_information_missing_level$1.jsonl" \
    --task_level "$1" --seed 42 --variants 2
  run_py benchmark_construction/stage_2/math/post_process_type_1.py \
    --input_file "${data}_information_missing_level$1.jsonl"
  verify_and_post_process "information_missing_level$1"
}

# Prints dataset statistics for one file. Reporting is best-effort: a
# failure is logged to stderr but must not stop the remaining pipeline.
# Arguments: $1 - path of the .jsonl file to summarise.
report_statistics() {
  run_py benchmark_construction/dataset_statistics.py --input_file "$1" \
    || printf 'warning: dataset_statistics.py failed for %s\n' "$1" >&2
}

# decomposition
run_py "benchmark_construction/raw_datasets/get_${prefix}.py"
run_py benchmark_construction/stage_1/math/task_decomposition.py \
  --input_file "${data}.jsonl"
run_py benchmark_construction/stage_1/math/post_process.py \
  --input_file "${data}_parsed.jsonl"

# decomposition check
run_py benchmark_construction/stage_1_verification/math/decomposition_reassemble.py \
  --input_file "${data}_parsed_pp.jsonl" \
  --output_file "${data}_parsed_reassemble.jsonl"
run_py benchmark_construction/stage_1_verification/math/post_process_reassemble.py \
  --input_file "${data}_parsed_reassemble.jsonl"
run_py benchmark_construction/stage_1_verification/math/verification.py \
  --input_file "${data}_parsed_reassemble_pp.jsonl" \
  --output_file "${data}_parsed_reassemble_verified.jsonl" \
  --model_name "${model_name}" \
  --client_name "${client_name}" \
  --num_concurrent "${num_concurrent}"
run_py benchmark_construction/stage_1_verification/math/post_process_verification.py \
  --input_file "${data}_parsed_reassemble_verified.jsonl" \
  --output_file "${data}_stage1.jsonl"

# information missing (level 1)
build_information_missing 1

# purpose missing
run_py benchmark_construction/stage_2/math/purpose_missing.py \
  --input_file "${data}_stage1.jsonl" \
  --output_file "${data}_purpose_missing.jsonl" \
  --seed 42
run_py benchmark_construction/stage_2/math/post_process_type_2.py \
  --input_file "${data}_purpose_missing.jsonl"
verify_and_post_process purpose_missing

# statistics
# NOTE(review): no step above is given *_parsed_reassemble_verified_pp.jsonl
# as an --output_file; the stage-1 result is written to *_stage1.jsonl.
# Confirm that this is the file the statistics are meant to read.
report_statistics "${data}_parsed_reassemble_verified_pp.jsonl"
report_statistics "${data}_information_missing_level1_verified_pp.jsonl"
report_statistics "${data}_purpose_missing_verified_pp.jsonl"

# information missing level2
build_information_missing 2

# information missing level3
build_information_missing 3