
# Stage 1: build the SFT training sets from the Task_1 outputs.
#
# `prefix` selects the dataset family. The assignment used to be commented out,
# which left ${prefix} empty and made every path below resolve to names such as
# Task_1/_non_cls_training.jsonl, while the training stage further down reads
# Task_3/openr1_math_cot_training.jsonl. Default it to that dataset, but still
# honour a value that is already set in the calling environment.
prefix="${prefix:-openr1_math}"

# Run Task_3/data_organization.py with the current directory on PYTHONPATH.
# All arguments are forwarded unchanged. The working directory is appended only
# once, and no empty leading entry is produced when PYTHONPATH is unset.
organize_data() {
  PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}$(pwd)" \
    python Task_3/data_organization.py "$@"
}

# Sets built WITHOUT --include_thought ("non_cot"): both inputs, non_cls only,
# cls only.
organize_data \
  --input_file_non_cls "Task_1/${prefix}_non_cls_training.jsonl" \
  --input_file_cls "Task_1/${prefix}_cls_training.jsonl" \
  --output_file "Task_3/${prefix}_non_cot_training.jsonl"
organize_data \
  --input_file_non_cls "Task_1/${prefix}_non_cls_training.jsonl" \
  --output_file "Task_3/${prefix}_non_cot_non_cls_training.jsonl"
organize_data \
  --input_file_cls "Task_1/${prefix}_cls_training.jsonl" \
  --output_file "Task_3/${prefix}_non_cot_only_cls_training.jsonl"

# Same three combinations WITH --include_thought ("cot").
organize_data \
  --input_file_non_cls "Task_1/${prefix}_non_cls_training.jsonl" \
  --input_file_cls "Task_1/${prefix}_cls_training.jsonl" \
  --output_file "Task_3/${prefix}_cot_training.jsonl" \
  --include_thought
organize_data \
  --input_file_non_cls "Task_1/${prefix}_non_cls_training.jsonl" \
  --output_file "Task_3/${prefix}_cot_non_cls_training.jsonl" \
  --include_thought
# NOTE(review): the non_cot counterpart is named *_only_cls_training.jsonl but
# this one is *_cot_only_training.jsonl. The name is kept as-is because other
# tooling may already read it; confirm before renaming.
organize_data \
  --input_file_cls "Task_1/${prefix}_cls_training.jsonl" \
  --output_file "Task_3/${prefix}_cot_only_training.jsonl" \
  --include_thought

#deepspeed --module openrlhf.cli.train_sft \
#   --max_len 4096 \
#   --dataset Task_3/${prefix}_non_cot_training.jsonl \
#   --input_key prompt \
#   --output_key response \
#   --train_batch_size 64 \
#   --micro_train_batch_size 8 \
#   --apply_chat_template \
#   --max_samples 500000 \
#   --pretrain /data/model_path/models/Qwen3-8B-Base \
#   --save_path Task_3/sft/qwen38b_type1_sft \
#   --save_steps -1 \
#   --logging_steps 1 \
#   --eval_steps -1 \
#   --zero_stage 3 \
#   --max_epochs 1 \
#   --bf16 \
#   --flash_attn \
#   --learning_rate 1e-5 \
#   --load_checkpoint \
#   --packing_samples \
#   --gradient_checkpointing
#
#
#deepspeed --module openrlhf.cli.train_sft \
#   --max_len 4096 \
#   --dataset Task_3/${prefix}_non_cot_non_cls_training.jsonl \
#   --input_key prompt \
#   --output_key response \
#   --train_batch_size 64 \
#   --micro_train_batch_size 8 \
#   --apply_chat_template \
#   --max_samples 500000 \
#   --pretrain /data/model_path/models/Qwen3-8B-Base \
#   --save_path Task_3/sft/qwen38b_type2_sft \
#   --save_steps -1 \
#   --logging_steps 1 \
#   --eval_steps -1 \
#   --zero_stage 3 \
#   --max_epochs 1 \
#   --bf16 \
#   --flash_attn \
#   --learning_rate 1e-5 \
#   --load_checkpoint \
#   --packing_samples \
#   --gradient_checkpointing
#
#deepspeed --module openrlhf.cli.train_sft \
#   --max_len 20480 \
#   --dataset Task_3/${prefix}_cot_non_cls_training.jsonl \
#   --input_key prompt \
#   --output_key response \
#   --train_batch_size 64 \
#   --micro_train_batch_size 1 \
#   --apply_chat_template \
#   --max_samples 500000 \
#   --pretrain /data/model_path/models/Qwen3-8B-Base \
#   --save_path Task_3/sft/qwen38b_type4_sft \
#   --save_steps -1 \
#   --logging_steps 1 \
#   --eval_steps -1 \
#   --zero_stage 3 \
#   --max_epochs 1 \
#   --bf16 \
#   --flash_attn \
#   --learning_rate 1e-5 \
#   --load_checkpoint \
#   --packing_samples \
#   --gradient_checkpointing

# Stage 2: supervised fine-tuning run that produces the "type3" checkpoint.
# It trains on Task_3/${prefix}_cot_training.jsonl and writes the result to
# Task_3/sft/qwen38b_type3_sft.
prefix='openr1_math'

# Launch openrlhf.cli.train_sft through DeepSpeed with the type3 settings.
# Reads the global `prefix` to locate the dataset; takes no arguments.
train_type3_sft() {
  deepspeed --module openrlhf.cli.train_sft \
    --max_len 20480 \
    --dataset "Task_3/${prefix}_cot_training.jsonl" \
    --input_key prompt \
    --output_key response \
    --train_batch_size 64 \
    --micro_train_batch_size 1 \
    --apply_chat_template \
    --max_samples 500000 \
    --pretrain /data/model_path/models/Qwen3-8B-Base \
    --save_path Task_3/sft/qwen38b_type3_sft \
    --save_steps -1 \
    --logging_steps 1 \
    --eval_steps -1 \
    --zero_stage 3 \
    --max_epochs 1 \
    --bf16 \
    --flash_attn \
    --learning_rate 1e-5 \
    --load_checkpoint \
    --packing_samples \
    --gradient_checkpointing
}

train_type3_sft

# Stage 3: run the fine-tuned checkpoints on the critic_bench inputs, then
# score the type3 outputs.
#
# The evaluation commands used to sit after the loop and reuse the leftover
# loop variable, so only the last benchmark (omni_math) was ever scored and the
# math_500 outputs were never evaluated. Evaluation now runs in its own loop
# over the same benchmark list, after all inference has finished. The dead
# `prefix='math_500'` assignment that preceded the loop has been removed.

# Current directory appended to PYTHONPATH once, with no empty leading entry
# when PYTHONPATH is unset.
task3_pythonpath="${PYTHONPATH:+${PYTHONPATH}:}$(pwd)"

for prefix in 'math_500' 'omni_math'; do
  # type3 checkpoint on the information_missing_level1 input.
  CUDA_VISIBLE_DEVICES=4,5 PYTHONPATH="${task3_pythonpath}" \
    python Task_3/offline_inference.py \
    --model_path Task_3/sft/qwen38b_type3_sft \
    --input_file "critic_bench/${prefix}_information_missing_level1_verified_pp.jsonl" \
    --output_file "Task_3/${prefix}_information_missing_level1_qwen38b_type3_sft.jsonl"

  # type3 checkpoint on the parsed_reassemble input (--use_raw).
  CUDA_VISIBLE_DEVICES=4,5 PYTHONPATH="${task3_pythonpath}" \
    python Task_3/offline_inference.py \
    --model_path Task_3/sft/qwen38b_type3_sft \
    --input_file "critic_bench/${prefix}_parsed_reassemble_verified_pp.jsonl" \
    --output_file "Task_3/${prefix}_qwen38b_type3_sft.jsonl" \
    --use_raw
#  PYTHONPATH=$PYTHONPATH:$(pwd):$(pwd) python Task_3/evaluation.py --input_path_cls Task_3/${prefix}_qwen38b_type1_sft.jsonl --input_path_raw Task_3/${prefix}_qwen38b_type1_sft.jsonl
#  PYTHONPATH=$PYTHONPATH:$(pwd):$(pwd) python Task_3/evaluation.py --input_path_cls Task_3/${prefix}_information_missing_level1_qwen38b_type1_sft.jsonl --input_path_raw Task_3/${prefix}_qwen38b_type1_sft.jsonl

  # type4 checkpoint on the parsed_reassemble input (--use_raw).
  # NOTE(review): the training command that writes Task_3/sft/qwen38b_type4_sft
  # is commented out earlier in this script, so that checkpoint must already
  # exist on disk for this step to succeed.
  CUDA_VISIBLE_DEVICES=4,5 PYTHONPATH="${task3_pythonpath}" \
    python Task_3/offline_inference.py \
    --model_path Task_3/sft/qwen38b_type4_sft \
    --input_file "critic_bench/${prefix}_parsed_reassemble_verified_pp.jsonl" \
    --output_file "Task_3/${prefix}_qwen38b_type4_sft.jsonl" \
    --use_raw

#  CUDA_VISIBLE_DEVICES=6 PYTHONPATH=$PYTHONPATH:$(pwd):$(pwd) python Task_3/offline_inference.py --model_path Task_3/sft/qwen38b_type2_sft --input_file critic_bench/${prefix}_parsed_reassemble_verified_pp.jsonl --output_file Task_3/${prefix}_qwen38b_type2_sft.jsonl --use_raw
#  PYTHONPATH=$PYTHONPATH:$(pwd):$(pwd) python Task_3/evaluation.py --input_path_cls Task_3/${prefix}_qwen38b_type2_sft.jsonl --input_path_raw Task_3/${prefix}_qwen38b_type2_sft.jsonl
#
#  CUDA_VISIBLE_DEVICES=4,5,6,7 PYTHONPATH=$PYTHONPATH:$(pwd):$(pwd) python Task_3/offline_inference.py --model_path Task_3/sft/qwen38b_type4_sft --input_file critic_bench/${prefix}_parsed_reassemble_verified_pp.jsonl --output_file Task_3/${prefix}_qwen38b_type4_sft.jsonl --use_raw
#  PYTHONPATH=$PYTHONPATH:$(pwd):$(pwd) python Task_3/evaluation.py --input_path_cls Task_3/${prefix}_qwen38b_type2_sft.jsonl --input_path_raw Task_3/${prefix}_qwen38b_type2_sft.jsonl
done

# Score the type3 outputs for every benchmark, not just the last one.
for prefix in 'math_500' 'omni_math'; do
  # information_missing_level1 output as the cls input, raw output as the raw input.
  PYTHONPATH="${task3_pythonpath}" python Task_3/evaluation.py \
    --input_path_cls "Task_3/${prefix}_information_missing_level1_qwen38b_type3_sft.jsonl" \
    --input_path_raw "Task_3/${prefix}_qwen38b_type3_sft.jsonl"

  # Raw output passed as both the cls and the raw input.
  PYTHONPATH="${task3_pythonpath}" python Task_3/evaluation.py \
    --input_path_cls "Task_3/${prefix}_qwen38b_type3_sft.jsonl" \
    --input_path_raw "Task_3/${prefix}_qwen38b_type3_sft.jsonl"
done
