
# Export the GPU device list (0-7) so child processes inherit it.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
# HOME is redirected to the current working directory for everything launched
# from this script.
# NOTE(review): presumably meant to keep per-user caches/config next to the
# project; a relative HOME can confuse tools that expect an absolute path —
# confirm this is intentional.
export HOME="."

# Restart Ray from a clean state: stop whatever is running, then start a new
# head node limited to 24 CPUs. A failure of either step used to be silently
# ignored; it is now reported on stderr. The script still continues, matching
# the original best-effort behaviour.
if ! { ray stop --force && ray start --head --disable-usage-stats --num-cpus=24; }; then
  printf '%s\n' "Warning: could not restart the Ray head node (ray stop/start failed); continuing anyway." >&2
fi
# Disabled alternative: only start a head node when no cluster is reachable.
# ray status || ray start --head --disable-usage-stats --num-cpus=24

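# Disabled: choose one input shard via the first positional argument.
# PART=$1  # the first argument is used as the part number
# if [ -z "$PART" ]; then
#   # The usage message below reads: "Usage: $0 <part number, e.g. 02>"
#   echo "用法: $0 <part编号，比如 02>"
#   exit 1
# fi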

# INPUT_FILE="dataset/0922_filter_77k_data/filted_data.part-${PART}.jsonl"
# --- Models -----------------------------------------------------------------
# Checkpoint used for inference. Earlier candidates are kept for reference.
# INF_MODEL="/data//GRM-Omni-ckpt/0923_grm_omni_dpo_mix2_29k_All_step_final"
# INF_MODEL="/data//GRM-Omni-v1/GRM-Omni-ckpt/0922_GRM_omni_lang_sft_140k_mix_All_step_final"
INF_MODEL="/data//GRM-Omni-ckpt/0923_grm_omni_dpo_mix2_29k_300_step_not_merged/final"

# Models later passed as --refinement_model and --ranking_model.
# Previous choices are kept for reference.
# REFINE_MODEL="models/Qwen2.5-VL-72B-Instruct-AWQ"
# SCORE_MODEL="models/Skywork-VL-Reward-7B"
REFINE_MODEL="models/Qwen3-32B-AWQ"
SCORE_MODEL="models/Skywork-Reward-V2-Llama-3.1-8B-40M"

# --- Data locations (rewardbench_reasoning directory) -------------------------
INPUT_FILE="/data//GRM-Omni-v1/0923_omni_results/0923_filter_rewardbench_reasoning/filted_data.jsonl"
OUTPUT_DIR="/data//GRM-Omni-v1/0923_omni_results/0923_filter_rewardbench_reasoning/explore"

# Print where data is read from and where results are written.
printf '%s\n' "Exploration with Read from :[${INPUT_FILE}]."
printf '%s\n' "Exploration Results will be stored in :[${OUTPUT_DIR}]."

# Announce the currently selected inference checkpoint, then wait two seconds.
# NOTE(review): the pause presumably gives the operator time to read the banner
# or abort; the inference command that follows it is currently commented out.
printf '%s\n' "Use Model: [${INF_MODEL}] To exploration."
sleep 2

# python src/inference.py \
#     --inference_model $INF_MODEL \
#     --inference_model_modality "omni" \
#     --refinement_model $REFINE_MODEL \
#     --refinement_model_modality "language" \
#     --ranking_model $SCORE_MODEL \
#     --ranking_model_modality "language" \
#     --method "exploration" \
#     --manner "direct" \
#     --criteria_step "1" \
#     --batch_size 64 \
#     --workers 8 \
#     --tensor_parallel 1 \
#     --top_p 0.8 \
#     --temperature 0.0 \
#     --seed 132 \
#     --dpo_pool_file "/data//GRM-Omni-v1/0923_omni_results/0923_filter_rewardbench_reasoning/DPO_pool.jsonl" \
#     --input_file $INPUT_FILE \
#     --output_dir $OUTPUT_DIR


############ 3
# Exploration run 3. The input file, output directory and DPO pool file all
# live under RESULT_DIR, so the directory is spelled once and the three paths
# cannot drift apart.
RESULT_DIR="/data//GRM-Omni-v1/0923_omni_results/0923_filter_rewardbench_safety_sft"
INPUT_FILE="${RESULT_DIR}/filted_data.jsonl"
OUTPUT_DIR="${RESULT_DIR}/explore"
INF_MODEL="/data//GRM-Omni-v1/GRM-Omni-ckpt/0922_GRM_omni_lang_sft_140k_mix_All_step_final"

echo "Use Model: [${INF_MODEL}] To exploration."
sleep 2

# REFINE_MODEL and SCORE_MODEL are expected to have been set earlier in this
# script. Every expansion is double-quoted so that a path containing
# whitespace or glob characters reaches Python as exactly one argument.
# A non-zero exit is reported on stderr but does not stop the script, so any
# later steps still execute as they did before.
python src/inference.py \
    --inference_model "${INF_MODEL}" \
    --inference_model_modality "omni" \
    --refinement_model "${REFINE_MODEL}" \
    --refinement_model_modality "language" \
    --ranking_model "${SCORE_MODEL}" \
    --ranking_model_modality "language" \
    --method "exploration" \
    --manner "direct" \
    --criteria_step "1" \
    --batch_size 64 \
    --workers 8 \
    --tensor_parallel 1 \
    --top_p 0.8 \
    --temperature 0.0 \
    --seed 132 \
    --dpo_pool_file "${RESULT_DIR}/DPO_pool.jsonl" \
    --input_file "${INPUT_FILE}" \
    --output_dir "${OUTPUT_DIR}" ||
    printf 'Warning: exploration run 3 (%s) exited with status %s; continuing.\n' \
        "${INPUT_FILE}" "$?" >&2

############ 4
# Exploration run 4. The input file, output directory and DPO pool file all
# live under RESULT_DIR, so the directory is spelled once and the three paths
# cannot drift apart.
RESULT_DIR="/data//GRM-Omni-v1/0923_omni_results/0923_filter_rewardbench_safety"
INPUT_FILE="${RESULT_DIR}/filted_data.jsonl"
OUTPUT_DIR="${RESULT_DIR}/explore"
INF_MODEL="/data//GRM-Omni-ckpt/0923_grm_omni_dpo_mix2_29k_300_step_not_merged/final"


echo "Use Model: [${INF_MODEL}] To exploration."
sleep 2

# REFINE_MODEL and SCORE_MODEL are expected to have been set earlier in this
# script. Every expansion is double-quoted so that a path containing
# whitespace or glob characters reaches Python as exactly one argument.
# The command is deliberately left as a plain invocation so its exit status
# is passed on unchanged to whatever follows (or to the caller of the script).
python src/inference.py \
    --inference_model "${INF_MODEL}" \
    --inference_model_modality "omni" \
    --refinement_model "${REFINE_MODEL}" \
    --refinement_model_modality "language" \
    --ranking_model "${SCORE_MODEL}" \
    --ranking_model_modality "language" \
    --method "exploration" \
    --manner "direct" \
    --criteria_step "1" \
    --batch_size 64 \
    --workers 8 \
    --tensor_parallel 1 \
    --top_p 0.8 \
    --temperature 0.0 \
    --seed 132 \
    --dpo_pool_file "${RESULT_DIR}/DPO_pool.jsonl" \
    --input_file "${INPUT_FILE}" \
    --output_dir "${OUTPUT_DIR}"
