
# Launcher for src/inference.py using the "exploration" method in "direct"
# manner. Results are written to the "_test_dpo_gen" output directory.
#
# The script path (src/inference.py) and --input_file are relative, so run
# this from the directory that contains both `src/` and `dataset/`.

# GPUs made visible to the Python process.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# NOTE(review): this overrides HOME for the Python process and everything it
# spawns, so any tool resolving `~` or per-user cache/config directories will
# use this path instead of the real home directory. Presumably intentional
# (e.g. to redirect caches) -- confirm.
export HOME="/home/export/base/ycsc_1/1/online1//GRM-Omni/src"

# Optional (disabled): restart a local Ray head node before launching.
# ray stop --force && ray start --head --disable-usage-stats --num-cpus=24

# Model locations. The inference and refinement stages point at the same
# checkpoint; the ranking stage uses a separate reward model.
INF_MODEL="/data//hf_models/models/Qwen3-32B-AWQ"
REFINE_MODEL="/data//hf_models/models/Qwen3-32B-AWQ"
SCORE_MODEL="/data//hf_models/models/Skywork-Reward-V2-Llama-3.1-8B-40M"


# Variable expansions are double-quoted so that a model path containing
# whitespace or glob characters is still passed as a single argument.
python src/inference.py \
    --max_input_size '1' \
    --inference_model "$INF_MODEL" \
    --inference_model_modality "language" \
    --refinement_model "$REFINE_MODEL" \
    --refinement_model_modality "language" \
    --ranking_model "$SCORE_MODEL" \
    --ranking_model_modality "language" \
    --method "exploration" \
    --manner "direct" \
    --criteria_step "1" \
    --batch_size 1 \
    --workers 1 \
    --tensor_parallel 1 \
    --top_p 0.8 \
    --temperature 0.7 \
    --seed 132 \
    --dpo_pool_file "/data//GRM-Omni-v1/dataset/dpo_poll.jsonl" \
    --input_file "dataset/wrong_samples.jsonl" \
    --output_dir "_test_dpo_gen"
