
# Launcher: run src/inference.py on the "multimodal_rewardbench" image
# benchmark using the UnifiedReward-qwen-7b checkpoint as the judge model.
#
# The inference script is referenced by a relative path, so this launcher
# must be started from the directory that contains src/.

# --- Environment -----------------------------------------------------------
# Make GPUs 0-7 visible to CUDA in the child processes.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
# vLLM switch that permits a max model length larger than the one derived
# from the model config.
export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
# NOTE(review): HOME is redirected to the project source tree, so anything
# resolving "~" (caches, dotfiles) lands there — presumably intentional;
# confirm.
export HOME="/data//GRM-Omni-v1/src"

# Exported so child processes can see it; presumably read by
# src/inference.py — TODO confirm.
export TEST_UNIFIED="true"

# --- Ray cluster -----------------------------------------------------------
# Alternative that keeps an already-running cluster instead of restarting:
# ray status || ray start --head --disable-usage-stats --num-cpus=24
#
# Force-stop any running Ray processes, then start a fresh head node.
# NOTE(review): if this fails the script still continues to the inference
# step below; add an explicit guard if a running cluster is mandatory.
ray stop --force && ray start --head --disable-usage-stats --num-cpus=24

# --- Evaluation configuration ----------------------------------------------
EVAL_MODEL="/data//hf_models/models/UnifiedReward-qwen-7b"

BENCHDIR="/data//GRM-Omni-v1/dataset/testing/benchmark/image/multimodal_bench/processed"
BENCH="multimodal_rewardbench"
OUTDIR="/data//GRM-Omni-v1/925_test/image/Unified_Qwen_multimodal"

# Log the effective settings. Expansions are quoted so the values are
# printed verbatim (no word-splitting or globbing).
printf '%s\n' "benchmark:${BENCH} output:${OUTDIR}"
printf '%s\n' "${TEST_UNIFIED}"

# --- Run inference ---------------------------------------------------------
# Every variable argument is double-quoted so a path containing whitespace
# or glob characters is passed to python as exactly one argument.
python src/inference.py \
    --inference_model "${EVAL_MODEL}" \
    --inference_model_modality "vision" \
    --benchmark_dir "${BENCHDIR}" \
    --benchmark "${BENCH}" \
    --method "pairwise_judge" \
    --manner "direct" \
    --criteria_step 1 \
    --batch_size 64 \
    --workers 8 \
    --tensor_parallel 1 \
    --top_p 0.8 \
    --temperature 0.7 \
    --seed 123 \
    --output_dir "${OUTDIR}"
