#!/usr/bin/env bash
#
# Batch evaluation driver: runs `swift infer` once per test dataset listed in
# `datasets`, writing one result file and one log file per dataset.
#
# Bash is required (the dataset list is a Bash array), hence the explicit
# shebang above.

# Expose eight GPU indices to the inference process; the inference call in
# this script requests a vLLM tensor-parallel size of 8.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# Checkpoint directory passed to `swift infer --model`.
MODEL_PATH="ckpts/sft_ckpts/critic-sft-merged-new/v0-20250903-102221/checkpoint-1730"
# Directory holding one `<dataset>.jsonl` validation file per dataset.
VAL_DIR="other/change/new_v2_2/data_test_mos"
# Directory receiving one `<dataset>.jsonl` inference result file per dataset.
RESULT_DIR="infer_results/GRM3_2/sft"
# Directory receiving one `<dataset>.log` file (stdout + stderr) per dataset.
LOG_DIR="logs_eval/GRM3_2/sft"


# Dataset names; each is used as the file stem for the input, result and log.
datasets=("bvcc_test" "nisqa_test_for" "nisqa_test_livetalk" "nisqa_test_p501" "singmos_1w_test" "somos_test" "tmhint_qi_1w_test" "vmc_track1" "vmc_track2" "vmc_track3")



# Make sure the output directories exist before the first run. Without
# LOG_DIR the log redirection below fails and `swift infer` is never started.
mkdir -p -- "$RESULT_DIR" "$LOG_DIR" \
    || echo "Warning: could not create ${RESULT_DIR} and/or ${LOG_DIR}" >&2

# Names of datasets whose inference run exited non-zero (or could not start).
failed_datasets=()

# Run inference on every dataset in turn. A failure on one dataset is
# reported but does not stop the remaining datasets from being evaluated.
for dataset in "${datasets[@]}"; do
    echo "Evaluating dataset: $dataset"

    # stdout and stderr of the run both go to the per-dataset log file.
    if swift infer \
        --model "$MODEL_PATH" \
        --model_type qwen2_audio \
        --stream true \
        --infer_backend vllm \
        --vllm_tensor_parallel_size 8 \
        --temperature 0.0 \
        --max_new_tokens 512 \
        --vllm_limit_mm_per_prompt '{"audio": 2}' \
        --val_dataset "${VAL_DIR}/${dataset}.jsonl" \
        --result_path "${RESULT_DIR}/${dataset}.jsonl" > "${LOG_DIR}/${dataset}.log" 2>&1; then
        echo "Evaluation dataset: $dataset completed, log saved to ${LOG_DIR}/${dataset}.log"
    else
        # In the else branch, $? still holds the status of the failed command
        # (a failed redirection also lands here).
        status=$?
        failed_datasets+=("$dataset")
        echo "Evaluation dataset: $dataset FAILED (exit status ${status}), see ${LOG_DIR}/${dataset}.log" >&2
    fi
done

# Summarise failures so they are not lost in the per-dataset output.
if (( ${#failed_datasets[@]} > 0 )); then
    echo "Datasets with failed evaluation (${#failed_datasets[@]}): ${failed_datasets[*]}" >&2
fi