#!/bin/bash

# --- User configuration ------------------------------------------------------
# Folder containing the output games and animations.
output_folder='my_games'
# Folder containing the GitHub code.
avr_folder='AVR_Eval_Agent'
# Your OpenRouter API key.
api_key='YOUR-OpenRouter-API'
# Path to chromium.
chromium_path="${HOME}/chromium"

# Launch the two vLLM servers in the background (assuming 4 GPUs):
#   GPUs 0,1 -> Qwen/Qwen3-32B       on port 8001 (API key token-1)
#   GPUs 2,3 -> Qwen/Qwen2.5-Omni-7B on port 8002 (API key token-2)
# The PIDs are recorded so the servers can be monitored during startup and
# shut down when this script exits.
CUDA_VISIBLE_DEVICES=0,1 vllm serve Qwen/Qwen3-32B \
  --dtype bfloat16 \
  --api-key token-1 \
  --max-model-len 32768 \
  --gpu_memory_utilization 0.9 \
  --tensor_parallel_size 2 \
  --port 8001 &
qwen3_server_pid=$!
CUDA_VISIBLE_DEVICES=2,3 vllm serve Qwen/Qwen2.5-Omni-7B \
  --dtype bfloat16 \
  --api-key token-2 \
  --max-model-len 32768 \
  --gpu_memory_utilization 0.9 \
  --tensor_parallel_size 2 \
  --trust-remote-code \
  --port 8002 &
omni_server_pid=$!

# Terminate both servers whenever the script exits, so they do not keep
# holding the GPUs and ports after the experiments are done (or aborted).
stop_vllm_servers() {
  kill "$qwen3_server_pid" "$omni_server_pid" 2>/dev/null
}
trap stop_vllm_servers EXIT

# Wait for the servers to start: 8 minutes in total (it takes about
# 3.5min-ish). The wait is sliced into 10-second steps so that a server that
# died during startup aborts the script instead of being noticed only after
# the experiments fail.
echo "Waiting for VLLM server to start..."
for ((waited = 0; waited < 480; waited += 10)); do
  sleep 10
  for server_pid in "$qwen3_server_pid" "$omni_server_pid"; do
    if ! kill -0 "$server_pid" 2>/dev/null; then
      echo "vLLM server (PID ${server_pid}) exited during startup; aborting." >&2
      exit 1
    fi
  done
done
echo "Starting experiments"

# Assuming you have trained the following two models and they are in those folders
model_1="games_1_kimik2_Omnifeedback_10to12iter_seed1"
model_2="games_1_qwen3coder_Omnifeedback_10to12iter_seed1"

# The dataset row to evaluate is not defined anywhere in this script, so it
# must be provided through the environment (row_index=<N> ./this_script).
: "${row_index:?set row_index to the dataset row to evaluate}"

# Print $1 as an absolute path: unchanged if already absolute, otherwise
# anchored at the current working directory.
to_abs_path() {
  case "$1" in
    /*) printf '%s\n' "$1" ;;
    *) printf '%s\n' "${PWD}/$1" ;;
  esac
}

# Resolve the configured folders before any `cd`: each evaluation runs from
# inside its own result directory, where relative folder names would no
# longer point at the code, the dataset, or the model outputs.
output_root=$(to_abs_path "$output_folder")
avr_root=$(to_abs_path "$avr_folder")

#######################################
# Run one relative (pairwise) evaluation of two model output folders.
# Results go to <output_root>/avreval_model1_<first>_model2_<second>, which
# is also the working directory of the evaluation (--output_dir .).
# Globals:   output_root, avr_root, row_index (read)
# Arguments: $1 - folder name passed as --folders
#            $2 - folder name passed as --folders_paired
# Returns:   exit status of evaluate_content.py, or non-zero if the result
#            directory cannot be created or entered
#######################################
run_pairwise_eval() {
  local first=$1
  local second=$2
  local run_dir="${output_root}/avreval_model1_${first}_model2_${second}"

  # -p: re-running the script must not fail on an existing directory.
  mkdir -p -- "$run_dir" || return

  # Subshell: the directory change stays local to this evaluation.
  (
    cd -- "$run_dir" || exit
    python "${avr_root}/evaluate_content.py" \
      --use_vllm_server \
      --model_path Qwen/Qwen3-32B \
      --vllm_server_url "http://localhost:8001" \
      --api_key token-1 \
      --use_separate_evaluator \
      --evaluator_model_path Qwen/Qwen2.5-Omni-7B \
      --evaluator_vllm_server_url "http://localhost:8002" \
      --evaluator_api_key token-2 \
      --content_type video-game \
      --dataset "${avr_root}/data/video_games_short.csv" \
      --row_index "${row_index}" \
      --output_dir . \
      --seed 1 \
      --enable_audio \
      --folders "${output_root}/${first}" \
      --folders_paired "${output_root}/${second}" \
      --relative \
      --multiround \
      --coding_evaluation \
      --name_is_output_dir \
      --top_p 0.95 \
      --top_k 20 \
      --repetition_penalty 1.1 \
      --temp_coding 0.0
  )
}

# A vs B
run_pairwise_eval "$model_1" "$model_2"

# B vs A
run_pairwise_eval "$model_2" "$model_1"
