#!/bin/bash
#
# Starts two local vLLM servers and then runs batches of evaluate_content.py
# jobs against them: Qwen/Qwen3-32B on port 8001 (handed to the jobs as
# --model_path) and Qwen/Qwen2.5-Omni-7B on port 8002 (handed to the jobs as
# --evaluator_model_path).

output_folder="my_games" # your folder containing the output games and animations
avr_folder="AVR_Eval_Agent" # your folder containing the github code
api_key="YOUR-OpenRouter-API" # your openrouter api key
chromium_path="${HOME}/chromium" # your path to chromium
# NOTE(review): api_key and chromium_path are not referenced by any command
# visible in this script and are not exported -- confirm they are still needed.

# The evaluation loops further down `cd` into per-job directories, after which
# relative locations no longer resolve. Anchor both folders to the directory
# the script was launched from (a no-op when they are already absolute).
[[ "${output_folder}" == /* ]] || output_folder="${PWD}/${output_folder}"
[[ "${avr_folder}" == /* ]] || avr_folder="${PWD}/${avr_folder}"

# Server on port 8001, GPUs 0 and 1.
CUDA_VISIBLE_DEVICES=0,1 vllm serve Qwen/Qwen3-32B \
  --dtype bfloat16 \
  --api-key token-1 \
  --max-model-len 32768 \
  --gpu_memory_utilization 0.9 \
  --tensor_parallel_size 2 \
  --port 8001 &
coder_server_pid=$!

# Server on port 8002, GPUs 2 and 3.
CUDA_VISIBLE_DEVICES=2,3 vllm serve Qwen/Qwen2.5-Omni-7B \
  --dtype bfloat16 \
  --api-key token-2 \
  --max-model-len 32768 \
  --gpu_memory_utilization 0.9 \
  --tensor_parallel_size 2 \
  --trust-remote-code \
  --port 8002 &
evaluator_server_pid=$!

# Upper bound on the start-up wait, in seconds (8 minutes, the length of the
# fixed sleep this polling loop replaces).
startup_timeout=480

# Wait for the servers to start
echo "Waiting for VLLM server to start..."
if command -v curl >/dev/null 2>&1; then
  servers_ready=0
  deadline=$(( SECONDS + startup_timeout ))
  while (( SECONDS < deadline )); do
    # A server process that has already exited will never become healthy, and
    # every evaluation job would fail against it, so stop right away.
    for server_pid in "$coder_server_pid" "$evaluator_server_pid"; do
      if ! kill -0 "$server_pid" 2>/dev/null; then
        echo "vLLM server (PID ${server_pid}) exited during start-up; aborting." >&2
        kill "$coder_server_pid" "$evaluator_server_pid" 2>/dev/null
        exit 1
      fi
    done
    if curl -sf --max-time 5 -o /dev/null "http://localhost:8001/health" \
      && curl -sf --max-time 5 -o /dev/null "http://localhost:8002/health"; then
      servers_ready=1
      break
    fi
    sleep 5
  done
  if (( ! servers_ready )); then
    # Keep the previous behaviour of proceeding once the time budget is spent.
    echo "Warning: vLLM servers not healthy after ${startup_timeout}s; continuing anyway." >&2
  fi
else
  # Without curl there is nothing to poll with; fall back to a fixed wait.
  sleep 8m # it takes about 3.5min-ish
fi
echo "Starting experiments"

# Multi-round relative evaluation (with coding evaluation) of video games:
# every content variant is compared against every other variant.
#
# 8 variants * (8 - 1) opponents * 5 row indices = 280 jobs in total,
# launched in batches of 56 (one batch per row index).

contents=(
  "grok3_Omnifeedback_10to12iter_seed1"
  "grok3_nofeedback_10to12iter_seed1"
  "grok3_Omnifeedback_noassets_10to12iter_seed1"
  "grok3_nofeedback_noassets_10to12iter_seed1"
  "grok3_Omnifeedback_5to7iter_initbestof5_seed1"
  "grok3_nofeedback_5to7iter_initbestof5_seed1"
  "grok3_Omnifeedback_noassets_5to7iter_initbestof5_seed1"
  "grok3_nofeedback_noassets_5to7iter_initbestof5_seed1"
)
# The opponent list is the same set of variants.
contents2=("${contents[@]}")

# Each job runs from inside its own output directory, so every path handed to
# it has to be absolute (a no-op when the folders already are).
[[ "${output_folder}" == /* ]] || output_folder="${PWD}/${output_folder}"
[[ "${avr_folder}" == /* ]] || avr_folder="${PWD}/${avr_folder}"

# Array to store PIDs of background jobs in the current batch
pids=()
for row_index in {1..5}; do
  for content in "${contents[@]}"; do
    for content2 in "${contents2[@]}"; do
      # A variant is never compared with itself.
      if [[ "$content" == "$content2" ]]; then
        continue
      fi

      current_dir="${output_folder}/grok3_games_${row_index}_omni_eval0_multiround_relative_gen${content}_compare${content2}"
      # -p keeps re-runs from failing on an already existing directory.
      mkdir -p -- "$current_dir"

      # The subshell keeps the `cd` local to this job. With pipefail the
      # subshell's exit status is the evaluator's rather than tee's, and $!
      # is the subshell's PID, so the `wait` below reports real failures.
      (
        set -o pipefail
        cd -- "$current_dir" || exit 1
        nohup python "${avr_folder}/evaluate_content.py" \
          --use_vllm_server \
          --model_path Qwen/Qwen3-32B \
          --vllm_server_url "http://localhost:8001" \
          --api_key token-1 \
          --use_separate_evaluator \
          --evaluator_model_path Qwen/Qwen2.5-Omni-7B \
          --evaluator_vllm_server_url "http://localhost:8002" \
          --evaluator_api_key token-2 \
          --content_type video-game \
          --dataset "${avr_folder}/data/video_games_short_eval_grok3.csv" \
          --row_index "${row_index}" \
          --output_dir . \
          --seed 1 \
          --enable_audio \
          --folders "${output_folder}/games_${row_index}_${content}" \
          --folders_paired "${output_folder}/games_${row_index}_${content2}" \
          --relative \
          --multiround \
          --coding_evaluation \
          --name_is_output_dir \
          --top_p 0.95 \
          --top_k 20 \
          --repetition_penalty 1.1 \
          --temp_coding 0.0 \
          2>&1 | tee output.log
      ) &
      pids+=("$!")
    done
  done

  # Barrier: finish the whole batch for this row index before starting the next.
  echo "Waiting for specific background processes to complete: ${pids[*]}..."
  for pid in "${pids[@]}"; do
    wait "$pid" || echo "Warning: Process $pid did not exit cleanly."
  done
  echo "All waited-for processes have completed."
  pids=()

done

# Multi-round relative evaluation (with coding evaluation) of animations:
# every variant in contents is compared against every different variant in
# contents2 (both arrays are assigned earlier in the script), one batch of
# background jobs per row index.

# Each job runs from inside its own output directory, so every path handed to
# it has to be absolute (a no-op when the folders already are).
[[ "${output_folder}" == /* ]] || output_folder="${PWD}/${output_folder}"
[[ "${avr_folder}" == /* ]] || avr_folder="${PWD}/${avr_folder}"

# Array to store PIDs of background jobs in the current batch
pids=()
for row_index in {1..5}; do
  for content in "${contents[@]}"; do
    for content2 in "${contents2[@]}"; do
      # A variant is never compared with itself.
      if [[ "$content" == "$content2" ]]; then
        continue
      fi

      current_dir="${output_folder}/grok3_anim_${row_index}_omni_eval0_multiround_relative_gen${content}_compare${content2}"
      # -p keeps re-runs from failing on an already existing directory.
      mkdir -p -- "$current_dir"

      # The subshell keeps the `cd` local to this job. With pipefail the
      # subshell's exit status is the evaluator's rather than tee's, and $!
      # is the subshell's PID, so the `wait` below reports real failures.
      (
        set -o pipefail
        cd -- "$current_dir" || exit 1
        nohup python "${avr_folder}/evaluate_content.py" \
          --use_vllm_server \
          --model_path Qwen/Qwen3-32B \
          --vllm_server_url "http://localhost:8001" \
          --api_key token-1 \
          --use_separate_evaluator \
          --evaluator_model_path Qwen/Qwen2.5-Omni-7B \
          --evaluator_vllm_server_url "http://localhost:8002" \
          --evaluator_api_key token-2 \
          --content_type animation \
          --dataset "${avr_folder}/data/animations_short_eval_grok3.csv" \
          --row_index "${row_index}" \
          --output_dir . \
          --seed 1 \
          --enable_audio \
          --folders "${output_folder}/anim_${row_index}_${content}" \
          --folders_paired "${output_folder}/anim_${row_index}_${content2}" \
          --relative \
          --multiround \
          --coding_evaluation \
          --name_is_output_dir \
          --top_p 0.95 \
          --top_k 20 \
          --repetition_penalty 1.1 \
          --temp_coding 0.0 \
          2>&1 | tee output.log
      ) &
      pids+=("$!")
    done
  done

  # Barrier: finish the whole batch for this row index before starting the next.
  echo "Waiting for specific background processes to complete: ${pids[*]}..."
  for pid in "${pids[@]}"; do
    wait "$pid" || echo "Warning: Process $pid did not exit cleanly."
  done
  echo "All waited-for processes have completed."
  pids=()

done




###########################
# Before/after evaluation of animations.
#
# For every variant, final_content is compared with an earlier file from the
# same folder, once in each order (output suffixes _beforeafter1 and
# _beforeafter2). Variants in contents use temp_content_0 as the earlier file;
# variants in contents2 use temp_candidate_0_0.
# NOTE(review): presumably these are the first-iteration output and the first
# best-of-5 candidate respectively -- confirm against evaluate_content.py.

contents=(
  "grok3_Omnifeedback_10to12iter_seed1"
  "grok3_nofeedback_10to12iter_seed1"
  "grok3_Omnifeedback_noassets_10to12iter_seed1"
  "grok3_nofeedback_noassets_10to12iter_seed1"
)
contents2=(
  "grok3_Omnifeedback_5to7iter_initbestof5_seed1"
  "grok3_nofeedback_5to7iter_initbestof5_seed1"
  "grok3_Omnifeedback_noassets_5to7iter_initbestof5_seed1"
  "grok3_nofeedback_noassets_5to7iter_initbestof5_seed1"
)

# Each job runs from inside its own output directory, so every path handed to
# it has to be absolute (a no-op when the folders already are).
[[ "${output_folder}" == /* ]] || output_folder="${PWD}/${output_folder}"
[[ "${avr_folder}" == /* ]] || avr_folder="${PWD}/${avr_folder}"

#######################################
# Starts one background animation comparison of two files that live in the
# same generation folder.
# Globals:   output_folder, avr_folder (read); pids (appended)
# Arguments: $1 - row index into the dataset
#            $2 - content variant name
#            $3 - output directory suffix (beforeafter1 / beforeafter2)
#            $4 - value for --file
#            $5 - value for --file_paired
# Outputs:   job output is echoed and also written to output.log inside the
#            job directory
#######################################
launch_anim_before_after() {
  local row=$1 variant=$2 suffix=$3 file=$4 file_paired=$5
  local source_dir="${output_folder}/anim_${row}_${variant}"
  local job_dir="${output_folder}/grok3_anim_${row}_omni_eval0_multiround_relative_gen${variant}_${suffix}"

  # -p keeps re-runs from failing on an already existing directory.
  mkdir -p -- "$job_dir"

  # The subshell keeps the `cd` local to this job. With pipefail the
  # subshell's exit status is the evaluator's rather than tee's, and $! is
  # the subshell's PID, so a later `wait` reports real failures.
  (
    set -o pipefail
    cd -- "$job_dir" || exit 1
    nohup python "${avr_folder}/evaluate_content.py" \
      --use_vllm_server \
      --model_path Qwen/Qwen3-32B \
      --vllm_server_url "http://localhost:8001" \
      --api_key token-1 \
      --use_separate_evaluator \
      --evaluator_model_path Qwen/Qwen2.5-Omni-7B \
      --evaluator_vllm_server_url "http://localhost:8002" \
      --evaluator_api_key token-2 \
      --content_type animation \
      --dataset "${avr_folder}/data/animations_short_eval_grok3.csv" \
      --row_index "${row}" \
      --output_dir . \
      --seed 1 \
      --n_evaluations 0 \
      --enable_audio \
      --folders "${source_dir}" \
      --file "${file}" \
      --folders_paired "${source_dir}" \
      --file_paired "${file_paired}" \
      --relative \
      --multiround \
      --coding_evaluation \
      --name_is_output_dir \
      --top_p 0.95 \
      --top_k 20 \
      --repetition_penalty 1.1 \
      --temp_coding 0.0 \
      2>&1 | tee output.log
  ) &
  pids+=("$!")
}

#######################################
# Blocks until every job recorded in pids has finished, then empties pids.
# Globals:   pids (read, then reset)
# Outputs:   progress messages and one warning per job that exits non-zero
#######################################
wait_for_anim_batch() {
  local pid
  echo "Waiting for specific background processes to complete: ${pids[*]}..."
  for pid in "${pids[@]}"; do
    wait "$pid" || echo "Warning: Process $pid did not exit cleanly."
  done
  echo "All waited-for processes have completed."
  pids=()
}

# Array to store PIDs of background jobs in the current batch
pids=()
for row_index in {1..5}; do
  for content in "${contents[@]}"; do
    launch_anim_before_after "$row_index" "$content" beforeafter1 final_content temp_content_0
    launch_anim_before_after "$row_index" "$content" beforeafter2 temp_content_0 final_content
  done
  wait_for_anim_batch

  for content in "${contents2[@]}"; do
    launch_anim_before_after "$row_index" "$content" beforeafter1 final_content temp_candidate_0_0
    launch_anim_before_after "$row_index" "$content" beforeafter2 temp_candidate_0_0 final_content
  done
  wait_for_anim_batch

done















########## Games
# Before/after evaluation of video games.
#
# For every variant, final_content is compared with an earlier file from the
# same folder, once in each order (output suffixes _beforeafter1 and
# _beforeafter2). Variants in contents use temp_content_0 as the earlier file;
# variants in contents2 use temp_candidate_0_0. Both arrays are the ones
# assigned earlier in the script.

# Each job runs from inside its own output directory, so every path handed to
# it has to be absolute (a no-op when the folders already are).
[[ "${output_folder}" == /* ]] || output_folder="${PWD}/${output_folder}"
[[ "${avr_folder}" == /* ]] || avr_folder="${PWD}/${avr_folder}"

#######################################
# Starts one background video-game comparison of two files that live in the
# same generation folder.
# Globals:   output_folder, avr_folder (read); pids (appended)
# Arguments: $1 - row index into the dataset
#            $2 - content variant name
#            $3 - output directory suffix (beforeafter1 / beforeafter2)
#            $4 - value for --file
#            $5 - value for --file_paired
# Outputs:   job output is echoed and also written to output.log inside the
#            job directory
#######################################
launch_games_before_after() {
  local row=$1 variant=$2 suffix=$3 file=$4 file_paired=$5
  local source_dir="${output_folder}/games_${row}_${variant}"
  local job_dir="${output_folder}/grok3_games_${row}_omni_eval0_multiround_relative_gen${variant}_${suffix}"

  # -p keeps re-runs from failing on an already existing directory.
  mkdir -p -- "$job_dir"

  # The subshell keeps the `cd` local to this job. With pipefail the
  # subshell's exit status is the evaluator's rather than tee's, and $! is
  # the subshell's PID, so a later `wait` reports real failures.
  (
    set -o pipefail
    cd -- "$job_dir" || exit 1
    nohup python "${avr_folder}/evaluate_content.py" \
      --use_vllm_server \
      --model_path Qwen/Qwen3-32B \
      --vllm_server_url "http://localhost:8001" \
      --api_key token-1 \
      --use_separate_evaluator \
      --evaluator_model_path Qwen/Qwen2.5-Omni-7B \
      --evaluator_vllm_server_url "http://localhost:8002" \
      --evaluator_api_key token-2 \
      --content_type video-game \
      --dataset "${avr_folder}/data/video_games_short_eval_grok3.csv" \
      --row_index "${row}" \
      --output_dir . \
      --seed 1 \
      --n_evaluations 0 \
      --enable_audio \
      --folders "${source_dir}" \
      --file "${file}" \
      --folders_paired "${source_dir}" \
      --file_paired "${file_paired}" \
      --relative \
      --multiround \
      --coding_evaluation \
      --name_is_output_dir \
      --top_p 0.95 \
      --top_k 20 \
      --repetition_penalty 1.1 \
      --temp_coding 0.0 \
      2>&1 | tee output.log
  ) &
  pids+=("$!")
}

#######################################
# Blocks until every job recorded in pids has finished, then empties pids.
# Globals:   pids (read, then reset)
# Outputs:   progress messages and one warning per job that exits non-zero
#######################################
wait_for_games_batch() {
  local pid
  echo "Waiting for specific background processes to complete: ${pids[*]}..."
  for pid in "${pids[@]}"; do
    wait "$pid" || echo "Warning: Process $pid did not exit cleanly."
  done
  echo "All waited-for processes have completed."
  pids=()
}

# Array to store PIDs of background jobs in the current batch
pids=()
for row_index in {1..5}; do
  for content in "${contents[@]}"; do
    launch_games_before_after "$row_index" "$content" beforeafter1 final_content temp_content_0
    launch_games_before_after "$row_index" "$content" beforeafter2 temp_content_0 final_content
  done
  wait_for_games_batch

  for content in "${contents2[@]}"; do
    launch_games_before_after "$row_index" "$content" beforeafter1 final_content temp_candidate_0_0
    launch_games_before_after "$row_index" "$content" beforeafter2 temp_candidate_0_0 final_content
  done
  wait_for_games_batch

done

