#!/bin/bash

# GPUs exposed to every process started from this script (five devices).
CUDA_VISIBLE_DEVICES=0,1,2,3,4
# Put the directory the script is launched from at the front of Python's
# module search path.
PYTHONPATH="$(pwd):${PYTHONPATH}"
export CUDA_VISIBLE_DEVICES PYTHONPATH

# Upper bound of the iteration loop below (iterations run from 1 to iter_num).
iter_num=3

# Namespace prefix of the datasets requested from scripts/download_datasets.py.
username="my_user_name"
# Tag embedded in every model and dataset path produced by this run.
projectname="two_agent_2"

# ---------------------------------------------------------------------------
# Iterative training loop.
#
# Iteration 1 trains a DPO model on the downloaded dataset, without passing a
# model_name_or_path override to the recipe.
# Every later iteration:
#   1. generates with the DPO model of the previous iteration (--index 0),
#   2. generates with the RDPO model of the previous iteration (--index 1),
#      reading the merged output of step 1,
#   3. annotates the merged output of step 2 with scripts/pairrm_annotate_local.py,
#   4. rewrites the recipe's dataset_mixer line to the annotated dataset,
#   5. trains a DPO model and an RDPO model.
# Steps 1-3 are split into one shard per GPU and merged with
# scripts/merge_and_save.py after each step.
# ---------------------------------------------------------------------------

# Number of shards for generation/annotation and number of processes for
# `accelerate launch`. Shard k runs on GPU k, so keep this in sync with the
# CUDA_VISIBLE_DEVICES list exported at the top of this script.
num_gpus=5

accelerate_config="recipes/accelerate_configs/deepspeed_zero3.yaml"
recipe_dir="recipes/ultra_beta/dpo"

#######################################
# Waits for every given background job and reports whether all succeeded.
# Arguments: PIDs of background jobs started by this shell.
# Returns:   0 if every job exited with status 0, 1 otherwise.
#######################################
wait_all() {
    local pid status=0
    for pid in "$@"; do
        # A bare `wait` always returns 0; waiting per PID exposes failures.
        wait "$pid" || status=1
    done
    return "$status"
}

#######################################
# Runs "<command...> <part>" in the background for part = 0..num_gpus-1 and
# waits for all of them to finish.
# Globals:   num_gpus (read)
# Arguments: command and leading arguments; the part index is appended last.
# Returns:   0 if every shard succeeded, 1 otherwise.
#######################################
run_sharded() {
    local part pids=()
    for ((part = 0; part < num_gpus; part++)); do
        "$@" "$part" &
        pids+=("$!")
    done
    wait_all "${pids[@]}"
}

#######################################
# Runs scripts/generate_full_vllm2.py for one shard on the GPU with the same
# number as the shard.
# Globals:   num_gpus (read)
# Arguments: $1 model, $2 input dataset, $3 output dataset,
#            $4 value for --index, $5 part index
#######################################
generate_shard() {
    local model=$1 input=$2 output=$3 index=$4 part=$5
    CUDA_VISIBLE_DEVICES="$part" python scripts/generate_full_vllm2.py \
        --model "$model" --dataset "$input" --output "$output" \
        --part "$part" --total "$num_gpus" --index "$index" --temperature 0.7
}

#######################################
# Runs scripts/pairrm_annotate_local.py for one shard on the GPU with the
# same number as the shard.
# Globals:   num_gpus (read)
# Arguments: $1 input dataset, $2 output dataset, $3 part index
#######################################
annotate_shard() {
    local input=$1 output=$2 part=$3
    CUDA_VISIBLE_DEVICES="$part" python scripts/pairrm_annotate_local.py \
        --dataset "$input" --output "$output" --part "$part" --total "$num_gpus"
}

#######################################
# Replaces line 7 of a recipe file with a dataset_mixer entry whose "updated"
# dataset is the given one.
# NOTE(review): the edit is addressed by line number, so it relies on
# dataset_mixer living on line 7 of every recipe - confirm when editing them.
# Arguments: $1 dataset name or path, $2 recipe file
#######################################
set_dataset_mixer() {
    local dataset=$1 recipe=$2
    sed -i "7s|.*|dataset_mixer: {updated: ${dataset}, original: HuggingFaceH4/ultrafeedback_binarized}|" "$recipe"
}

#######################################
# Launches a training script through accelerate with the DeepSpeed ZeRO-3
# config file.
# Globals:   accelerate_config, num_gpus (read)
# Arguments: $1 training script, $2 recipe file, remaining: key=value overrides
#######################################
launch_training() {
    local script=$1 recipe=$2
    shift 2
    ACCELERATE_LOG_LEVEL=info accelerate launch \
        --config_file "$accelerate_config" \
        --num_processes="$num_gpus" \
        "$script" "$recipe" "$@"
}

for ((i = 1; i <= iter_num; i++)); do
    prev=$((i - 1))

    # Models that generate the responses for this iteration.
    model_name_or_path="model/${projectname}_dpo_iter_${prev}"
    model_name_or_path_rdpo="model/${projectname}_rdpo_iter_${prev}"
    if ((i == 2)); then
        # Iteration 1 trains no RDPO model; in iteration 2 both generators
        # start from the same full_vanilla_dpo_iter_1 checkpoint.
        model_name_or_path="${username}/full_vanilla_dpo_iter_1"
        model_name_or_path_rdpo="${username}/full_vanilla_dpo_iter_1"
    fi

    # Source dataset for this iteration and its local copy.
    dataset_this_turn="${username}/ultrafeedback_binarized_with_response_full_part${prev}"
    dataset_this_turn_local="datasets/ultrafeedback_binarized_with_response_full_part${prev}"

    # Intermediate and final datasets produced during this iteration.
    dataset_vllm_1="datasets/ultrafeedback_binarized_${projectname}_vllm_1_part_${i}"
    dataset_vllm_2="datasets/ultrafeedback_binarized_${projectname}_vllm_2_part_${i}"
    dataset_train="datasets/ultrafeedback_binarized_${projectname}_train_part_${i}"

    # Names and output directories of the models trained in this iteration.
    hub_model_id="${projectname}_dpo_iter_${i}"
    output_dir="model/${hub_model_id}"
    hub_model_id_rdpo="${projectname}_rdpo_iter_${i}"
    output_dir_rdpo="model/${hub_model_id_rdpo}"

    recipe="${recipe_dir}/config_full_iter_${i}.yaml"

    # Not fatal on its own: later steps that need the local copy fail instead.
    python scripts/download_datasets.py --dataset "$dataset_this_turn" --output "$dataset_this_turn_local"

    echo "Start training iteration $i"

    if ((i == 1)); then
        set_dataset_mixer "$dataset_this_turn" "$recipe"
        launch_training scripts/run_dpo.py "$recipe" \
            "hub_model_id=$hub_model_id" "output_dir=$output_dir" || exit 1
        continue
    fi

    # Step 1: generation with the DPO model.
    run_sharded generate_shard \
        "$model_name_or_path" "$dataset_this_turn_local" "$dataset_vllm_1" 0 || exit 1
    python scripts/merge_and_save.py --dataset "$dataset_vllm_1" --total "$num_gpus" || exit 1

    # Step 2: generation with the RDPO model on the merged output of step 1.
    run_sharded generate_shard \
        "$model_name_or_path_rdpo" "$dataset_vllm_1" "$dataset_vllm_2" 1 || exit 1
    python scripts/merge_and_save.py --dataset "$dataset_vllm_2" --total "$num_gpus" || exit 1

    # Step 3: annotation of the merged output of step 2.
    run_sharded annotate_shard "$dataset_vllm_2" "$dataset_train" || exit 1
    python scripts/merge_and_save.py --dataset "$dataset_train" --total "$num_gpus" || exit 1

    # Step 4: point the recipe at the freshly annotated dataset.
    set_dataset_mixer "$dataset_train" "$recipe"

    # Step 5: train both models from their respective starting checkpoints.
    launch_training scripts/run_dpo.py "$recipe" \
        "model_name_or_path=$model_name_or_path" \
        "hub_model_id=$hub_model_id" "output_dir=$output_dir" || exit 1

    launch_training scripts/run_rdpo.py "$recipe" \
        "model_name_or_path=$model_name_or_path_rdpo" \
        "hub_model_id=$hub_model_id_rdpo" "output_dir=$output_dir_rdpo" || exit 1
done

# CUDA_VISIBLE_DEVICES=0 python scripts/alpaca.py --name $projectname --model model/${projectname}_iter_3


