#!/bin/bash

# GPUs visible to every child process unless a command overrides the variable
# for itself (the per-shard jobs below do so).
export CUDA_VISIBLE_DEVICES=1,2,3,4

# Put the current working directory first on Python's module search path.
# The ${VAR:+...} form appends the previous value only when it is non-empty,
# so an unset/empty PYTHONPATH does not leave a dangling trailing ':'.
export PYTHONPATH="${PWD}${PYTHONPATH:+:${PYTHONPATH}}"


# Account/namespace prefix used when building the remote dataset id.
username="my_user_name"
# Run name embedded in every model and dataset path produced by this script.
projectname="two_agent_1_epoch_2"

# GPUs used for the data-parallel generation/annotation stages (GPU 0 is left
# unused). The job pinned to GPU g processes shard (g % num_shards), i.e.
# GPUs 1,2,3,4 handle parts 1,2,3,0.
gpus=(1 2 3 4)
num_shards=${#gpus[@]}

#######################################
# Wait for every background job whose PID is passed as an argument.
# A bare `wait` always returns 0 and `cmd || exit 1 &` only terminates the
# background subshell, so each PID has to be waited on individually for a
# shard failure to be noticed.
# Arguments: PIDs of background jobs started by the current shell.
# Returns:   0 if every job exited with status 0, 1 otherwise.
#######################################
wait_all() {
    local pid status=0
    for pid in "$@"; do
        wait "$pid" || status=1
    done
    return "$status"
}

#######################################
# Run scripts/generate_full_vllm2.py once per GPU, in parallel, one shard each.
# Globals:   gpus, num_shards (read)
# Arguments: $1 - model path or id passed as --model
#            $2 - input dataset path passed as --dataset
#            $3 - output dataset path passed as --output
#            $4 - value passed as --index
#            $5 - value passed as --temperature
# Returns:   0 if all shards succeeded, 1 if any shard failed.
#######################################
generate_responses() {
    local model=$1 input=$2 output=$3 index=$4 temperature=$5
    local gpu
    local -a pids=()
    for gpu in "${gpus[@]}"; do
        CUDA_VISIBLE_DEVICES="$gpu" python scripts/generate_full_vllm2.py \
            --model "$model" --dataset "$input" --output "$output" \
            --part "$((gpu % num_shards))" --total "$num_shards" \
            --index "$index" --temperature "$temperature" &
        pids+=("$!")
    done
    wait_all "${pids[@]}"
}

#######################################
# Run scripts/pairrm_annotate_local.py once per GPU, in parallel, one shard each.
# Globals:   gpus, num_shards (read)
# Arguments: $1 - input dataset path passed as --dataset
#            $2 - output dataset path passed as --output
# Returns:   0 if all shards succeeded, 1 if any shard failed.
#######################################
annotate_preferences() {
    local input=$1 output=$2
    local gpu
    local -a pids=()
    for gpu in "${gpus[@]}"; do
        CUDA_VISIBLE_DEVICES="$gpu" python scripts/pairrm_annotate_local.py \
            --dataset "$input" --output "$output" \
            --part "$((gpu % num_shards))" --total "$num_shards" &
        pids+=("$!")
    done
    wait_all "${pids[@]}"
}

#######################################
# Launch one training run through `accelerate launch` with the DeepSpeed
# ZeRO-3 accelerate config and 4 processes.
# Arguments: $1 - training script (scripts/run_rdpo.py or scripts/run_dpo.py)
#            $2 - recipe YAML passed to the training script
#            $3 - value for model_name_or_path=
#            $4 - value for hub_model_id=
#            $5 - value for output_dir=
#            $6 - value for learning_rate=
# Returns:   exit status of `accelerate launch`.
#######################################
launch_training() {
    local script=$1 config=$2 model=$3 hub_id=$4 out_dir=$5 lr=$6
    ACCELERATE_LOG_LEVEL=info accelerate launch \
        --config_file recipes/accelerate_configs/deepspeed_zero3.yaml \
        --num_processes=4 "$script" "$config" \
        "model_name_or_path=$model" "hub_model_id=$hub_id" \
        "output_dir=$out_dir" "learning_rate=$lr"
}

# Iterations 4..6: each one starts from the models written by iteration i-1,
# builds a new training set from their generations, and trains iteration i.
for i in {4..6}; do
    # Starting checkpoints: local outputs of the previous iteration ...
    model_name_or_path="model/${projectname}_dpo_iter_$((i - 1))"
    model_name_or_path_rdpo="model/${projectname}_rdpo_iter_$((i - 1))"
    # ... except for the first iteration handled here, which starts from the
    # iter-3 models of the "two_agent_1" run under the user's namespace.
    if (( i == 4 )); then
        model_name_or_path="${username}/two_agent_1_dpo_iter_3"
        model_name_or_path_rdpo="${username}/two_agent_1_rdpo_iter_3"
    fi

    # The last iteration uses a smaller learning rate.
    learning_rate=1e-7
    if (( i == 6 )); then
        learning_rate=5e-8
    fi

    # Remote dataset parts are numbered i-4 (0..2); the local copy is stored
    # under a name numbered i-1.
    dataset_this_turn="${username}/ultrafeedback_binarized_with_response_full_part$((i - 4))"
    dataset_this_turn_local="datasets/ultrafeedback_binarized_with_response_full_part$((i - 1))"

    # Intermediate and final datasets produced during this iteration.
    dataset_vllm_1="datasets/ultrafeedback_binarized_${projectname}_vllm_1_part_${i}"
    dataset_vllm_2="datasets/ultrafeedback_binarized_${projectname}_vllm_2_part_${i}"
    dataset_train="datasets/ultrafeedback_binarized_${projectname}_train_part_${i}"

    hub_model_id="${projectname}_dpo_iter_${i}"
    output_dir="model/${hub_model_id}"

    hub_model_id_rdpo="${projectname}_rdpo_iter_${i}"
    output_dir_rdpo="model/${hub_model_id_rdpo}"

    # Recipe files are numbered i-3 (1..3).
    config_file="recipes/ultra_beta/dpo/config_full_iter_$((i - 3)).yaml"

    python scripts/download_datasets.py \
        --dataset "$dataset_this_turn" --output "$dataset_this_turn_local" || exit 1

    echo "Start training iteration $i"

    # NOTE(review): iteration 5 skips data generation, the config rewrite and
    # DPO training, and only (re)runs RDPO training. Iteration 6 then reads
    # model/${projectname}_dpo_iter_5, which this script never writes --
    # presumably it already exists from an earlier run; confirm before reuse.
    if (( i != 5 )); then
        # Pass 1: generate with the DPO model (index 0, temperature 0.7).
        generate_responses "$model_name_or_path" "$dataset_this_turn_local" \
            "$dataset_vllm_1" 0 0.7 || exit 1
        python scripts/merge_and_save.py --dataset "$dataset_vllm_1" --total "$num_shards" || exit 1

        # Pass 2: generate with the RDPO model on the merged pass-1 output
        # (index 1, temperature 0.5).
        generate_responses "$model_name_or_path_rdpo" "$dataset_vllm_1" \
            "$dataset_vllm_2" 1 0.5 || exit 1
        python scripts/merge_and_save.py --dataset "$dataset_vllm_2" --total "$num_shards" || exit 1

        # Annotate the merged pass-2 output to build the training set.
        annotate_preferences "$dataset_vllm_2" "$dataset_train" || exit 1
        python scripts/merge_and_save.py --dataset "$dataset_train" --total "$num_shards" || exit 1

        # Overwrite line 7 of the recipe so it points at the new training set.
        sed -i "7s|.*|dataset_mixer: {updated: ${dataset_train}, original: HuggingFaceH4/ultrafeedback_binarized}|" \
            "$config_file" || exit 1
    fi

    # RDPO training runs in every iteration.
    launch_training scripts/run_rdpo.py "$config_file" "$model_name_or_path_rdpo" \
        "$hub_model_id_rdpo" "$output_dir_rdpo" "$learning_rate" || exit 1

    # DPO training follows RDPO, except in iteration 5 (see note above).
    if (( i != 5 )); then
        launch_training scripts/run_dpo.py "$config_file" "$model_name_or_path" \
            "$hub_model_id" "$output_dir" "$learning_rate" || exit 1
    fi
done


