#!/bin/bash

# GPUs made visible to the pipeline; the stages below use five devices.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4
# Put the launch directory at the front of Python's module search path.
# The previous value is appended only when it is non-empty, so an unset
# PYTHONPATH does not leave a trailing ':' (an empty path element) behind.
export PYTHONPATH="${PWD}${PYTHONPATH:+:${PYTHONPATH}}"

# Number of training iterations to run.
iter_num=3

# Namespace prefix of the per-iteration dataset ids
# ("<username>/ultrafeedback_binarized_with_response_full_part<k>").
username="my_user_name"
# Tag embedded in the model directory names and intermediate dataset names.
projectname="approx_nash_maxmin"

# Number of GPUs used by the pipeline: one generation/annotation shard per GPU
# (GPU ids 0..num_gpus-1) and the process count for DPO training. To use more
# devices, raise this value and extend CUDA_VISIBLE_DEVICES at the top of the
# script accordingly.
num_gpus=5

#######################################
# Run a Python script as N parallel shard jobs, one per GPU.
# Shard k runs with CUDA_VISIBLE_DEVICES=k and gets "--part k --total N"
# appended to its arguments. All shards are awaited individually so that a
# failing shard is actually detected (a bare `wait` always returns 0).
# Arguments:
#   $1  - number of shards N
#   $2  - path of the Python script
#   $3+ - arguments passed to every shard
# Outputs:
#   One diagnostic line on stderr per failed shard.
# Returns:
#   0 if every shard succeeded, 1 otherwise.
#######################################
run_sharded() {
    local total=$1
    shift
    local -a pids=()
    local part failed=0

    for ((part = 0; part < total; part++)); do
        CUDA_VISIBLE_DEVICES=$part python "$@" --part "$part" --total "$total" &
        pids+=("$!")
    done

    # Wait for every shard, even after a failure, so no job is left running.
    for part in "${!pids[@]}"; do
        if ! wait "${pids[part]}"; then
            printf 'error: %s shard %d of %d failed\n' "$1" "$part" "$total" >&2
            failed=1
        fi
    done
    return "$failed"
}

for ((i = 1; i <= iter_num; i++)); do
    prev=$((i - 1))

    # Checkpoint written by the previous iteration. It is only used from
    # iteration 2 on; iteration 1 passes no model override to training.
    model_name_or_path="model/${projectname}_iter_${prev}"

    dataset_this_turn="${username}/ultrafeedback_binarized_with_response_full_part${prev}"
    dataset_this_turn_local="datasets/ultrafeedback_binarized_with_response_full_part${prev}"

    # Intermediate datasets produced by the stages of this iteration.
    dataset_vllm_1="datasets/ultrafeedback_binarized_${projectname}_vllm_1_part_${i}"
    dataset_pi="datasets/ultrafeedback_binarized_${projectname}_pi_part_${i}"
    dataset_minpi="datasets/ultrafeedback_binarized_${projectname}_minpi_part_${i}"
    dataset_train="datasets/ultrafeedback_binarized_${projectname}_train_part_${i}"

    hub_model_id="${projectname}_iter_${i}"
    output_dir="model/${hub_model_id}"
    config_file="recipes/ultra_beta/dpo/config_full_iter_${i}.yaml"

    # The download stays non-fatal: iteration 1 never reads the local copy,
    # and later iterations abort in the generation stage if it is missing.
    if ! python scripts/download_datasets.py --dataset "$dataset_this_turn" --output "$dataset_this_turn_local"; then
        printf 'warning: download of %s failed; continuing\n' "$dataset_this_turn" >&2
    fi

    echo "Start training iteration $i"

    # Iteration 1 trains on the per-iteration dataset id directly; later
    # iterations train on the dataset annotated below.
    if ((i == 1)); then
        mixer_dataset=$dataset_this_turn
    else
        mixer_dataset=$dataset_train
    fi

    # Overwrite line 7 of the recipe with the dataset mixer for this iteration.
    # NOTE(review): assumes line 7 of every config_full_iter_<i>.yaml is the
    # dataset_mixer entry - confirm against the recipe files.
    # Fail fast here: a missing config would otherwise only surface at the
    # training step, after all the GPU stages have already run.
    sed -i "7s|.*|dataset_mixer: {updated: ${mixer_dataset}, original: HuggingFaceH4/ultrafeedback_binarized}|" "$config_file" || exit 1

    train_args=("$config_file")

    if ((i > 1)); then
        # Stage 1: run generate_full_vllm.py with the previous checkpoint
        # over the local dataset, one shard per GPU.
        run_sharded "$num_gpus" scripts/generate_full_vllm.py \
            --model "$model_name_or_path" \
            --dataset "$dataset_this_turn_local" \
            --output "$dataset_vllm_1" || exit 1

        python scripts/merge_and_save.py --dataset "$dataset_vllm_1" --total "$num_gpus" || exit 1

        # Stage 2: get_pi_ref.py, single process.
        ACCELERATE_LOG_LEVEL=info accelerate launch \
            --config_file recipes/accelerate_configs/multi_gpu.yaml \
            --num_processes=1 \
            scripts/get_pi_ref.py --dataset "$dataset_vllm_1" --output "$dataset_pi" || exit 1

        # Stage 3: select_min_pi.py (invoked with --selectmax).
        python scripts/select_min_pi.py --dataset "$dataset_pi" --output "$dataset_minpi" --selectmax || exit 1

        # Stage 4: pairrm_annotate_local.py, one shard per GPU, then merge.
        run_sharded "$num_gpus" scripts/pairrm_annotate_local.py \
            --dataset "$dataset_minpi" \
            --output "$dataset_train" || exit 1

        python scripts/merge_and_save.py --dataset "$dataset_train" --total "$num_gpus" || exit 1

        # Continue training from the previous iteration's checkpoint.
        train_args+=("model_name_or_path=${model_name_or_path}")
    fi

    train_args+=("hub_model_id=${hub_model_id}" "output_dir=${output_dir}")

    ACCELERATE_LOG_LEVEL=info accelerate launch \
        --config_file recipes/accelerate_configs/deepspeed_zero3.yaml \
        --num_processes="$num_gpus" \
        scripts/run_dpo.py "${train_args[@]}" || exit 1
done

# Run scripts/alpaca.py on GPU 0 against the checkpoint of the last iteration.
# The model path follows iter_num rather than a hard-coded index, so changing
# the iteration count cannot leave this step pointing at a stale model.
CUDA_VISIBLE_DEVICES=0 python scripts/alpaca.py --name "$projectname" --model "model/${projectname}_iter_${iter_num}"


