#!/bin/bash

# Iterative training driver.
#
# Runs `iter_num` rounds. Round 1 launches scripts/run_dpo.py on the first
# slice of the original preference dataset. Every later round first runs
# scripts/pairrm_annotate_new_1sample_pref.py against the previous round's
# model and the next slice, then launches scripts/run_optm_dpo.py.
# Any failing step aborts the whole script with exit status 1.
#
# Environment:
#   ACCELERATE_BIN  optional path to the `accelerate` executable
#                   (defaults to /home/aiscuser/.local/bin/accelerate).
iter_num=4

# Settings that do not change between iterations.
username="USERNAME"
alpha=0.001
name="SELM-Zephyr-7B"
org_dataset="HuggingFaceH4/ultrafeedback_binarized"
accelerate_bin="${ACCELERATE_BIN:-/home/aiscuser/.local/bin/accelerate}"
accelerate_config="recipes/accelerate_configs/deepspeed_zero3.yaml"
recipe_dir="recipes/zephyr-7b-beta-selm/dpo"

# Number of train_prefs rows consumed per iteration.
# NOTE(review): 61135 is presumably the size of the train_prefs split; the
# integer division drops the remainder (61135 % iter_num rows) — confirm that
# is acceptable.
interval=$((61135 / iter_num))

for ((i = 1; i <= iter_num; i++)); do
    prev=$((i - 1))

    # Row window [interval*(i-1), interval*i) of train_prefs for this round.
    # Built as a plain string and always expanded inside quotes: the square
    # brackets would otherwise be treated as a glob pattern by the shell.
    dataset_splits="train_prefs[$((interval * prev)):$((interval * i))],test_prefs"
    dataset_mixer="{'updated':'$username/${alpha}_${name}_dataset_iter_${prev}','original':'$org_dataset'}"

    hub_model_id="${alpha}_${name}_iter_$i"
    output_dir="data/$hub_model_id"

    # Iterations 1-3 use 5e-7; any later iteration uses 1e-7.
    if ((i <= 3)); then
        learning_rate=5e-7
    else
        learning_rate=1e-7
    fi

    # The first round starts from the SFT base model; later rounds continue
    # from the model produced by the previous round.
    if ((i == 1)); then
        model_name_or_path="HuggingFaceH4/mistral-7b-sft-beta"
    else
        model_name_or_path="$username/${alpha}_${name}_iter_${prev}"
    fi

    if ((i == 1)); then
        # NOTE(review): this message is printed *before* the first training
        # run and reports iteration 0 — confirm the wording is intended.
        echo "Finished training iteration $((i-1))"
        # learning_rate and model_name_or_path are not passed on this command
        # line; presumably run_dpo.py takes them from config_full.yaml.
        ACCELERATE_LOG_LEVEL=info "$accelerate_bin" launch \
            --config_file "$accelerate_config" \
            scripts/run_dpo.py "$recipe_dir/config_full.yaml" \
            "dataset_splits=$dataset_splits" \
            "hub_model_id=$hub_model_id" \
            "output_dir=$output_dir" || exit 1
    else
        # Step 1: annotate new preference data with the previous model.
        python scripts/pairrm_annotate_new_1sample_pref.py \
            "$recipe_dir/optm_config_full.yaml" \
            "learning_rate=$learning_rate" \
            "model_name_or_path=$model_name_or_path" \
            "dataset_mixer=$dataset_mixer" \
            "dataset_splits=$dataset_splits" || exit 1

        # Step 2: train on the mixed dataset.
        ACCELERATE_LOG_LEVEL=info "$accelerate_bin" launch \
            --config_file "$accelerate_config" \
            scripts/run_optm_dpo.py "$recipe_dir/optm_config_full.yaml" \
            "learning_rate=$learning_rate" \
            "alpha=$alpha" \
            "model_name_or_path=$model_name_or_path" \
            "dataset_mixer=$dataset_mixer" \
            "hub_model_id=$hub_model_id" \
            "output_dir=$output_dir" || exit 1
    fi
done