#!/usr/bin/env bash
# ---------------------------------------------------------------------------
# Experiment configuration. Everything below is read by the directory setup
# and by the launch loop further down in this script.
# ---------------------------------------------------------------------------

# Data: the two values forwarded together as --digit_range.
digit_range_1=1
digit_range_2=10
seq_length=4
# Two sequences of seq_length plus three extra tokens: <BOS>, ->, <EOS>.
max_seq_len=$(( seq_length * 2 + 3 ))
split_method=random_permutation

# Model size.
num_layers=2
num_heads=2

# Training schedule.
batch_size=16
num_epochs=1000
curriculum_epoch=0
loss_steps=(0 1 2 h)  # h is the last token

# Sweep axes: one run is launched per (embedding_dim, supervision) pair.
embedding_dims=(16 24 32)
supervisions=(hard_teacher)

# NOTE(review): in this script dist_strategy and num_beams only appear in the
# output directory name; they are not passed on the python command line.
# Confirm whether continuous_generation.py is expected to receive them.
dist_strategy=uniform  # uniform or beam_minabs
num_beams=16

# Relative directory name (with trailing slash) shared by logs/, models/ and
# figures/, and also forwarded as --output_dir.
printf -v output_dir \
    'grpo_continuous_model_digit_range_%s_%s_Seq_Length_%s_Layer_%s_Head_%s_Dist_Strategy_%s_Num_Beams_%s/' \
    "$digit_range_1" "$digit_range_2" "$seq_length" \
    "$num_layers" "$num_heads" "$dist_strategy" "$num_beams"


# Create logs/, models/ and figures/ together with this experiment's
# sub-directory inside each of them.
# `mkdir -p` creates missing parents and succeeds when the directory already
# exists, so no separate existence test is needed and re-running is safe.
# The path is quoted so the directory name is always passed as a single word.
# Abort if a directory cannot be created: the runs launched below redirect
# their output into logs/${output_dir}.
for base_dir in logs models figures; do
    if ! mkdir -p -- "${base_dir}/${output_dir}"; then
        printf 'error: cannot create directory %s\n' "${base_dir}/${output_dir}" >&2
        exit 1
    fi
done



# Launch one training run per (embedding_dim, supervision) pair.
# Each run is started under nohup in the background and is never waited on,
# so all runs execute concurrently and the script returns once they are
# spawned. stdout and stderr of each run go to its own file under
# logs/${output_dir}.
for embedding_dim in "${embedding_dims[@]}"; do
    for supervision in "${supervisions[@]}"; do
        nohup_out="logs/${output_dir}Embed${embedding_dim}_${split_method}_${supervision}_${batch_size}.out"

        # Build the argument list as an array so every value is passed as
        # exactly one word; loss_steps deliberately expands to several words.
        train_args=(
            --max_seq_len "$max_seq_len"
            --embedding_dim "$embedding_dim"
            --digit_range "$digit_range_1" "$digit_range_2"
            --batch_size "$batch_size"
            --seq_length "$seq_length"
            --loss_steps "${loss_steps[@]}"
            --split_method "$split_method"
            --num_epochs "$num_epochs"
            --output_dir "$output_dir"
            --supervision "$supervision"
            --curriculum_epoch "$curriculum_epoch"
            --num_layers "$num_layers"
            --num_heads "$num_heads"
        )

        # Every run is pinned to the same device index (1); -u keeps python's
        # output unbuffered so the log file fills as the run progresses.
        CUDA_VISIBLE_DEVICES=1 \
            nohup python -u continuous_generation.py "${train_args[@]}" \
            > "$nohup_out" 2>&1 &

        echo "nohup output: $nohup_out"
    done
done