
# Launch scripts/run_online_iterative_paper.sh once for the tldr dataset with
# the qwen2.5_3b model and --num_gpus 2. The reference model path is built
# from the dataset name (presumably the matching SFT checkpoint directory --
# confirm it exists before launching).
# Expansions are quoted so the arguments stay single words even if the
# dataset name is later changed to something containing spaces or globs.
dataset=tldr
scripts/run_online_iterative_paper.sh \
  --dataset_name "$dataset" \
  --model_name qwen2.5_3b \
  --num_gpus 2 \
  --ref_model_name "outputs/sft/sft-${dataset}-qwen2.5_3b"


# Beta sweep: for every (dataset, model, beta) combination, launch
# scripts/run_online_iterative_paper.sh with --num_gpus 2, a reference model
# path derived from the dataset and model names, and a tag that records the
# beta value (v3_isbeta_<beta>). Runs are launched one after another; a
# failing run does not stop the remaining ones.
# All expansions are quoted so each value is passed as exactly one argument.
for dataset in uf; do
  for model in qwen2.5_3b; do
    for beta in 0.3 30; do
      scripts/run_online_iterative_paper.sh \
        --dataset_name "$dataset" \
        --model_name "$model" \
        --num_gpus 2 \
        --ref_model_name "outputs/sft/sft-${dataset}-${model}" \
        --beta "$beta" \
        --tag "v3_isbeta_${beta}"
    done
  done
done


# LoRA rank sweep: for every (dataset, model, lora_r) combination, launch
# scripts/run_online_iterative_paper.sh with --use_lora and the given
# --lora_r, using --num_gpus 2 and a reference model path derived from the
# dataset and model names. The rank is embedded in the run tag.
# NOTE(review): the tag mentions "beta05", "sigmoid" and "1epoch", but no
# corresponding flags are passed here -- presumably these are the runner's
# defaults; confirm before relying on the tag.
# All expansions are quoted so each value is passed as exactly one argument.
for dataset in uf; do
  for model in qwen2.5_3b; do
    # Full rank list: 16 32 64 128 (trim the list below to run a subset).
    for lora_r in 16 32 64 128; do
      scripts/run_online_iterative_paper.sh \
        --dataset_name "$dataset" \
        --model_name "$model" \
        --num_gpus 2 \
        --ref_model_name "outputs/sft/sft-${dataset}-${model}" \
        --tag "v3_beta05_sigmoid_1epoch_lora_r${lora_r}" \
        --use_lora \
        --lora_r "$lora_r"
    done
  done
done