# ---------------------------------------------------------------------------
# Run configuration: exported environment variables plus the shell variables
# consumed by the training loop further down in this script.
# ---------------------------------------------------------------------------

# cuBLAS workspace setting taken from the reproducibility section of the docs.
export CUBLAS_WORKSPACE_CONFIG=":16:8" # https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
# Fixed Python hash seed.
export PYTHONHASHSEED=0
# Weights & Biases project / entity.
export WANDB_PROJECT=EVA
export WANDB_ENTITY=ml_eva
# Restrict the run to GPU 0 (keep in sync with num_gpus below).
export CUDA_VISIBLE_DEVICES="0"

# base_dir="/scratch/project/dd-24-12/data/eva_cache"
base_dir="/system/user/publicwork/hauzenbe/eva_cache"
cache_dir="$base_dir"
data_cache_dir="${base_dir}/data"
dataset_name="e2e_nlg"
num_gpus=1
batch_size=8
epochs=5
experiment_name=lora_lr_grid_randinit
model_name_or_path=gpt2
# Path component naming the data: task_name when the caller has set it
# (even to an empty string), otherwise dataset_name. Parameter expansion is
# used instead of `[ -v ] && echo ... || echo ...` so the value is never
# word-split or globbed and no bash >= 4.2 test operator is required.
data_ref="${task_name-$dataset_name}"

# Launch one training run per seed. A seed whose logging directory already
# exists is skipped, so re-running the script only starts the missing runs.
for seed in 0 10 101; do
  # Empty sub-directory component; it leaves a doubled slash in the paths below.
  dir=""
  output_dir="$base_dir/$experiment_name/$data_ref/$dir/$seed/model"
  logging_dir="$base_dir/$experiment_name/$data_ref/$dir/$seed/log"

  if [ ! -d "$logging_dir" ]; then
    mkdir -p "$logging_dir"
    # Point Weights & Biases at this run's own log directory.
    export WANDB_DIR="$logging_dir"

    # Optional flag, currently disabled. To enable it, append it to the
    # command below (adding a trailing backslash to the preceding line):
    #   --lora_path "$base_dir/${model_name_or_path}_${data_ref}_r_16_pca.bin"
    python -m torch.distributed.launch --nproc_per_node="$num_gpus" --master_port 50012 \
      examples/language-modeling/run_clm.py \
      --model_name_or_path "$model_name_or_path" \
      --cache_dir "$cache_dir" \
      --data_cache_dir "$data_cache_dir" \
      --dataset_name "$dataset_name" \
      --do_train \
      --do_eval \
      --per_device_train_batch_size "$batch_size" \
      --learning_rate 2e-4 \
      --num_train_epochs "$epochs" \
      --output_dir "$output_dir" \
      --overwrite_output_dir \
      --logging_steps 10 \
      --logging_dir "$logging_dir" \
      --evaluation_strategy epoch \
      --save_strategy no \
      --weight_decay 0.01 \
      --warmup_steps 500 \
      --label_smoothing_factor 0.1 \
      --apply_lora \
      --lora_r 4 \
      --lora_alpha 32 \
      --seed "$seed" \
      --report_to=wandb \
      --experiment_name="$experiment_name"
  fi
done

