#!/bin/bash
#SBATCH -p gpu20
#SBATCH -t 24:00:00
#SBATCH -o posttrain_procy_qa-%j.out
#SBATCH --gres gpu:2

# Environment handed to the Python processes started further down:
#   HF_DATASETS_CACHE    - dataset cache directory.
#   TRANSFORMERS_OFFLINE - set to 1 (Hugging Face offline switch).
HF_DATASETS_CACHE='/sdb/zke4/dataset_cache'
TRANSFORMERS_OFFLINE=1
export HF_DATASETS_CACHE TRANSFORMERS_OFFLINE
# Disabled: override for the model cache location.
#export TRANSFORMERS_CACHE='./model_cache'

# Plain shell variable (not exported); passed to posttrain.py as --max_samples.
max_samples=640000



# Launch posttrain.py once per (idrandom, task) pair, strictly in sequence.
# The first failing launch aborts the job with that launch's exit status, so
# the scheduler does not report success when only the last task succeeded.
set -euo pipefail

# One worker process per visible GPU. The #SBATCH header of this file requests
# gpu:2, while the launcher used to be hard-coded to 4 processes; deriving the
# count from CUDA_VISIBLE_DEVICES keeps the two in agreement, including when
# --gres is overridden on the sbatch command line. Falls back to the previous
# value of 4 when the variable is unset or empty.
# NOTE(review): the global batch size scales with the number of processes --
# confirm the intended GPU count for this experiment.
nproc_per_node=4
if [[ -n "${CUDA_VISIBLE_DEVICES:-}" ]]; then
  IFS=',' read -r -a visible_gpus <<< "${CUDA_VISIBLE_DEVICES}"
  nproc_per_node=${#visible_gpus[@]}
fi

# Number of tasks; used for both the loop bound and --ntasks so they cannot
# drift apart.
ntasks=6

for idrandom in 0; do
  for ((task = 0; task < ntasks; task++)); do
    # Build the command as an array: one argument per element, no reliance on
    # backslash continuations or on whitespace at the start of the next line.
    launch_cmd=(
      python -m torch.distributed.launch
      --nproc_per_node "${nproc_per_node}"
      --use_env posttrain.py
      --per_device_train_batch_size 62
      --fp16
      --max_seq_length 164
      --max_samples "${max_samples}"
      --idrandom "${idrandom}"
      --ntasks "${ntasks}"
      --task "${task}"
      --baseline 'softmask_pipeline_standard_norm_dgi_pre_as_general_first_proxy_all_layer'
    )

    rc=0
    "${launch_cmd[@]}" || rc=$?
    if ((rc != 0)); then
      # Presumably later tasks build on earlier ones -- verify; either way a
      # failed run should not be silently skipped.
      printf 'posttrain.py failed (idrandom=%s, task=%s, exit=%s); aborting.\n' \
        "${idrandom}" "${task}" "${rc}" >&2
      exit "${rc}"
    fi
  done
done

#proxy distill
#--baseline 'softmask_pipeline_cl'
#args.compute_head_in = 'pre'

#pipline_norm 'default'
#--baseline 'softmask_pipeline_cl_dgi' \
# --baseline 'softmask_pipeline_cl_standard_norm' \
#    --pipline_norm 'standard_norm'


#dgi_cl
#dgi_one
#adapter
#naive