#!/bin/bash

# Process environment for the training run. All five variables are exported so
# that child processes inherit them; NUM_GPUS, MASTER_ADDR and MASTER_PORT are
# additionally passed as explicit options to the launcher later in this script.
OMP_NUM_THREADS=1
CUDA_VISIBLE_DEVICES='4,5,6,7'  # four device ids; NUM_GPUS below is also 4
MASTER_ADDR='localhost'
MASTER_PORT=6013
NUM_GPUS=4
export OMP_NUM_THREADS CUDA_VISIBLE_DEVICES MASTER_ADDR MASTER_PORT NUM_GPUS

# Experiment configuration: run-group prefix, teacher/student names, and the
# hyperparameters that are forwarded to train_lot.py as same-named flags.
prefix='LoT_Transformer-XL_wt103'

#######################################
# Print the model config path for a teacher name.
# Arguments: $1 - teacher name
# Outputs:   the wt103_base config path when $1 is "wt103_base";
#            the wt103_large config path for any other value
#######################################
config_for_teacher() {
    if [ "$1" = "wt103_base" ]; then
        printf '%s\n' 'transformer_xl/pytorch/wt103_base.yaml'
    else
        # Same directory as the base config and train_lot.py; this path was
        # previously misspelled as "transformer-_xl/...".
        printf '%s\n' 'transformer_xl/pytorch/wt103_large.yaml'
    fi
}

teacher='wt103_base'
config_file=$(config_for_teacher "${teacher}")
student='wt103_base'
alpha=0.1
T=2
batch_chunk=2
lr=0.01
dropout=0.05
clip=0.25
warmup_step=5000
start_step=10000
max_step=60000

# The experiment name encodes every setting above, so each configuration gets
# its own name (used later for the work directory and the log file).
experiment_name="${teacher}_${student}_wt103_alpha${alpha}_T${T}_lr${lr}_dropout${dropout}_clip${clip}_warmup${warmup_step}_startstep${start_step}_maxstep${max_step}_batch_chunk${batch_chunk}"
# Output locations, grouped by ${prefix}: log files under logs/ and work
# directories under ckpt/. `mkdir -p` succeeds when the directory already
# exists, so no separate existence check is needed. Expansions are quoted so
# the paths are never word-split or glob-expanded.
log_folder_name="logs/${prefix}"
mkdir -p -- "${log_folder_name}"

ckpt_folder_name="ckpt/${prefix}"
mkdir -p -- "${ckpt_folder_name}"

# Per-experiment work directory (passed to the trainer as --work_dir) and the
# log file that receives the launcher's stdout and stderr.
work_dir="${ckpt_folder_name}/${experiment_name}"
log_filename="${log_folder_name}/${experiment_name}.log"

echo 'Run training...'

# Arguments for train_lot.py. Kept in an array so every value is passed as
# exactly one word, in the same order as before.
train_args=(
    --teacher "${teacher}"
    --student "${student}"
    --alpha "${alpha}"
    --T "${T}"
    --fp16
    --lr "${lr}"
    --dropout "${dropout}"
    --clip "${clip}"
    --warmup_step "${warmup_step}"
    --start_step "${start_step}"
    --max_step "${max_step}"
    --config_file "${config_file}"
    --batch_chunk "${batch_chunk}"
    --work_dir "${work_dir}"
    --exp_name "${experiment_name}"
)

# Start the launcher in the background under nohup so it keeps running after
# the invoking shell exits. --nproc_per_node is set to NUM_GPUS; stdout and
# stderr are both written to the per-experiment log file (truncated on start).
nohup python -m torch.distributed.launch \
    --nproc_per_node "${NUM_GPUS}" \
    --master_addr "${MASTER_ADDR}" \
    --master_port "${MASTER_PORT}" \
    transformer_xl/pytorch/train_lot.py "${train_args[@]}" \
    > "${log_filename}" 2>&1 &