#!/usr/bin/env bash
# Configuration for the Wiki10-31K run: dataset/result roots plus the
# positional arguments consumed by the main.py invocation further down.
#
# Positional arguments (all optional):
#   $1  GPU         value passed to --gpu                (default: 0,1)
#   $2  model       value passed to --model_type         (default: xlnet)
#   $3  model_name  value passed to --model_name_or_path (default: xlnet-base-cased)
#   $4  run_id      suffix used in output/log paths      (default: tmp)
#   $5  num_layer   value passed to --num_hidden_layers  (default: 12)
#   $6… extra flags forwarded to main.py as-is

# sd
#DATA=/data/ruohongz/data2
#RES=/data/ruohongz/XMTC/saved

# sa
#DATA=/usr1/ruohongz/XMTC/data
DATA=/usr1/ruohongz/XMTC/dataset
RES=/usr1/ruohongz/XMTC/saved

DATANAME=Wiki10-31K
GPU=${1:-0,1}
model=${2:-xlnet}
model_name=${3:-xlnet-base-cased}
run_id=${4:-tmp}
num_layer=${5:-12}

# Print every argument from the sixth onward, joined by the first IFS
# character (a space by default); print nothing when there are five or fewer.
# Runs on its own copy of the arguments, so the script's "$@" is untouched.
extra_flags() {
  if [ "$#" -gt 5 ]; then
    shift 5
    printf '%s' "$*"
  fi
}

# Collect ALL trailing flags, not just $6, so that
# `... 12 --do_train --some_flag value` forwards every word. A single quoted
# sixth argument ("--do_train --some_flag value") yields the same string.
cmd=$(extra_flags "$@")

# Example (model, model_name) pairs for arguments 2 and 3:
# xlnet, xlnet-base-cased
# roberta, roberta-base
# roberta, distilroberta-base
# bert, bert-base-cased
# distilbert, distilbert-base-cased

# Example training invocation
# (argument order: GPU model model_name run_id num_layer [extra main.py flags]):
#   ./scripts/wiki10.bash 4,5 bertattn bert-large-cased attn1_bn512 1 --do_train


# Run main.py on $DATANAME with the model, GPU list and run id chosen above.
#
# Notes on the flag list:
#   * Output goes to $RES/$DATANAME/<model_name>.<run_id>; the log file is
#     $RES/log/<DATANAME>_<run_id>.txt.
#     NOTE(review): an earlier, conflicting --log_dir
#     ($RES/log/<DATANAME>.<model_name>.<run_id>.txt) was also passed here.
#     Assuming main.py parses flags with argparse, the later occurrence wins,
#     so only that one is kept — confirm which file name is actually wanted.
#   * --max_seq_length 512 was listed twice; a single occurrence is kept.
#   * $cmd is deliberately left unquoted: it may hold several extra flags that
#     must be split into separate words, and must vanish entirely when empty.
#   * --label_suffix, --num_hidden_layers and --save_epoch come after $cmd, so
#     (under the same last-wins assumption) they take precedence over the same
#     flags supplied through $cmd.
#
# Multi-process alternative to the plain launcher below:
# python -m torch.distributed.launch --nproc_per_node=2 --master_port=8001 main.py \
# shellcheck disable=SC2086  # $cmd must undergo word splitting
python main.py \
  --data_dir "$DATA/$DATANAME" \
  --output_dir "$RES/$DATANAME/${model_name}.${run_id}" \
  --model_type "$model" \
  --model_name_or_path "$model_name" \
  --task_name "$DATANAME" \
  --overwrite_output_dir \
  --max_seq_length 512 \
  --per_gpu_train_batch_size=6 \
  --per_gpu_eval_batch_size=36 \
  --learning_rate_x 1e-5 \
  --learning_rate_h 1e-4 \
  --learning_rate_a 1e-3 \
  --num_train_epochs 8.0 \
  --log_dir "$RES/log/${DATANAME}_${run_id}.txt" \
  --pos_label 30 \
  --adaptive_cutoff 15469 \
  --div_value 2 \
  --logging_steps 100 \
  --save_steps 500 \
  --gpu "$GPU" --seed 313 \
  --do_eval --eval_checkpoints last \
  --save result_top --mode dev \
  $cmd \
  --label_suffix full \
  --num_hidden_layers "$num_layer" \
  --save_epoch
#--attention_probs_dropout_prob 0 --hidden_dropout_prob 0 --bottleneck_size 512
#--positional_encoding absolute --attention_probs_dropout_prob 0 --hidden_dropout_prob 0
#--data_suffix .tfidf2 --keyword_only \

#--attentionXML \






