#!/usr/bin/env bash
#
# Launches trainer/fast_r2d2_glue_trainer.py on the GLUE SST-2 task through
# torch.distributed.launch.
#
# Usage: <this script> <nproc_per_node>
#   nproc_per_node  positive integer; forwarded to --nproc_per_node and used
#                   to divide the batch budget of 64 into the per-process
#                   --max_batch_size (integer division, remainder discarded).
#
# Paths are relative, so run from the directory that contains trainer/ and
# data/.
set -eu

TASK_NAME=SST-2
# The same directory supplies the vocabulary, the model config and the
# pretrained weights; keep it in one place so the three flags cannot drift.
PRETRAIN_DIR=data/parser_atomspan_r2d2_4l_notie_wiki103wash_a100_60
TOTAL_BATCH_SIZE=64

# Validate before any expansion: a missing or non-numeric argument would
# otherwise surface as an arithmetic syntax error, and 0 as a division by
# zero. Leading zeros are rejected so the shell never reads the value as octal.
nproc=${1:-}
case $nproc in
  '' | *[!0-9]* | 0*)
    printf 'Usage: %s <nproc_per_node>  (positive integer)\n' "$0" >&2
    exit 2
    ;;
esac

per_proc_batch=$((TOTAL_BATCH_SIZE / nproc))
if [ "$per_proc_batch" -eq 0 ]; then
  # Still forwarded unchanged; the warning only makes the truncation visible.
  printf 'warning: nproc_per_node=%s exceeds %s, so --max_batch_size is 0\n' \
    "$nproc" "$TOTAL_BATCH_SIZE" >&2
fi

# Single-GPU alternative to the distributed launcher:
# CUDA_VISIBLE_DEVICES=0 python trainer/fast_r2d2_glue_trainer.py \
python -m torch.distributed.launch --master_port 7090 --nproc_per_node "$nproc" trainer/fast_r2d2_glue_trainer.py \
    --max_grad_norm 1.0 --lr 5e-5 --parser_lr 1e-2 --enable_epoch_eval \
    --vocab_dir "$PRETRAIN_DIR" \
    --config_path "$PRETRAIN_DIR/config.json" \
    --task_type sst-2 --glue_dir "data/glue/$TASK_NAME" --max_batch_len 1000000 \
    --max_batch_size "$per_proc_batch" --output_dir data/save/sst2_r2d2_root_4023 \
    --epochs 20 --pretrain_dir "$PRETRAIN_DIR" \
    --log_step 20 --num_samples 256 --sampler random --apex_mode O1 --empty_label_idx -1 \
    --model_name fastr2d2_root --seed 4023
    # Optional extra flag (disabled):
    #--tree_path data/glue/SST-2/trees.txt