#!/usr/bin/env bash
#
# Launches fast_r2d2_multi_label_trainer.py through torch.distributed.launch
# with `--task intent`, reading data from ../data/ATIS and writing to
# ../data/save (both relative to the trainer/ directory).
#
# Usage: <this script> <nproc_per_node>
#   $1 - value forwarded to torch.distributed.launch --nproc_per_node
#
# Must be invoked from the directory that contains trainer/, because the
# script changes into trainer/ using a relative path.

# Fail early with a usage message instead of letting --nproc_per_node swallow
# the script name as its value when the argument is missing.
nproc_per_node=${1:?usage: $0 <nproc_per_node>}

# All paths below are relative to trainer/; abort if we cannot get there.
cd trainer || exit 1

# Every line of the command except the last must end in a backslash; without
# it the shell would run each option line as a separate command.
python -m torch.distributed.launch --nproc_per_node "$nproc_per_node" fast_r2d2_multi_label_trainer.py \
    --max_grad_norm 1 \
    --max_batch_size 16 \
    --max_batch_len 384 \
    --vocab_dir ../data/pretrain/parser_atomspan_r2d2_4l_notie_wiki103wash_a100_60 \
    --config_path ../data/pretrain/parser_atomspan_r2d2_4l_notie_wiki103wash_a100_60/config.json \
    --data_dir ../data/ATIS \
    --epoch 60 \
    --output_dir ../data/save \
    --log_step 10 \
    --eval_step 30 \
    --pretrain_dir ../data/pretrain/pretrain_sst2_4l_notie \
    --task intent \
    --num_samples 256 \
    --sampler sequential \
    --enable_exclusive

# Disabled option kept from the original script. To enable it, append
# "--enable_topdown" to the command above and add a trailing backslash to the
# --enable_exclusive line.
#  --enable_topdown