#!/usr/bin/env bash
#
# Launches fairseq/custom/gpt2/run_gpt2_sg.py in "train" mode with the "sg"
# objective, reading data from ./data-bin/wikitext-103-bpe_v0 and using
# ./checkpoint/train/sg/ as the output directory.
#
# Usage: run from the directory that contains fairseq/ and data-bin/ --
# every path below is relative to the current working directory.
#
# CUDA_VISIBLE_DEVICES=4 is set for this one command only, so the process
# sees only GPU index 4. Edit the value to run on a different device.
#
# NOTE(review): the meaning of each flag is defined by run_gpt2_sg.py, which
# is not shown here; the values are passed through unchanged.
#
# The last argument deliberately has NO trailing backslash: a dangling
# continuation would pull any text added below into this command (or pass a
# literal "\" as an argument when the file lacks a final newline).
CUDA_VISIBLE_DEVICES=4 python fairseq/custom/gpt2/run_gpt2_sg.py \
  --data-base ./data-bin/wikitext-103-bpe_v0 \
  --output-dir ./checkpoint/train/sg/ \
  --eval-split valid \
  --train-n-steps 35000 \
  --validate-every 1000 \
  --sequence-tune-rate 0.0 \
  --mode train \
  --train-batch-size 300 \
  --gamma 0.3 \
  --learning-rate 2e-5 \
  --objective sg