# Path configuration: DATA is the root under which dataset directories live;
# RES is presumably the root for saved results (it is not referenced in the
# visible part of this script -- confirm before removing).
#
# Two path sets exist, labelled "sd" and "sa". Both used to be assigned in
# sequence, so the "sd" values were always overwritten by the "sa" ones.
# The inactive pair is now commented out to make the effective values obvious;
# to switch, uncomment one pair and comment out the other.
# sd (inactive)
# DATA=/data/ruohongz/data2
# RES=/data/ruohongz/XMTC/saved
# sa (active)
DATA=/usr1/ruohongz/XMTC/dataset
RES=/usr1/ruohongz/XMTC/saved

# Positional arguments; each falls back to its default when omitted or empty.
#   $1  DATANAME        dataset directory name, appended to $DATA
#   $2  model           value passed as --model_type
#   $3  model_name      value passed as --model_name
#   $4  max_seq_length  value passed as --max_seq_length
DATANAME="${1:-EUR-Lex}"
model="${2:-xlnet}"
model_name="${3:-xlnet-base-cased}"
max_seq_length="${4:-512}"

# Candidate "<model>, <model_name>" combinations for arguments $2 and $3:
#   xlnet,      xlnet-base-cased
#   roberta,    roberta-base
#   roberta,    distilroberta-base
#   bert,       bert-base-cased
#   distilbert, distilbert-base-cased

# Run preprocess_xml.py on the selected dataset with the chosen model settings.
# Every expansion is double-quoted so that a value containing whitespace or
# glob characters is passed to the script as a single, literal argument.
# The command ends explicitly on its last active flag instead of relying on a
# trailing line continuation that runs into a commented-out line.
python preprocess_xml.py \
  --data_dir "$DATA/$DATANAME" \
  --model_type "$model" \
  --model_name "$model_name" \
  --max_seq_length "$max_seq_length" \
  --do_label

# Optional flags, currently disabled. To enable one, add it to the command
# above (with a trailing "\" on the preceding line):
#   --do_feature
#   --overwrite
#   --keyword --feature_name tfidf2 --top_keyword 50
#   --keyword_only
#   --shuffle







