#!/usr/bin/env bash
#
# Thin wrapper that runs preprocess_raw_text.py for a single dataset with
# `--top_feature --feature_name tfidf2`.
#
# Usage: <this-script> [DATANAME] [EXTRA_FLAGS]
#   DATANAME     Name of the dataset directory under DATA_PATH.
#                Defaults to EUR-Lex.
#   EXTRA_FLAGS  Optional single string of additional flags forwarded to
#                preprocess_raw_text.py. It is split on whitespace, so
#                several flags can be passed as one quoted argument,
#                e.g. "--tfidf --stem". Defaults to nothing.
#
# preprocess_raw_text.py is resolved relative to the current directory.
set -eu

DATA_PATH=/usr1/ruohongz/XMTC/data
# shellcheck disable=SC2034  # RES is not referenced in this script
RES=/usr1/ruohongz/XMTC/results
DATANAME=${1:-EUR-Lex}
# $2 is forwarded verbatim as extra flags; an omitted $2 forwards nothing.
cmd=${2:-}

# Flags that have been toggled by hand on this command line in the past
# (presumably all accepted by preprocess_raw_text.py -- confirm there):
#   --vocab_size -1 --overwrite --tfidf --stem --tokenize --create_raw
#
# $cmd is deliberately left unquoted so that one argument can expand to
# several flags (and to no argument at all when empty).
# shellcheck disable=SC2086
python preprocess_raw_text.py \
  --data_dir "$DATA_PATH/$DATANAME" --data "$DATANAME" \
  $cmd --top_feature --feature_name tfidf2