#!/bin/bash
#
# Launches word-embedding training sweeps: every hyper-parameter combination
# is turned into one `jbsub` submission by GNU parallel.
#
# Usage: <this script> [mode]
#   mode  Word appended to the project tag and forwarded to ./train.py after
#         the epoch flags (default: train).

# Trace every command. This is enabled with `set` instead of a shebang option
# so that tracing is also active when the script is started as `bash <script>`,
# in which case the shebang line (and its options) is never read.
set -x

# Project tag handed to the job submitter: launch time as YYYYMMDDHHMM; the
# mode word is appended a few lines below.
proj=$(date +%Y%m%d%H%M)

# Options passed to every `parallel` invocation: a single job slot (-j 1) and
# output kept in input order (-k). Swap in the commented line to only print
# the generated commands.
parallel_option="-j 1 -k"
# parallel_option="--dry-run"

# First positional argument selects the mode; falls back to "train" when it is
# unset or empty.
mode=${1:-train}
proj=$proj$mode
# not NEG2 to save up GPU memory

# Root directory of the 1-billion-word benchmark; read by the 1billion function.
export data=/dccstor/latplan1/public/1-billion-word-language-modeling-benchmark-r13output
export PYTHONUNBUFFERED=1
# NOTE(review): presumably forces GNU parallel to run job commands under bash,
# which functions published with `export -f` require -- confirm.
export SHELL=/bin/bash

# Run ./train.py on the 1-billion-word benchmark shards found under $data.
# Globals:   data (read) - root directory of the benchmark corpus
# Arguments: extra ./train.py options, appended after the fixed ones
# Returns:   exit status of ./train.py
1billion (){
    # The shard patterns are quoted, so ./train.py receives them unexpanded.
    local -a fixed_opts=(
        --train "$data/training-monolingual.tokenized.shuffled/*-00100"
        --valid "$data/validation-monolingual.tokenized.shuffled/*-00100"
        --test  "$data/heldout-monolingual.tokenized.shuffled/*-00050"
        --min-occurrence 10
        --subsampling
        --subsampling-threshold 0.0001
        --threads 16
        --batch-size 1000
        --path may29
    )

    # Print the GPU status before training starts.
    nvidia-smi

    # $@ is left unquoted: word splitting discards empty arguments, such as
    # the "" alternative the sweep in this script supplies for --affine.
    # NOTE(review): quoting it would forward empty arguments verbatim to
    # ./train.py -- confirm that is acceptable before changing this.
    ./train.py "${fixed_opts[@]}" $@
}

# Publish the function to child bash processes through the environment.
export -f 1billion

# Run ./train.py on the WikiText-103 training tokens (path relative to the
# current directory).
# Arguments: extra ./train.py options, appended after the fixed ones
# Returns:   exit status of ./train.py
wiki103 (){
    local -a fixed_opts=(
        --train corpus/wikitext-103/wikitext-103/wiki.train.tokens
        --min-occurrence 10
        --subsampling
        --subsampling-threshold 0.0001
        --threads 16
        --batch-size 1000
        --path wiki103
    )

    # Print the GPU status before training starts.
    nvidia-smi

    # $@ is left unquoted: caller arguments are word-split again and empty
    # ones are dropped before they reach ./train.py.
    ./train.py "${fixed_opts[@]}" $@
}

# Publish the function to child bash processes through the environment.
export -f wiki103

# Run ./train.py on the WikiText-2 training tokens (path relative to the
# current directory).
# Arguments: extra ./train.py options, appended after the fixed ones
# Returns:   exit status of ./train.py
wiki2 (){
    local -a fixed_opts=(
        --train corpus/wikitext-2/wikitext-2/wiki.train.tokens
        --min-occurrence 10
        --subsampling
        --subsampling-threshold 0.0001
        --threads 16
        --batch-size 1000
        --path wiki2
    )

    # Print the GPU status before training starts.
    nvidia-smi

    # $@ is left unquoted: caller arguments are word-split again and empty
    # ones are dropped before they reach ./train.py.
    ./train.py "${fixed_opts[@]}" $@
}

# Publish the function to child bash processes through the environment.
export -f wiki2

# Command prefix that wraps each training run in a job submission, tagged with
# the project name computed earlier in the script.
submit="jbsub -queue x86_12h -cores 1+1 -mem 64g -proj $proj -require v100"

# From here on $mode carries the epoch flags followed by the original mode word.
mode="--start-epoch 0 --epochs 1 ${mode}"

# Options shared by every SG_BTL_Sequential run. The backslash-newlines inside
# the quotes are line continuations, so the value is a single line that is
# word-split when expanded unquoted.
# NOTE(review): --beta is set here and also swept below; the swept value comes
# later on the command line -- confirm ./train.py lets the later one win.
btl_common="                           \
--annealing-schedule exponential_steps \
--annealing-max 5.0    \
--annealing-min 0.7    \
--annealing-start 0    \
--annealing-end   8    \
--beta 0.0             \
--initial-state random \
--noise                \
"

# Submit one `1billion` job per combination of the `:::` input sources.
# Arguments: command-template words for 1billion, then the `:::` sources.
# $parallel_option and $submit are expanded unquoted on purpose so that each
# of their words becomes a separate argument.
sweep_1billion() {
    parallel $parallel_option $submit 1billion "$@"
}

# Baseline sweep: SkipGram_NEG over embedding size x learning rate.
sweep_1billion {} $mode \
    ::: --embedding \
    ::: 200 500 1000 \
    ::: --lr \
    ::: 0.001 0.003 0.0001 \
    ::: --model \
    ::: SkipGram_NEG

# SG_BTL_Sequential sweep over embedding size, learning rate, --affine on/off
# (the empty alternative omits the flag), beta and initialization.
sweep_1billion $btl_common {} $mode \
    ::: --embedding \
    ::: 200 500 1000 \
    ::: --model \
    ::: SG_BTL_Sequential \
    ::: --lr \
    ::: 0.001 0.003 0.0001 \
    ::: --affine "" \
    ::: --beta \
    ::: 0.0 0.1 1.0 \
    ::: --initialization \
    ::: gaussian logistic


