#!/bin/bash
#
# This script generates ANN data for a model in training.
#
# For the overall design of the ANN driver, see run_train.sh.
#
# This script continuously generates ANN data using the latest model from model_dir.
# For training, run this script after the initial ANN data has been created by run_train.sh.
# Make sure the parameters used here are consistent with the training script.

##################################### Initial ANN Data generation ################################
# Variables this script reads but does not define; they must be provided by
# the caller (e.g. exported in the environment) before the script runs:
#   gpu_no                     - passed as --nproc_per_node
#   model_dir                  - passed as --training_dir
#   pretrained_checkpoint_dir  - passed as --init_model_dir
#   model_ann_data_dir         - passed as --output_dir; "cache/" is appended to
#                                it with no separator to form --cache_dir
#   preprocessed_data_dir      - passed as --data_dir
#
# Abort with a clear message if any of them is unset or empty, instead of
# launching a command line whose options have swallowed each other's values.
: "${gpu_no:?gpu_no must be set}"
: "${model_dir:?model_dir must be set}"
: "${pretrained_checkpoint_dir:?pretrained_checkpoint_dir must be set}"
: "${model_ann_data_dir:?model_ann_data_dir must be set}"
: "${preprocessed_data_dir:?preprocessed_data_dir must be set}"

# The command is kept as an array (one element per argument) so that values
# containing spaces or glob characters are passed through unchanged and no
# eval is needed to run it.

# Passage ANCE(FirstP)
initial_data_gen_cmd=(
  python -m torch.distributed.launch "--nproc_per_node=${gpu_no}"
  ../drivers/run_ann_data_gen.py
  --training_dir "${model_dir}"
  --init_model_dir "${pretrained_checkpoint_dir}"
  --model_type rdot_nll
  --output_dir "${model_ann_data_dir}"
  --cache_dir "${model_ann_data_dir}cache/"
  --data_dir "${preprocessed_data_dir}"
  --max_seq_length 512
  --per_gpu_eval_batch_size 16
  --topk_training 200
  --negative_sample 20
)

# # Document ANCE(FirstP)
# initial_data_gen_cmd=(
#   python -m torch.distributed.launch "--nproc_per_node=${gpu_no}"
#   ../drivers/run_ann_data_gen.py
#   --training_dir "${model_dir}"
#   --init_model_dir "${pretrained_checkpoint_dir}"
#   --model_type rdot_nll
#   --output_dir "${model_ann_data_dir}"
#   --cache_dir "${model_ann_data_dir}cache/"
#   --data_dir "${preprocessed_data_dir}"
#   --max_seq_length 512
#   --per_gpu_eval_batch_size 16
#   --topk_training 200
#   --negative_sample 20
# )

# # Document ANCE(MaxP)
# initial_data_gen_cmd=(
#   python -m torch.distributed.launch "--nproc_per_node=${gpu_no}"
#   ../drivers/run_ann_data_gen.py
#   --training_dir "${model_dir}"
#   --init_model_dir "${pretrained_checkpoint_dir}"
#   --model_type rdot_nll_multi_chunk
#   --output_dir "${model_ann_data_dir}"
#   --cache_dir "${model_ann_data_dir}cache/"
#   --data_dir "${preprocessed_data_dir}"
#   --max_seq_length 2048
#   --per_gpu_eval_batch_size 16
#   --topk_training 200
#   --negative_sample 20
# )

# Log the command line (arguments joined by single spaces), then run it.
# The launcher is the last command, so its exit status is the script's.
printf '%s\n' "${initial_data_gen_cmd[*]}"
"${initial_data_gen_cmd[@]}"
