#!/usr/bin/env bash
#
# Driver script: runs the bundled evaluation scripts over the shipped
# prediction files, one section per paper table, printing a heading before
# each run.
#
# Every path used in this file is relative (./evaluation_code, ./predictions,
# ./datasets), so the script has to be started from the directory that
# contains those folders.

# Announce what is about to happen, the required tooling and the rough runtime.
printf '%s\n' 'This will run all the evaluations to produce most of the tables in the paper. This requires Python2+numpy and Java, tested on python 2.7.12 and OpenJDK 1.8.0_232. This will take approx 10 minutes to run.'

########################### Table 1 ###########################
# For each SQuAD split (1 and 2), run evaluation_code/qg/eval.py on two
# prediction files: BertGen(Large) + ASGen and UniLM + ASGen. Both runs of a
# split share the same --src_file / --tgt_file pair from the Table_4
# directory; only --out_file (the predictions being scored) differs.
printf '\n%s\n' "Table 1 - Comparison with existing question generation methods on the test set of SQuAD Split 1 and Split 2"

for t1_split in 1 2; do
  t1_ref_dir="./predictions/Table_4/squad_split_predictions/split${t1_split}"

  # Command prefix common to both models of this split.
  t1_eval=(
    python2 evaluation_code/qg/eval.py
    --src_file "${t1_ref_dir}/squad_split${t1_split}.pa.txt"
    --tgt_file "${t1_ref_dir}/squad_split${t1_split}.q.txt"
  )

  printf '\n%s\n' "Split ${t1_split}"

  printf '\n%s\n\n' "BertGen(Large) + ASGen"
  PYTHONPATH='./evaluation_code/' "${t1_eval[@]}" \
    --out_file "${t1_ref_dir}/FullWiki_BertGen_Large_ASGen.txt"

  printf '\n%s\n\n' "UniLM + ASGen"
  PYTHONPATH='./evaluation_code/' "${t1_eval[@]}" \
    --out_file "./predictions/Table_2/unilm/split${t1_split}/unilm_asgen.txt"
done


########################### Table 2 ###########################
# For each SQuAD split (1 and 2), run two evaluations:
#   * Zhao et al. + ASGen via evaluation_code/qg/eval_zhao.py, where the same
#     golden.txt is passed as both --tgt and --src;
#   * UniLM + ASGen via evaluation_code/qg/eval.py, using the Table_4
#     .pa.txt / .q.txt files of the split as --src_file / --tgt_file.
printf '\n%s\n' "Table 2 - Application of ASGen to other question generation models"

for t2_split in 1 2; do
  t2_zhao_dir="./predictions/Table_2/zhao/split${t2_split}"
  t2_squad_stem="./predictions/Table_4/squad_split_predictions/split${t2_split}/squad_split${t2_split}"

  printf '\n%s\n' "Split ${t2_split}"

  printf '\n%s\n\n' "Zhao et al. + ASGen"
  PYTHONPATH='./evaluation_code/' python2 evaluation_code/qg/eval_zhao.py \
    --tgt "${t2_zhao_dir}/golden.txt" \
    --src "${t2_zhao_dir}/golden.txt" \
    --out "${t2_zhao_dir}/zhao_asgen.txt"

  printf '\n%s\n\n' "UniLM + ASGen"
  PYTHONPATH='./evaluation_code/' python2 evaluation_code/qg/eval.py \
    --src_file "${t2_squad_stem}.pa.txt" \
    --tgt_file "${t2_squad_stem}.q.txt" \
    --out_file "./predictions/Table_2/unilm/split${t2_split}/unilm_asgen.txt"
done


########################### Table 3 ###########################
# MS MARCO / NewsQA section. Nothing is evaluated here: the script only prints
# a notice per dataset that points at the shipped model-output file. The
# evaluation commands are kept below, commented out; they reference *.pa.txt
# and *.q.txt files under ./predictions/Table_3/ (presumably to be supplied by
# the user before uncommenting — confirm against the project README).
#
# The printed heading says "Table 3" so that it agrees with this section's
# banner and with the ./predictions/Table_3/ paths used throughout.
echo ""
echo "Table 3 - Comparison with existing question generation methods on the test set of MS MARCO and NewsQA"
echo ""
echo "MS MARCO"
echo ""

echo " Due to license Restrictions of MS Marco, we cannot distribute the contexts and ground truth questions. See the file ./predictions/Table_3/msmarco_predictions/BertGen_Large_ASGen.txt for our model outputs "
# PYTHONPATH='./evaluation_code/' python2 evaluation_code/qg/eval.py --src_file ./predictions/Table_3/msmarco_predictions/msmarco.pa.txt --tgt_file ./predictions/Table_3/msmarco_predictions/msmarco.q.txt --out_file ./predictions/Table_3/msmarco_predictions/BertGen_Large_ASGen.txt

echo ""
echo "NewsQA"
echo ""
echo " Due to license Restrictions of NewsQA, we cannot distribute the contexts and ground truth questions. See the file ./predictions/Table_3/newsqa_predictions/BertGen_Large_ASGen.txt for our model outputs "
# PYTHONPATH='./evaluation_code/' python2 evaluation_code/qg/eval.py --src_file ./predictions/Table_3/newsqa_predictions/newsqa.pa.txt --tgt_file ./predictions/Table_3/newsqa_predictions/newsqa.q.txt --out_file ./predictions/Table_3/newsqa_predictions/BertGen_Large_ASGen.txt

########################### Table 4 ###########################
# Ablation runs. Every run calls evaluation_code/qg/eval.py with the
# --src_file / --tgt_file pair of one test set and a per-model --out_file
# from the same directory. Three test sets are covered in order:
# "Test Wiki", then SQuAD "Split 1" and "Split 2".
#
# Each table entry below is "<printed label>|<prediction file name>".

# Models evaluated on the Test Wiki set.
t4_wiki_runs=(
  "Small-Wiki BertGen + ASGen woans|SmallWiki_BertGen_ASGen_woans.txt"
  "Small-Wiki BertGen + ASGen|SmallWiki_BertGen_ASGen.txt"
  "Full-Wiki BertGen + ASGen|FullWiki_BertGen_ASGen.txt"
  "Full-Wiki BertGen (Large) + ASGen|FullWiki_BertGen_Large_ASGen.txt"
)

# Models evaluated on each SQuAD split (same list for both splits).
t4_split_runs=(
  "Small-Wiki BertGen|SmallWiki_BertGen.txt"
  "Small-Wiki BertGen + NS|SmallWiki_BertGen_NS.txt"
  "Small-Wiki BertGen + ASGen woans|SmallWiki_BertGen_ASGen_woans.txt"
  "Small-Wiki BertGen + ASGen|SmallWiki_BertGen_ASGen.txt"
  "Full-Wiki BertGen + NS|FullWiki_BertGen_NS.txt"
  "Full-Wiki BertGen + ASGen|FullWiki_BertGen_ASGen.txt"
  "Full-Wiki BertGen (Large) + ASGen|FullWiki_BertGen_Large_ASGen.txt"
)

printf '\n%s\n' "Table 4 - Ablation  of  pre-training  methods, i.e.,pre-training on NS, ASGen, and ASGen without conditioning on a given answer (woans),  on  the test set of SQuAD splits."

printf '\n%s\n' "Test Wiki"
t4_dir="./predictions/Table_4/test_wiki_predictions"
for t4_run in "${t4_wiki_runs[@]}"; do
  # ${t4_run%%|*} is the label, ${t4_run#*|} the prediction file name.
  printf '\n%s\n\n' "${t4_run%%|*}"
  PYTHONPATH='./evaluation_code/' python2 evaluation_code/qg/eval.py \
    --src_file "${t4_dir}/test_wiki.pa.txt" \
    --tgt_file "${t4_dir}/test_wiki.q.txt" \
    --out_file "${t4_dir}/${t4_run#*|}"
done

for t4_split in 1 2; do
  printf '\n%s\n' "Split ${t4_split}"
  t4_dir="./predictions/Table_4/squad_split_predictions/split${t4_split}"
  for t4_run in "${t4_split_runs[@]}"; do
    printf '\n%s\n\n' "${t4_run%%|*}"
    PYTHONPATH='./evaluation_code/' python2 evaluation_code/qg/eval.py \
      --src_file "${t4_dir}/squad_split${t4_split}.pa.txt" \
      --tgt_file "${t4_dir}/squad_split${t4_split}.q.txt" \
      --out_file "${t4_dir}/${t4_run#*|}"
  done
done


########################### Table 6 ###########################

########################### Table 7 ###########################
# Runs the two SQuAD evaluation scripts on the shipped prediction files:
#   * evaluate-v1.1.py <dev-v1.1.json> <predictions.json>
#   * evaluate-v2.0.py <dev-v2.0.json> <predictions.json>
#       --na-prob-file <null_odds.json> --na-prob-thresh <threshold>
# Unlike the question-generation sections, no PYTHONPATH is set for these.
printf '\n%s\n' "Table 7 - Comparison of downstream MRC task EM/F1 scores after pre-training on synthetic data (Syn). The scores are obtained from development set of SQuAD-v1.1 and SQuAD-v2.0."

# --- SQuAD v1.1: entries are "<printed label>|<prediction file stem>" ---
printf '\n%s\n' "Dev v1.1"
t7_v11_dir="./predictions/Table_7/squad-v1.1"
for t7_run in \
  "BERT Large + syn data|BertGen_Large_ASGen" \
  "BERT WWM  + syn data|wwm_BertGen_Large_ASGen"; do
  printf '\n%s\n\n' "${t7_run%%|*}"
  python2 ./evaluation_code/evaluate-v1.1.py \
    ./datasets/squad/dev-v1.1.json \
    "${t7_v11_dir}/${t7_run#*|}.json"
done

# --- SQuAD v2.0: entries are "<printed label>|<file stem>|<threshold>" ---
# The threshold is the value handed to --na-prob-thresh for that model.
printf '\n%s\n' "Dev v2.0"
t7_v20_dir="./predictions/Table_7/squad-v2.0"
for t7_run in \
  "BERT Large + syn data|BertGen_Large_ASGen|-3.604159" \
  "BERT WWM|wwm|-4.9828" \
  "BERT WWM + syn data|wwm_BertGen_Large_ASGen|-1.17404"; do
  # Split on '|' only, so spaces inside the label survive untouched.
  IFS='|' read -r t7_label t7_stem t7_thresh <<< "${t7_run}"
  printf '\n%s\n\n' "${t7_label}"
  python2 ./evaluation_code/evaluate-v2.0.py \
    ./datasets/squad/dev-v2.0.json \
    "${t7_v20_dir}/${t7_stem}_predictions.json" \
    --na-prob-file "${t7_v20_dir}/${t7_stem}_null_odds.json" \
    --na-prob-thresh "${t7_thresh}"
done

########################### Appendix Table 9 ###########################
# Single run of evaluation_code/qg/eval_zhao.py on the split3 files. The same
# golden.txt is passed as both --tgt and --src; zhao_asgen.txt is passed as
# --out.
t9_dir="./predictions/Appendix_Table_9/split3"

printf '\n%s\n' "Appendix Table 9 : Additional experiments on the effectiveness of AS on the test set of SQuAD Split3."

printf '\n%s\n\n' "Zhao et al. + ASGen"
PYTHONPATH='./evaluation_code/' python2 evaluation_code/qg/eval_zhao.py \
  --tgt "${t9_dir}/golden.txt" \
  --src "${t9_dir}/golden.txt" \
  --out "${t9_dir}/zhao_asgen.txt"

########################### Appendix Table 12 ###########################
# Runs evaluation_code/evaluate-v1.1.py on the QUASAR files for every
# combination of {short, long} x {dev, test}, once per model:
#   python2 evaluate-v1.1.py <dataset json> <prediction json>
#
# Outer entries are "<printed heading>|<length dir>|<dev or test>";
# inner entries are "<printed label>|<prediction file prefix>".
printf '\n%s\n' "Appendix Table 12 : EM/F1 scores of the BERT fine-tuned on QUASAR-Tdataset. The used synthetic data is generated from ASGen trained on SQuAD-v1.1"

for t12_case in \
  "Short Dev|short|dev" \
  "Short Test|short|test" \
  "Long Dev|long|dev" \
  "Long Test|long|test"; do
  # Split on '|' only, so the space inside the heading is preserved.
  IFS='|' read -r t12_heading t12_len t12_part <<< "${t12_case}"
  printf '\n%s\n' "${t12_heading}"

  for t12_model in "BERT|BERT" "BERT + SQuAD-v1.1|BERT_ASGen"; do
    printf '\n%s\n\n' "${t12_model%%|*}"
    python2 ./evaluation_code/evaluate-v1.1.py \
      "./datasets/quasar/${t12_len}/${t12_part}-v1.1.json" \
      "./predictions/Appendix_Table_12/${t12_len}/${t12_model#*|}_${t12_part}.json"
  done
done