#!/bin/bash

# Before running this script, make sure fairseq is installed (see requirements.txt).
# Prepare the dataset for NLP tasks (GLUE): fetch the downloader script, then
# run it to place every task's data under ./glue_data.

# Drop any copy left by a previous run; -f makes a missing file a non-error.
rm -f -- download_glue_data.py
# -O pins the output file name, so wget can never save to a ".1"-suffixed copy.
wget -O download_glue_data.py \
  https://gist.githubusercontent.com/W4ngatang/60c2bdb54d156a41194446737ce03e2e/raw/17b8dd0d724281ed7c3b2aeeda662b92809aadd5/download_glue_data.py \
  || { echo "error: could not download download_glue_data.py" >&2; exit 1; }
# Stop here on failure: the later steps need the data in ./glue_data.
python download_glue_data.py --data_dir glue_data --tasks all \
  || { echo "error: download_glue_data.py failed" >&2; exit 1; }

# Preprocess the GLUE data. `all` is passed as the task selector, so this is
# not limited to RTE. The BPE encoder helper is fetched from the fairseq
# repository first.

# Drop any copy left by a previous run; -f makes a missing file a non-error.
rm -f -- multiprocessing_bpe_encoder.py
# -O pins the output file name, so wget can never save to a ".1"-suffixed copy.
wget -O multiprocessing_bpe_encoder.py \
  https://raw.githubusercontent.com/pytorch/fairseq/master/examples/roberta/multiprocessing_bpe_encoder.py \
  || { echo "error: could not download multiprocessing_bpe_encoder.py" >&2; exit 1; }
# Surface a preprocessing failure instead of silently carrying on.
bash preprocess_GLUE_tasks.sh ./glue_data all \
  || { echo "error: preprocess_GLUE_tasks.sh failed" >&2; exit 1; }

# Download the RoBERTa-base pretrained model, unpack it in the current
# directory, and delete the archive once extraction has succeeded.

# -O overwrites any archive left by an interrupted run. Without it wget would
# save to roberta.base.tar.gz.1 and tar would extract the stale file instead.
wget -O roberta.base.tar.gz https://dl.fbaipublicfiles.com/fairseq/models/roberta.base.tar.gz \
  || {
    echo "error: could not download roberta.base.tar.gz" >&2
    # Do not leave a truncated archive behind.
    rm -f -- roberta.base.tar.gz
    exit 1
  }
# Keep the archive when extraction fails so the problem can be inspected.
tar -zxf roberta.base.tar.gz \
  || { echo "error: could not extract roberta.base.tar.gz" >&2; exit 1; }
rm -- roberta.base.tar.gz

# clean up
