#!/usr/bin/env bash

# Data locations, relative to the directory the script is started from.
# The archives are downloaded and unpacked inside DATA_DIR.
PARENT_DATA_DIR="./data"
DATA_DIR="$PARENT_DATA_DIR/other_data"
if [[ -d "$DATA_DIR" ]]; then
   echo "The directory '$DATA_DIR' exists."
else
   echo "Creating the directory '$DATA_DIR'."
   # -p also creates the parent directory when it is missing; a plain mkdir
   # fails in that case.
   mkdir -p -- "$DATA_DIR" || { echo "Could not create '$DATA_DIR'." >&2; exit 1; }
fi

# Remember the starting directory so the script can return to it later.
CURRENT_DIR=$PWD
echo "Current working directory '$CURRENT_DIR'"
# Abort if the data directory cannot be entered: the following steps download
# and delete files relative to the current directory.
cd -- "$DATA_DIR" || { echo "Could not enter '$DATA_DIR'." >&2; exit 1; }

# Comma-separated Google Drive id suffixes; each is appended to the common
# "0Bz8a_" prefix to form the download URL.
# NOTE(review): presumably these are the archives named in FILENAMES below,
# in the same order - confirm.
IDS="Dbh9QhbUDNpeUdjb0wxRms,Dbh9QhbZVhsUnRWRDhETzA,Dbh9QhbQ2Vic1kxMmZZQ1k,Dbh9Qhbd2JNdDBsQUdocVU,Dbh9QhbZlU4dXhHTFhZQU0"
# Split on commas for this one read only, so the global IFS is never changed.
IFS=, read -r -a drive_ids <<< "$IDS"
for id in "${drive_ids[@]}"; do
    # Quoted so the '?' in the URL is never treated as a glob pattern.
    gdown "https://drive.google.com/uc?id=0Bz8a_${id}" \
        || echo "Download failed for id '0Bz8a_${id}'." >&2
done

FILENAMES="ag_news_csv,amazon_review_full_csv,dbpedia_csv,yahoo_answers_csv,yelp_review_full_csv"
IFS=, read -r -a archive_names <<< "$FILENAMES"

# Unpack each archive unless its directory already exists, then delete the
# archive. A failed extraction is reported and the archive is left in place.
for filename in "${archive_names[@]}"; do
    echo "$filename"
    archive="$filename.tar.gz"
    if [[ -d "$filename" ]]; then
        echo "The directory '$DATA_DIR/$filename' exists. So not extracting."
    else
        echo "Extracting file '$filename.tar.gz' to '$DATA_DIR' directory."
        if ! tar -xvzf "$archive"; then
            echo "Could not extract '$archive'; not deleting it." >&2
            continue
        fi
    fi
    # -f: a missing archive (e.g. after a failed download) is not an error here.
    rm -f -- "$archive"
done

# Go back to the directory recorded in CURRENT_DIR. PARENT_DATA_DIR is a
# relative path, so the processing step must not run from anywhere else;
# abort if the return fails.
cd -- "$CURRENT_DIR" || { echo "Could not return to '$CURRENT_DIR'." >&2; exit 1; }

TASKS="agnews,amzn,dbpedia,yahooqa,yelp"
# Split on commas for this one read only, so the global IFS is never changed.
IFS=, read -r -a task_names <<< "$TASKS"
# Run the processing module once per task.
for task in "${task_names[@]}"; do
    echo "Processing '$task'"
    python -m lang_exps.data.process_raw_data --task "$task" --data_dir "$PARENT_DATA_DIR" --mode "lll"
done

