#!/bin/bash
# Generates the CovidXRay dataset.
#
# Usage: <this script> ROOT_DIR
#   ROOT_DIR  root directory of the dataset,
#             e.g., /raid/<username>/datasets/cxray_dataset
#
# Needs `kaggle`, `unzip`, `wget` and `python` on PATH. cxray_dataset.py is
# invoked by relative path, so run this script from the directory holding it.

# Abort on the first failing step (e.g. a failed download) instead of running
# the later processing steps on missing or partial data.
set -euo pipefail

if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'Usage: %s ROOT_DIR\n' "${0##*/}" >&2
  exit 2
fi

readonly root_dir=$1
readonly labels_url='https://raw.githubusercontent.com/giocoal/CXR-ACGAN-chest-xray-generator-covid19-pneumonia/main/Data/COVIDx-splitted-resized-112'
readonly split_seed=1234
readonly split_frac=0.6749

# Make the data location and download the dataset .zip into it
mkdir -p -- "${root_dir}"
kaggle datasets download andyczhao/covidx-cxr2 -p "${root_dir}"

# Unzip every archive found in the root directory. Each archive gets its own
# unzip call: when given several file names, unzip treats all but the first as
# member names to extract from the first archive.
shopt -s nullglob
archives=("${root_dir}"/*.zip)
shopt -u nullglob
if (( ${#archives[@]} == 0 )); then
  printf 'error: no .zip archive found in %s\n' "${root_dir}" >&2
  exit 1
fi
for archive in "${archives[@]}"; do
  unzip "${archive}" -d "${root_dir}/"
done

# Download the label files from GitHub
wget -P "${root_dir}/test/" "${labels_url}/test_COVIDx9A.txt"
wget -P "${root_dir}/train/" "${labels_url}/train_COVIDx9A.txt"

# Resize all images (no target size is passed here; the original note on this
# step says (128,128) -- confirm against cxray_dataset.py)
for split in train test; do
  python cxray_dataset.py --fxn resize --root_dir "${root_dir}/${split}"
done

# Make index.csv files
for split in train test; do
  python cxray_dataset.py --fxn write_csv --root_dir "${root_dir}/${split}"
done

# Make train/val splits
python cxray_dataset.py --fxn valsplit --root_dir "${root_dir}" \
  --split_seed "${split_seed}" --split_frac "${split_frac}"

