#!/bin/bash

# Assumes the original groove dataset is already stored under GROOVE_DATASET_ROOT (set below).

# Locations of the raw groove dataset, its metadata CSV, and the output
# directory that receives the packed waveform HDF5 files.
GROOVE_DATASET_ROOT="${HOME}/workspaces/jointist/datasets/groove/e-gmd-v1.0.0"

META_CSV_PATH="${GROOVE_DATASET_ROOT}/e-gmd-v1.0.0.csv"
AUDIO_HDF5S_DIR="${HOME}/workspaces/jointist/datasets/groove_h5s/waveforms"

# Run the pack_audios_to_hdf5s subcommand of create_groove.py. Every expansion
# is quoted so a path containing spaces or glob characters reaches the Python
# script as a single argument. Abort on failure so the later zip/upload steps
# never package incomplete output.
python3 jointist/dataset_creation/create_groove.py pack_audios_to_hdf5s \
  --dataset_root="${GROOVE_DATASET_ROOT}" \
  --meta_csv_path="${META_CSV_PATH}" \
  --hdf5s_dir="${AUDIO_HDF5S_DIR}" \
  || { echo "error: pack_audios_to_hdf5s failed" >&2; exit 1; }

# Process MIDI for all configs: run the pack_midi_events_to_hdf5s subcommand
# of create_groove.py once per config, each with its own input and output
# directory.
CONFIG_TYPES=(config_1 config_2 config_3)
for CONFIG_TYPE in "${CONFIG_TYPES[@]}"; do
  PROCESSED_MIDIS_DIR="${HOME}/workspaces/jointist/datasets/groove_processed/${CONFIG_TYPE}"
  MIDI_EVENTS_HDF5S_DIR="${HOME}/workspaces/jointist/datasets/groove_h5s/midi_events/closed_set_${CONFIG_TYPE}"
  # Quoted arguments keep paths with spaces intact. Abort on the first failing
  # config so the later zip/upload steps never package incomplete output.
  python3 jointist/dataset_creation/create_groove.py pack_midi_events_to_hdf5s \
    --processed_midis_dir="${PROCESSED_MIDIS_DIR}" \
    --meta_csv_path="${META_CSV_PATH}" \
    --hdf5s_dir="${MIDI_EVENTS_HDF5S_DIR}" \
    || { echo "error: pack_midi_events_to_hdf5s failed for ${CONFIG_TYPE}" >&2; exit 1; }
done

# Zip the HDF5 directories with compression level 0 (store only) for faster
# processing. The archives are created relative to the datasets directory, so
# stop if the directory change fails rather than zipping from the wrong place.
cd "${HOME}/workspaces/jointist/datasets" \
  || { echo "error: cannot cd to ${HOME}/workspaces/jointist/datasets" >&2; exit 1; }
zip -0 -r groove_h5s-waveforms.zip groove_h5s/waveforms
zip -0 -r groove_h5s-midi_events-config123.zip groove_h5s/midi_events

# Upload both archives to HDFS: create the destination directory (with any
# missing parents), then copy the MIDI-events archive followed by the
# waveforms archive from the current directory.
HDFS_GROOVE_DIR="hdfs://haruna/home/byte_speech_sv/projects/jointist/dataset_processed/groove"
hdfs dfs -mkdir -p "${HDFS_GROOVE_DIR}"
for ARCHIVE in groove_h5s-midi_events-config123.zip groove_h5s-waveforms.zip; do
  hdfs dfs -copyFromLocal "${ARCHIVE}" "${HDFS_GROOVE_DIR}"
done
