#!/bin/bash
#
# Drives preprocessing of the URL dataset:
#   1. Exit early (status 0) if the sentinel file X_test.bin already exists.
#   2. Run the Python preprocessing script.
#   3. Run scripts/setup.sh (presumably builds csv2bin -- confirm).
#   4. Feed each group of preprocessed CSV files to csv2bin.
#
# Every path below is relative, so the script must be started from the
# directory that contains data/, experiment/ and scripts/.

set -euo pipefail

data_dir="data/url/preprocessed"
csv2bin="./experiment/build/csv2bin"

# X_test.bin serves as the "already preprocessed" marker; the test split is
# the last one handed to csv2bin below.
done_marker="${data_dir}/X_test.bin"
if [ -f "$done_marker" ]; then
    echo "Preprocessed data already exists. Skipping..."
    exit 0
fi

echo "Preprocessing URL dataset..."
python3 experiment/preprocess/url_preprocess.py

./scripts/setup.sh

# The all_pos group has only key and feature files (no -y argument).
"$csv2bin" -k "${data_dir}/all_pos_key.csv" -x "${data_dir}/all_pos_X.csv"

# train / val / test share one naming scheme: key, feature and label files.
# Order matters only in that "test" stays last (see done_marker above).
for split in train val test; do
    "$csv2bin" \
        -k "${data_dir}/X_${split}_key.csv" \
        -x "${data_dir}/X_${split}.csv" \
        -y "${data_dir}/y_${split}.csv"
done
