#!/bin/bash
#
# Fetch the malicious URLs dataset from Kaggle
# (https://www.kaggle.com/datasets/sid321axn/malicious-urls-dataset)
# and unpack it into data/url/, unless the CSV is already present.
#
# Requires the `kaggle` command-line tool on PATH. All paths are relative to
# the directory the script is run from.
#
# Exit status: 0 if the dataset already exists or was downloaded successfully,
# non-zero if the CLI is missing, the download fails, or the expected CSV is
# not present after extraction.

set -euo pipefail

# File whose presence means the dataset is already available locally.
readonly DATASET_PATH="data/url/malicious_phish.csv"
# Kaggle dataset identifier in "<owner>/<dataset>" form.
readonly KAGGLE_DATASET="sid321axn/malicious-urls-dataset"
# Directory the archive is downloaded and extracted into.
readonly DOWNLOAD_PATH="data/url/"

# Skip the download entirely when the dataset file is already there.
if [[ -f "$DATASET_PATH" ]]; then
    echo "$DATASET_PATH already exists. Skipping download."
    exit 0
fi

# Fail early with a readable message instead of a bare "command not found".
if ! command -v kaggle >/dev/null 2>&1; then
    echo "Error: the 'kaggle' command was not found on PATH." >&2
    exit 1
fi

# Make sure the target directory exists before handing it to the CLI.
mkdir -p -- "$DOWNLOAD_PATH"

echo "Downloading dataset from Kaggle..."
# Expansions are quoted so the values are never word-split or globbed.
kaggle datasets download -d "$KAGGLE_DATASET" -p "$DOWNLOAD_PATH" --unzip

# The skip check above relies on this exact file; confirm it was produced
# before reporting success.
if [[ ! -f "$DATASET_PATH" ]]; then
    echo "Error: download finished but $DATASET_PATH was not found." >&2
    exit 1
fi

echo "Dataset downloaded and extracted to $DOWNLOAD_PATH"
