#!/bin/bash
#
# Download the graph datasets listed below (.mat files from the SuiteSparse
# collection website, SNAP group) into $OUTPUT_DIR, then run
# preprocess_graphs.py.
#
# Each file is first written to "<target>.part" and renamed to its final name
# only after wget exits successfully. `wget -O` creates/truncates its output
# file even when the transfer fails, so writing directly to the final path
# would leave an empty or truncated file that later runs would mistake for a
# finished download.
#
# Exit status: 1 if the output directory cannot be created or any download
# fails (preprocessing is skipped in that case); otherwise the exit status of
# the preprocessing script.

# Directory that receives the downloaded .mat files
OUTPUT_DIR="graph_datasets"

# Create the directory if it doesn't exist; nothing else can work without it
mkdir -p "$OUTPUT_DIR" || exit 1

# Dataset names; element i pairs with element i of `urls`
declare -a datasets=("com-Friendster" "wiki-topcats" "com-LiveJournal")

# Download URLs, in the same order as `datasets`
declare -a urls=("https://suitesparse-collection-website.herokuapp.com/mat/SNAP/com-Friendster.mat"
                 "https://suitesparse-collection-website.herokuapp.com/mat/SNAP/wiki-topcats.mat"
                 "https://suitesparse-collection-website.herokuapp.com/mat/SNAP/com-LiveJournal.mat")

# The two arrays are indexed in parallel; catch an edit that desynchronizes them
if [ "${#datasets[@]}" -ne "${#urls[@]}" ]; then
    echo "Internal error: datasets and urls arrays differ in length." >&2
    exit 1
fi

# Set to 1 as soon as any download fails
failed=0

# Loop over datasets and URLs to download each one
for i in "${!datasets[@]}"; do
    dataset="${datasets[$i]}"
    url="${urls[$i]}"
    output_file="$OUTPUT_DIR/$dataset.mat"
    partial_file="$output_file.part"

    # -s (exists AND non-empty) rather than -f: a zero-byte file is the
    # residue of an earlier failed download and must be fetched again.
    if [ -s "$output_file" ]; then
        echo "File already exists: $output_file, skipping download."
        continue
    fi

    # Trust wget's exit status, not the mere presence of the output file,
    # and publish the file under its final name only on success.
    if wget -q --show-progress -O "$partial_file" "$url" \
        && mv -f -- "$partial_file" "$output_file"; then
        echo "Download completed: $output_file"
    else
        # Drop the incomplete file so it cannot be picked up later
        rm -f -- "$partial_file"
        echo "Download failed for $dataset." >&2
        failed=1
    fi
done

# Do not preprocess an incomplete set of datasets
if [ "$failed" -ne 0 ]; then
    echo "One or more downloads failed; skipping preprocessing." >&2
    exit 1
fi

# run the preprocessing python script
python preprocess_graphs.py