#!/bin/bash

# Create the data directory if it doesn't exist; nothing below can work without it.
mkdir -p data || { echo "Failed to create data directory."; exit 1; }

# Download the MovieLens 20M archive unless a copy is already present.
echo "Downloading MovieLens 20M dataset..."
if [ ! -f "data/ml-20m.zip" ]; then
    # Download to a temporary name and rename only after wget succeeds.
    # An interrupted transfer would otherwise leave a truncated ml-20m.zip
    # behind, and the existence check above would skip the download on
    # every subsequent run.
    if wget -O data/ml-20m.zip.part https://files.grouplens.org/datasets/movielens/ml-20m.zip; then
        mv data/ml-20m.zip.part data/ml-20m.zip || { echo "Failed to move the downloaded MovieLens archive to data/ml-20m.zip."; exit 1; }
    else
        rm -f data/ml-20m.zip.part
        echo "Download failed. Please download ml-20m.zip from https://files.grouplens.org/datasets/movielens/ml-20m.zip manually and place it in data/."
        exit 1
    fi
fi

# Download the Yelp archive unless a copy is already present.
echo "Downloading Yelp dataset..."
if [ ! -f "data/Yelp-JSON.zip" ]; then
    echo "Attempting to download Yelp dataset with User Agent..."
    # wget -O creates (truncates) its output file before the transfer starts,
    # so a failed download would leave an empty or partial Yelp-JSON.zip that
    # the existence check above treats as "already downloaded" on the next run.
    # Write to a temporary name and rename only on success.
    if wget -U 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1 Safari/605.1.15' -O 'data/Yelp-JSON.zip.part' "https://business.yelp.com/external-assets/files/Yelp-JSON.zip"; then
        mv 'data/Yelp-JSON.zip.part' 'data/Yelp-JSON.zip' || { echo "Failed to move the downloaded Yelp archive to data/Yelp-JSON.zip."; exit 1; }
    else
        rm -f 'data/Yelp-JSON.zip.part'
        echo "Download failed. Please download Yelp-JSON.zip from https://business.yelp.com/external-assets/files/Yelp-JSON.zip manually and place it in data/."
        exit 1
    fi
fi

# Unzip datasets
# Fail fast on a missing or unreadable MovieLens archive instead of carrying on
# with the Yelp extraction and only noticing later that ratings.csv is absent.
echo "Unzipping MovieLens 20M dataset..."
if [ -f "data/ml-20m.zip" ]; then
    unzip -o data/ml-20m.zip -d data/ || { echo "Failed to unzip MovieLens dataset. Check the file."; exit 1; }
else
    echo "Error: data/ml-20m.zip not found. Please download it manually."
    exit 1
fi

# Unpack the outer Yelp zip archive into data/, overwriting existing files.
echo "Unzipping Yelp dataset..."
yelp_outer_zip="data/Yelp-JSON.zip"

# Guard: the archive must be present before extraction is attempted.
if [ ! -f "$yelp_outer_zip" ]; then
    echo "Error: ${yelp_outer_zip} not found. Please download it manually."
    exit 1
fi

if ! unzip -o "$yelp_outer_zip" -d data/; then
    echo "Failed to unzip Yelp dataset. Check the file."
    exit 1
fi

# Extract the tarball nested inside the Yelp zip (yelp_dataset.tar) into data/.
echo "Unzipping Yelp dataset (second level: yelp_dataset.tar)..."
yelp_inner_tar="data/Yelp JSON/yelp_dataset.tar"

# Guard: the tarball is expected at this path once Yelp-JSON.zip is unpacked.
if [ ! -f "$yelp_inner_tar" ]; then
    echo "Error: ${yelp_inner_tar} not found after unzipping Yelp-JSON.zip."
    exit 1
fi

if ! tar -xvf "$yelp_inner_tar" -C data/; then
    echo "Failed to extract yelp_dataset.tar. Check the file."
    exit 1
fi

# Make sure each algorithm folder has a real_data directory.
mkdir -p HybElimUCB-RA/real_data HybUCB-AR/real_data

# Copy MovieLens ratings.csv into every algorithm's real_data folder,
# renaming it to movielens.csv on the way.
echo "Extracting and renaming MovieLens ratings.csv to movielens.csv..."
movielens_ratings="data/ml-20m/ratings.csv"

if [ ! -f "$movielens_ratings" ]; then
    echo "Error: ${movielens_ratings} not found. Check unzip process."
    exit 1
fi

for movielens_dest in HybElimUCB-RA/real_data HybUCB-AR/real_data; do
    if ! cp "$movielens_ratings" "${movielens_dest}/movielens.csv"; then
        echo "Failed to copy to ${movielens_dest}"
        exit 1
    fi
done

echo "Extraction completed successfully."

# Process Yelp data and extract yelp.csv
# Runs data/dataset_processing/yelp_processing.py on the extracted review JSON
# and writes the result to data/yelp.csv.
echo "Processing Yelp dataset..."
if [ -f "data/yelp_academic_dataset_review.json" ]; then
    # Keep `python` as the first choice (the original behaviour), but fall back
    # to `python3`: many current systems do not install a bare `python` command.
    if command -v python >/dev/null 2>&1; then
        yelp_python=python
    elif command -v python3 >/dev/null 2>&1; then
        yelp_python=python3
    else
        echo "Error: neither python nor python3 was found on PATH."
        exit 1
    fi
    "$yelp_python" data/dataset_processing/yelp_processing.py --input data/yelp_academic_dataset_review.json --output data/yelp.csv || { echo "Failed to process Yelp data."; exit 1; }
else
    echo "Error: data/yelp_academic_dataset_review.json not found after extracting yelp_dataset.tar."
    exit 1
fi
# Distribute the processed yelp.csv to every algorithm's real_data folder.
echo "Copying processed Yelp data..."
yelp_processed_csv="data/yelp.csv"

# Guard: the processing step must have produced the CSV.
if [ ! -f "$yelp_processed_csv" ]; then
    echo "Error: ${yelp_processed_csv} not found. Check Yelp processing."
    exit 1
fi

for yelp_dest in HybElimUCB-RA/real_data HybUCB-AR/real_data; do
    if ! cp "$yelp_processed_csv" "${yelp_dest}/yelp.csv"; then
        echo "Failed to copy to ${yelp_dest}"
        exit 1
    fi
done

echo "Copying completed successfully."

echo "Data setup completed successfully!"