#!/bin/bash
#
# Validate the on-disk inputs for a dataset, make sure its images directory
# exists, and then run preprocess.py on it.
#
# Usage: bash preprocess_dataset.sh DATASET_NAME [TOP_CATEGORIES]
#   DATASET_NAME    name of the dataset; selects the directory and the
#                   meta_/review .jsonl files checked below
#   TOP_CATEGORIES  forwarded unchanged to preprocess.py as its second
#                   argument (default: 10)
#
# Exit status: 0 when preprocess.py succeeds; 1 on bad usage, missing inputs,
# failure to create the images directory, or a preprocess.py failure.

# Print the given message to stderr and terminate with status 1.
die() {
    printf '%s\n' "$*" >&2
    exit 1
}

# An empty first argument is as unusable as a missing one.
if [[ $# -eq 0 || -z "$1" ]]; then
    die "Usage: bash preprocess_dataset.sh DATASET_NAME [TOP_CATEGORIES]"
fi

dataset_name="$1"
top_categories="${2:-10}"

# Expected layout of the dataset on disk.
# NOTE(review): the root is hard-coded here; presumably preprocess.py reads
# from the same location -- confirm before changing either side.
dataset_dir="/home/yqiao47/dataset/${dataset_name}"
meta_file="${dataset_dir}/meta_${dataset_name}.jsonl"
review_file="${dataset_dir}/${dataset_name}.jsonl"

# Collect every missing input so the user sees all problems in one run.
missing=()
[[ -d "$dataset_dir" ]] || missing+=("$dataset_dir (directory)")
[[ -f "$meta_file" ]] || missing+=("$meta_file")
[[ -f "$review_file" ]] || missing+=("$review_file")

if (( ${#missing[@]} > 0 )); then
    echo "Error: Required dataset files not found" >&2
    printf '  missing: %s\n' "${missing[@]}" >&2
    exit 1
fi

# Create images directory if needed; stop if it cannot be created instead of
# letting the preprocessing step run against an incomplete layout.
mkdir -p -- "${dataset_dir}/images" \
    || die "Error: Could not create directory ${dataset_dir}/images"

# Run the preprocessing script (resolved relative to the current directory).
echo "Starting preprocessing for $dataset_name with $top_categories top categories"
if ! python preprocess.py "$dataset_name" "$top_categories"; then
    die "Error: Preprocessing failed for $dataset_name"
fi

echo "Preprocessing completed successfully for $dataset_name"