#!/bin/bash
# Complete pipeline to process the close_drawer dataset

# Strict mode:
#   -e          exit as soon as a command fails
#   -u          treat expansion of an unset variable as an error (catches typos)
#   -o pipefail a pipeline fails if any stage of it fails
set -euo pipefail

# Configuration
# DATA_ROOT:  root directory of the close_drawer dataset.
# VARIATION:  sub-directory of DATA_ROOT that is processed by this script.
# OUTPUT_DIR: where every generated .npz file is written.
DATA_ROOT="/scratch4/workspace/placeholder-hdp/dppo/data/close_drawer"
VARIATION="variation2"
OUTPUT_DIR="${DATA_ROOT}/${VARIATION}/processed"

# Path to RLBench scripts (reuse existing scripts).
# The convert / compute-normalization / normalize Python scripts are
# invoked from this directory.
RLBENCH_SCRIPTS="/home/placeholder/dsrl/dppo/RLBench"

# Print the start-of-run banner: a title framed by rules, the dataset
# location being processed, and a trailing blank line.
printf '%s\n' \
  "==========================================" \
  "Processing Close Drawer Dataset" \
  "==========================================" \
  "Data root: ${DATA_ROOT}" \
  "Variation: ${VARIATION}" \
  ""

# Create output directory
mkdir -p "${OUTPUT_DIR}"

# Step 1: Convert train data to NPZ
# The close_drawer dataset has a variation2/train/episodes/ structure.
# A temporary directory containing variation0/episodes (a symlink to the real
# episodes directory) is built so the data can be handed to convert_to_npz.py.
# NOTE(review): presumably convert_to_npz.py expects
# <data_path>/variation0/episodes — confirm against that script.
echo "Step 1/3: Converting train data to NPZ..."
TEMP_TRAIN_DIR="${OUTPUT_DIR}/temp_train"

# Remove the temporary symlink tree. `rm` deletes the symlink itself and does
# not descend into the episodes directory it points to. `:?` aborts instead of
# running `rm -rf` on an empty path.
cleanup_temp_train() {
  rm -rf -- "${TEMP_TRAIN_DIR:?}"
}
# Make sure the temporary tree is removed even when the conversion fails and
# `set -e` terminates the script before the explicit cleanup below.
trap cleanup_temp_train EXIT

mkdir -p "${TEMP_TRAIN_DIR}/variation0"
# -n: replace an existing link instead of creating a link inside its target.
ln -sfn "${DATA_ROOT}/${VARIATION}/train/episodes" "${TEMP_TRAIN_DIR}/variation0/episodes"

python "${RLBENCH_SCRIPTS}/convert_to_npz.py" \
  --data_path="${TEMP_TRAIN_DIR}" \
  --output_path="${OUTPUT_DIR}/train_raw.npz"

# Conversion succeeded: clean up now and disarm the trap.
cleanup_temp_train
trap - EXIT

echo ""
echo "Step 2/3: Computing normalization statistics using robot bounds..."
# Reads the raw train archive written by step 1 and writes normalization.npz.
# The script path is quoted so it survives spaces or glob characters in
# RLBENCH_SCRIPTS.
python "${RLBENCH_SCRIPTS}/compute_normalization_robot_bounds.py" \
  --data_path="${OUTPUT_DIR}/train_raw.npz" \
  --output_path="${OUTPUT_DIR}/normalization.npz"

echo ""
echo "Step 3/3: Normalizing train data..."
# Takes train_raw.npz plus normalization.npz as inputs and writes
# train_normalized.npz. The script path is quoted so it survives spaces or
# glob characters in RLBENCH_SCRIPTS.
python "${RLBENCH_SCRIPTS}/normalize_dataset.py" \
  --input_path="${OUTPUT_DIR}/train_raw.npz" \
  --output_path="${OUTPUT_DIR}/train_normalized.npz" \
  --normalization_path="${OUTPUT_DIR}/normalization.npz"

# Final report: completion banner followed by the paths of the three files
# produced under OUTPUT_DIR. The here-document starts with an intentional
# blank line to separate the report from the preceding step's output.
cat <<EOF

==========================================
Processing Complete!
==========================================

Output files:
  Train (raw):        ${OUTPUT_DIR}/train_raw.npz
  Train (normalized): ${OUTPUT_DIR}/train_normalized.npz
  Normalization:      ${OUTPUT_DIR}/normalization.npz

Dataset ready for training!
EOF
