#!/bin/bash
# Complete pipeline to process the pick-and-place dataset
#
# Extracts REACH and CARRY phases as SEPARATE 64-step trajectories.
# 80 episodes -> 160 trajectories (80 REACH + 80 CARRY), each 64 steps.
# These are the phases learned by the diffusion policy.

# Strict mode: abort on any failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Configuration
# Each of DATA_ROOT, VARIATION, RLBENCH_SCRIPTS and LOCAL_SCRIPTS may be
# overridden from the environment, e.g.:
#   VARIATION=variation1 bash <this script>
# When a variable is unset or empty, the default written below is used.
DATA_ROOT="${DATA_ROOT:-/scratch4/workspace/placeholder-hdp/dppo/data/stack_blocks}"
VARIATION="${VARIATION:-variation0}"
# Always derived from the two settings above; every generated file goes here.
OUTPUT_DIR="${DATA_ROOT}/${VARIATION}/processed"

# Path to RLBench scripts (for normalization)
RLBENCH_SCRIPTS="${RLBENCH_SCRIPTS:-/home/placeholder/dsrl/dppo/RLBench}"
# Path to local scripts
LOCAL_SCRIPTS="${LOCAL_SCRIPTS:-/home/placeholder/dsrl/dppo/RLBench_pick_place/make_dataset}"

# Startup banner: one output line per printf argument, so the text matches
# what a sequence of separate echo calls would print. The final empty
# argument produces the trailing blank line.
printf '%s\n' \
  "==========================================" \
  "Processing Pick-and-Place Dataset" \
  "==========================================" \
  "Data root: ${DATA_ROOT}" \
  "Variation: ${VARIATION}" \
  "Extracting: REACH and CARRY as SEPARATE 64-step trajectories" \
  "80 episodes -> 160 trajectories (80 REACH + 80 CARRY)" \
  ""

# Create output directory ("--" keeps a path starting with "-" from being
# parsed as an option).
mkdir -p -- "${OUTPUT_DIR}"

# NOTE: the script paths below are quoted so that a scripts directory whose
# name contains whitespace or glob characters is passed to python as one
# argument instead of being word-split.

# Step 1: Convert train data to NPZ (only REACH and CARRY phases)
echo "Step 1/3: Converting train data to NPZ (REACH + CARRY only)..."
python "${LOCAL_SCRIPTS}/convert_to_npz_reach_carry.py" \
  --data_path="${DATA_ROOT}/${VARIATION}" \
  --output_path="${OUTPUT_DIR}/train_raw.npz"

# Step 2: Compute normalization statistics from the raw NPZ written by step 1
echo ""
echo "Step 2/3: Computing normalization statistics using robot bounds..."
python "${RLBENCH_SCRIPTS}/compute_normalization_robot_bounds.py" \
  --data_path="${OUTPUT_DIR}/train_raw.npz" \
  --output_path="${OUTPUT_DIR}/normalization.npz"

# Step 3: Normalize the raw NPZ using the statistics file written by step 2
echo ""
echo "Step 3/3: Normalizing train data..."
python "${RLBENCH_SCRIPTS}/normalize_dataset.py" \
  --input_path="${OUTPUT_DIR}/train_raw.npz" \
  --output_path="${OUTPUT_DIR}/train_normalized.npz" \
  --normalization_path="${OUTPUT_DIR}/normalization.npz"

# Completion summary: lists the files under OUTPUT_DIR and the dataset layout.
# Each printf argument becomes one output line; empty arguments are the blank
# separator lines.
printf '%s\n' \
  "" \
  "==========================================" \
  "Processing Complete!" \
  "==========================================" \
  "" \
  "Output files:" \
  "  Train (raw):        ${OUTPUT_DIR}/train_raw.npz" \
  "  Train (normalized): ${OUTPUT_DIR}/train_normalized.npz" \
  "  Normalization:      ${OUTPUT_DIR}/normalization.npz" \
  "" \
  "Dataset info:" \
  "  - 160 trajectories (80 REACH + 80 CARRY)" \
  "  - Each trajectory has 64 steps" \
  "  - States: 22 dims (joint_pos, joint_vel, gripper, ee_pos, ee_quat)" \
  "  - Actions: 8 dims (joint_pos_target + gripper)" \
  "" \
  "Dataset ready for training!"
