#!/usr/bin/env bash
# Full pipeline: train (30000 steps) -> sample -> evaluate for each preproc.
# Preprocs: m, r, LGB_D, LGB_S, miceforest, zero, noise
# Usage: ./run_pipeline.sh <gpu_id>
# Run from AugMask_share root.
#
# Data: uses data/adult_semi_xy/<PATTERN>_p<RATIO>_<SEED>/ (e.g. NU1_p0.5_2).
# Ensure that folder exists; see data/README.md and create_minimal_preprocessed.py.

# Stop at the first command that exits with a non-zero status.
# NOTE(review): nounset (-u) is not enabled here; conda activation hooks are
# often reported to reference unset variables, so confirm before adding it.
set -o errexit

# Project name forwarded to evaluator.py as --project_name.
# An existing non-empty PROJECT from the environment wins over the default.
: "${PROJECT:=augmask}"

# Data selection. Together these name the folder
# data/adult_semi_xy/NU1_p0.5_2/ (i.e. <DATASET>_semi_xy/<PATTERN>_p<RATIO>_<SEED>).
DATASET='adult'
PATTERN='NU1'
RATIO='0.5'
SEED='2'      # passed to both python entry points as --noise_seed

# Remaining run options, forwarded verbatim on the command lines below.
COV='both'        # --cov
STRATEGY='2'      # --strategy (main.py only)
EXTRA='aug_mask'  # --extra
CDTD_CFG='configs/cdtd/default_bytype.yaml'  # --cfg_path (main.py only)

# Make the `conda` shell function available in this non-interactive shell.
# Lookup order: ~/anaconda3, then ~/miniconda3, then any `conda` found on PATH
# (initialised through its bash hook). If none is available the script prints
# a diagnostic on stderr and exits with status 1.
if [[ -f ~/anaconda3/etc/profile.d/conda.sh ]]; then
  # shellcheck source=/dev/null
  source ~/anaconda3/etc/profile.d/conda.sh
elif [[ -f ~/miniconda3/etc/profile.d/conda.sh ]]; then
  # shellcheck source=/dev/null
  source ~/miniconda3/etc/profile.d/conda.sh
elif command -v conda &>/dev/null; then
  eval "$(conda shell.bash hook)"
else
  # Diagnostics belong on stderr so they are not lost when stdout is redirected.
  echo "Error: conda not found." >&2
  exit 1
fi

# The GPU id is the single required positional argument; when it is missing or
# empty the expansion below prints the usage text and terminates the script.
: "${1:?Usage: ./run_pipeline.sh <gpu_id>}"
GPU=$1

# Exported so the python child processes started later inherit it.
WANDB_MODE=disabled
export WANDB_MODE

# Start-up banner summarising the run configuration.
banner_rule="=============================================="
printf '%s\n' \
  "$banner_rule" \
  "Pipeline: all preprocs (30000 steps each)" \
  "Data: $DATASET, pattern=$PATTERN, p=$RATIO, seed=$SEED -> data/${DATASET}_semi_xy/${PATTERN}_p${RATIO}_${SEED}/" \
  "Preprocs: m r LGB_D LGB_S miceforest zero noise" \
  "$banner_rule"

# Preprocessing variants to run, in order.
PREPROCS=(m r LGB_D LGB_S miceforest zero noise)

# Report which stage failed on stderr, then exit with that stage's status.
#   $1   - exit status of the failed command
#   $2.. - description of the failed stage
abort_stage() {
  local status=$1
  shift
  echo "Error: $* (exit status ${status})" >&2
  exit "$status"
}

# main.py is invoked from inside augmask/ and evaluator.py from ../evaluation,
# so everything below runs relative to augmask/.
cd augmask || abort_stage "$?" "cannot enter ./augmask (run from the AugMask_share root)"

# NOTE(review): the file header describes "train -> sample -> evaluate", but
# no separate sampling command is run here. Confirm that `--mode train` also
# produces the samples evaluator.py reads, or add the missing step.
for preproc in "${PREPROCS[@]}"; do
  echo ""
  echo "========== Preproc: $preproc =========="

  # Training runs in the "augmask" conda environment. It is re-activated on
  # every iteration because the evaluation stage switches to "synthcity".
  conda activate augmask || abort_stage "$?" "conda activate augmask failed"
  python main.py --model cdtd --data "$DATASET" --preproc "$preproc" --cov "$COV" --p "$RATIO" \
    --gpu "$GPU" --extra "$EXTRA" --strategy "$STRATEGY" --noise_seed "$SEED" --pattern "$PATTERN" \
    --cfg_path "$CDTD_CFG" --mode train \
    || abort_stage "$?" "training failed for preproc '$preproc'"

  # Evaluation runs in the "synthcity" environment from ../evaluation. The
  # subshell confines the directory change, so the loop always stays in
  # augmask/. `&&` is required: errexit is ignored inside the left side of ||.
  conda activate synthcity || abort_stage "$?" "conda activate synthcity failed"
  (
    cd ../evaluation &&
      python evaluator.py --method cdtd --extra "$EXTRA" --dataname "$DATASET" --cov "$COV" --p "$RATIO" \
        --gpu "$GPU" --preproc "$preproc" --noise_seed "$SEED" --pattern "$PATTERN" \
        --project_name "$PROJECT" --no_wandb
  ) || abort_stage "$?" "evaluation failed for preproc '$preproc'"

  echo "=== Completed $preproc ==="
done

# Back to the directory the script was started from.
cd ..
echo "=============================================="
echo "Pipeline completed."
echo "=============================================="
