#!/bin/bash
# A simple and clean runner script for the new training framework.
#
# Usage:
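#   ./run.sh --dataset-config <cfg1> [<cfg2> ...] [--methods <method1> <method2> ...]
#
# Note: --methods must be the last option; every argument after it is taken as
# part of the method list. Method names may be separated by spaces or commas.
#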
# ==============================================================================
# Configuration directory description:
# 1. config/datasets_typical/   - Typical configs: fixed random strategy, c=0.1 (fast/comparison experiments)
# 2. config/datasets_vary_c/    - Vary C analysis: fixed random strategy, c values from 0.01 to 0.9
# 3. config/datasets_vary_e/    - Vary E analysis: fixed c∈{0.05, 0.5}, varying selection strategies (sar_pusb/sar_lbeA/sar_lbeB)
# ==============================================================================
#
# === Typical Config Experiment Examples (Fast/Comparison Experiments) ===
# Run all methods on a single dataset:
#   ./run.sh --dataset-config config/datasets_typical/param_sweep_20news_sbert.yaml
#   ./run.sh --dataset-config config/datasets_typical/param_sweep_imdb_sbert.yaml
#   ./run.sh --dataset-config config/datasets_typical/param_sweep_mnist.yaml
#   ./run.sh --dataset-config config/datasets_typical/param_sweep_fashionmnist.yaml
#   ./run.sh --dataset-config config/datasets_typical/param_sweep_cifar10.yaml
#   ./run.sh --dataset-config config/datasets_typical/param_sweep_connect4.yaml
#   ./run.sh --dataset-config config/datasets_typical/param_sweep_spambase.yaml
#   ./run.sh --dataset-config config/datasets_typical/param_sweep_alzheimermri.yaml
#   ./run.sh --dataset-config config/datasets_typical/param_sweep_mushrooms.yaml
#
# Batch run multiple datasets (typical configs):
#   ./run.sh --dataset-config config/datasets_typical/param_sweep_20news_sbert.yaml config/datasets_typical/param_sweep_imdb_sbert.yaml config/datasets_typical/param_sweep_mnist.yaml config/datasets_typical/param_sweep_fashionmnist.yaml config/datasets_typical/param_sweep_cifar10.yaml config/datasets_typical/param_sweep_alzheimermri.yaml config/datasets_typical/param_sweep_connect4.yaml config/datasets_typical/param_sweep_spambase.yaml config/datasets_typical/param_sweep_mushrooms.yaml
#
# === Vary C Analysis Experiment Examples (Label Ratio Analysis) ===
# Run all methods on a single dataset:
#   ./run.sh --dataset-config config/datasets_vary_c/param_sweep_20news_sbert.yaml
#   ./run.sh --dataset-config config/datasets_vary_c/param_sweep_imdb_sbert.yaml
#   ./run.sh --dataset-config config/datasets_vary_c/param_sweep_mnist.yaml
#   ./run.sh --dataset-config config/datasets_vary_c/param_sweep_fashionmnist.yaml
#   ./run.sh --dataset-config config/datasets_vary_c/param_sweep_cifar10.yaml
#   ./run.sh --dataset-config config/datasets_vary_c/param_sweep_connect4.yaml
#   ./run.sh --dataset-config config/datasets_vary_c/param_sweep_spambase.yaml
#   ./run.sh --dataset-config config/datasets_vary_c/param_sweep_alzheimermri.yaml
#   ./run.sh --dataset-config config/datasets_vary_c/param_sweep_mushrooms.yaml
#
# Batch run multiple datasets (vary C analysis):
#   ./run.sh --dataset-config config/datasets_vary_c/param_sweep_20news_sbert.yaml config/datasets_vary_c/param_sweep_imdb_sbert.yaml config/datasets_vary_c/param_sweep_mnist.yaml config/datasets_vary_c/param_sweep_fashionmnist.yaml config/datasets_vary_c/param_sweep_cifar10.yaml config/datasets_vary_c/param_sweep_alzheimermri.yaml config/datasets_vary_c/param_sweep_connect4.yaml config/datasets_vary_c/param_sweep_spambase.yaml config/datasets_vary_c/param_sweep_mushrooms.yaml
#
# === Vary E Analysis Experiment Examples (Selection Strategy Analysis) ===
# Run all methods on a single dataset:
#   ./run.sh --dataset-config config/datasets_vary_e/param_sweep_20news_sbert.yaml
#   ./run.sh --dataset-config config/datasets_vary_e/param_sweep_imdb_sbert.yaml
#   ./run.sh --dataset-config config/datasets_vary_e/param_sweep_mnist.yaml
#   ./run.sh --dataset-config config/datasets_vary_e/param_sweep_fashionmnist.yaml
#   ./run.sh --dataset-config config/datasets_vary_e/param_sweep_cifar10.yaml
#   ./run.sh --dataset-config config/datasets_vary_e/param_sweep_connect4.yaml
#   ./run.sh --dataset-config config/datasets_vary_e/param_sweep_spambase.yaml
#   ./run.sh --dataset-config config/datasets_vary_e/param_sweep_alzheimermri.yaml
#   ./run.sh --dataset-config config/datasets_vary_e/param_sweep_mushrooms.yaml
#
# Batch run multiple datasets (vary E analysis):
#   ./run.sh --dataset-config config/datasets_vary_e/param_sweep_20news_sbert.yaml config/datasets_vary_e/param_sweep_imdb_sbert.yaml config/datasets_vary_e/param_sweep_mnist.yaml config/datasets_vary_e/param_sweep_fashionmnist.yaml config/datasets_vary_e/param_sweep_cifar10.yaml config/datasets_vary_e/param_sweep_alzheimermri.yaml config/datasets_vary_e/param_sweep_connect4.yaml config/datasets_vary_e/param_sweep_spambase.yaml config/datasets_vary_e/param_sweep_mushrooms.yaml
#
# === Specified Method Run Examples ===
# Run only specific methods:
#   ./run.sh --dataset-config config/datasets_typical/param_sweep_mnist.yaml --methods nnpu pn
#   ./run.sh --dataset-config config/datasets_vary_c/param_sweep_mnist.yaml --methods nnpu pn
#   ./run.sh --dataset-config config/datasets_vary_e/param_sweep_cifar10.yaml --methods vaepu holisticpu
#
# Re-train poorly performing methods:
#   ./run.sh --dataset-config config/datasets_vary_c/param_sweep_20news_sbert.yaml --methods robustpu selfpu lagam
#
# === Single Method Complete Scan Examples ===
# Run complete parameter scan for a single method on a single dataset:
#   ./run.sh --dataset-config config/datasets_typical/param_sweep_cifar10.yaml --methods vaepu
#   ./run.sh --dataset-config config/datasets_vary_c/param_sweep_imdb_sbert.yaml --methods vaepu
#   ./run.sh --dataset-config config/datasets_vary_e/param_sweep_mnist.yaml --methods pan

# --- Script Configuration ---
# Abort the script as soon as any command returns a non-zero status.
set -e

# Full list of method names. This is the default selection when --methods is
# not given on the command line.
ALL_METHODS="pn nnpu nnpusb distpu robustpu selfpu"
ALL_METHODS+=" vpu lagam bbepu puet p3mixc p3mixe"
ALL_METHODS+=" lbe pulcpbf holisticpu vaepu cgenpu pan"

# --- Argument Parsing ---
DATASET_CONFIGS=()
METHODS_TO_RUN=""

# Parse the command line into the globals DATASET_CONFIGS (array) and
# METHODS_TO_RUN (string).
#   --dataset-config <cfg>...  one or more config paths, collected until the
#                              next argument starting with "--" or the end
#   --methods <name>...        ALL remaining arguments, joined with spaces
# Any other argument is reported on stderr and ends the script with status 1.
parse_args() {
    while (( $# > 0 )); do
        case "$1" in
            --dataset-config)
                shift
                # Collect one or more configs until next option or end
                while (( $# > 0 )) && [[ "$1" != --* ]]; do
                    DATASET_CONFIGS+=("$1")
                    shift
                done
                ;;
            --methods)
                shift
                # "$*" (not "$@") is the correct way to flatten the remaining
                # arguments into a single string.
                METHODS_TO_RUN="$*"
                return 0
                ;;
            *)
                echo "Unknown parameter passed: $1" >&2
                exit 1
                ;;
        esac
    done
}

parse_args "$@"

# At least one dataset config is mandatory. Diagnostics are written to stderr
# so they are not mixed into stdout when the caller captures or pipes it.
if [[ ${#DATASET_CONFIGS[@]} -eq 0 ]]; then
    echo "Error: --dataset-config is a required argument." >&2
    echo "Usage: $0 --dataset-config <cfg1> [<cfg2> ...] [--methods ...]" >&2
    exit 1
fi

# Check every config before giving up, so a batch invocation reports all
# missing files in one pass instead of one per attempt.
missing_config_found=0
for cfg in "${DATASET_CONFIGS[@]}"; do
    if [[ ! -f "$cfg" ]]; then
        echo "Error: Dataset config file not found at: $cfg" >&2
        missing_config_found=1
    fi
done
if (( missing_config_found )); then
    exit 1
fi

# Resolve the final, space-separated method list in METHODS_TO_RUN:
#   1. commas become spaces, so "nnpu,pn" and "nnpu pn" are equivalent;
#   2. if nothing but whitespace is left (no --methods, or only separators),
#      fall back to ALL_METHODS.
# Normalizing BEFORE the emptiness check ensures an input such as "," cannot
# slip through as a blank method list.
resolve_methods() {
    METHODS_TO_RUN=${METHODS_TO_RUN//,/ }
    if [[ -z "${METHODS_TO_RUN//[[:space:]]/}" ]]; then
        echo "No --methods specified, running all available methods."
        METHODS_TO_RUN=$ALL_METHODS
    fi
}

resolve_methods

# Ensure results/runs directory exists for logging
mkdir -p results/runs

# --- Execution ---
# Name the log after the first config file (".yaml" suffix dropped). When more
# than one config was given, append "_plus<N>", N being the number of
# additional configs, followed by a timestamp.
extra_config_count=$(( ${#DATASET_CONFIGS[@]} - 1 ))
CONFIG_BASENAME=$(basename "${DATASET_CONFIGS[0]}" .yaml)
if (( extra_config_count > 0 )); then
    CONFIG_BASENAME="${CONFIG_BASENAME}_plus${extra_config_count}"
fi
run_timestamp=$(date +%Y%m%d_%H%M%S)
LOG_FILE="results/runs/run_${CONFIG_BASENAME}_${run_timestamp}.log"

# Summary banner describing what is about to be run.
banner_rule="======================================================================"
printf '%s\n' \
    "$banner_rule" \
    "Starting training run..." \
    "Dataset Configs: ${DATASET_CONFIGS[*]}" \
    "Methods to run: $METHODS_TO_RUN" \
    "Log file: $LOG_FILE" \
    "$banner_rule"

# Build the command as an array so that each config path and each method name
# reaches Python as exactly one argument. Paths containing spaces or glob
# characters are passed through intact instead of being re-split by the shell.
read -r -a METHOD_ARGS <<< "$METHODS_TO_RUN"

# If Python output seems to be buffering, you can try adding the -u flag
# (i.e. "python -u run_train.py ...").
COMMAND=(python run_train.py --dataset-config "${DATASET_CONFIGS[@]}" --methods "${METHOD_ARGS[@]}")

echo "Executing command: ${COMMAND[*]}"
echo "----------------------------------------------------------------------"

# Execute the command, showing output on the console and saving it to a log file.
# The pipeline's own exit status is tee's, so the training status must be read
# from PIPESTATUS on the very next line, before any other command resets it.
"${COMMAND[@]}" 2>&1 | tee "$LOG_FILE"
train_status=${PIPESTATUS[0]}

echo "----------------------------------------------------------------------"
if [[ "$train_status" -ne 0 ]]; then
    # Propagate the failure instead of reporting success and exiting 0.
    echo "Training run failed with exit code $train_status." >&2
    echo "Log saved to: $LOG_FILE" >&2
    echo "======================================================================" >&2
    exit "$train_status"
fi
echo "Training run finished."
echo "Log saved to: $LOG_FILE"
echo "======================================================================"
