#!/usr/bin/env bash
# run_parallel_experiments.sh
#
# Runs multiple MoE‑placement programs in parallel through evaluator.py,
# then waits until every job is finished before returning.
#
# USAGE
#   ./run_parallel_experiments.sh [-t|--topology-type NAME] [--max-procs N] \
#       <results_dir> <config_path> <program_folder> <program_names…>
#
# EXAMPLE
#   ./run_parallel_experiments.sh ./results ./configs/exp1.yaml ./programs \
#       greedy_placement load_aware_placement random_baseline
#
# WHAT THE SCRIPT DOES
#   1. Creates a timestamped sub‑directory inside <results_dir>.
#   2. Copies the supplied config into that directory.
#   3. Launches every <program_name>.py found in <program_folder> **in parallel**.
#   4. Captures the JSON printed after the marker line
#        === Output Results ===
#      (that’s how *evaluator.py* prints its metrics).
#   5. Adds `config_path` and `program_path` into each JSON file.
#   6. Waits (`wait`) until all background jobs finish, so the script
#      “hangs” until everything is done instead of detaching.
#   7. Produces a tiny *summary.json* that simply aggregates every result.
#
# REQUIREMENTS
#   * Bash 4+
#   * Python 3 on $PATH
set -euo pipefail

########## ── options, arguments & sanity checks ────────────────────────
# Optional flags (parsing stops at the first non-option word, so they must
# come before the positional arguments):
#   -t|--topology-type <name>  : overwrite 'topology_type' in the copied YAML
#   --max-procs <N>            : limit concurrent program runs (0 = unlimited)
#
# Positional args (after -- if needed):
#   <results_dir> <config_path> <program_folder> <program_names…>

OVERRIDE_TOPOLOGY=""
MAX_PROCS=0

while [[ $# -gt 0 ]]; do
  case "$1" in
    -t|--topology-type)
      [[ $# -ge 2 ]] || { echo "Missing value for $1" >&2; exit 1; }
      OVERRIDE_TOPOLOGY=$2
      shift 2
      ;;
    --max-procs)
      [[ $# -ge 2 ]] || { echo "Missing value for $1" >&2; exit 1; }
      # Reject anything that is not a plain non-negative integer right here,
      # instead of letting a typo surface later as an arithmetic error.
      [[ $2 =~ ^[0-9]+$ ]] || {
        echo "Invalid value for $1: '$2' (expected a non-negative integer)" >&2
        exit 1
      }
      # Force base 10 so that values such as "08" are not read as octal.
      MAX_PROCS=$((10#$2))
      shift 2
      ;;
    --)
      shift
      break
      ;;
    -*)
      echo "Unknown option: $1" >&2
      exit 1
      ;;
    *)
      break
      ;;
  esac
done

# Three fixed positionals plus at least one program name.
if (( $# < 4 )); then
  echo "Usage: $0 [--topology-type NAME] [--max-procs N] <results_dir> <config_path> <program_folder> <program_names…>" >&2
  exit 1
fi

# Bind the three fixed positionals; everything left over is a program name.
RESULTS_DIR=$1
CONFIG_PATH=$2
PROGRAM_FOLDER=$3
shift 3                     # the rest are program names
PROGRAM_NAMES=("$@")

# Fail before any work is started. Diagnostics go to stderr, like the
# option-parsing errors, so they never mix with regular output.
[[ -d "$RESULTS_DIR"   ]] || { echo "Results dir '$RESULTS_DIR' not found" >&2; exit 1; }
[[ -f "$CONFIG_PATH"   ]] || { echo "Config '$CONFIG_PATH' not found" >&2;      exit 1; }
[[ -d "$PROGRAM_FOLDER" ]] || { echo "Program folder '$PROGRAM_FOLDER' not found" >&2; exit 1; }
# evaluator.py is invoked by relative path, so the working directory matters.
[[ -f evaluator.py     ]] || { echo "Run from the directory that contains evaluator.py" >&2; exit 1; }

########## ── create experiment directory ───────────────────────────────
# Directory name: <config basename without .yaml>_<timestamp>[_<topology>]
STAMP=$(date +'%Y%m%d_%H%M%S')
CONFIG_BASENAME=$(basename "$CONFIG_PATH" .yaml)
if [[ -n "$OVERRIDE_TOPOLOGY" ]]; then
  EXP_DIR="${RESULTS_DIR}/${CONFIG_BASENAME}_${STAMP}_${OVERRIDE_TOPOLOGY}"
else
  EXP_DIR="${RESULTS_DIR}/${CONFIG_BASENAME}_${STAMP}"
fi
mkdir -p "$EXP_DIR"/logs
cp "$CONFIG_PATH" "$EXP_DIR/"
# Every run reads this private copy, so the override below never touches the
# user's original config file.
DEST_CFG="$EXP_DIR/$(basename "$CONFIG_PATH")"
export DEST_CFG

# If requested, overwrite topology_type in the copied YAML
if [[ -n "$OVERRIDE_TOPOLOGY" ]]; then
  if grep -Eq '^[[:space:]]*topology_type:' "$DEST_CFG"; then
    # Escape the characters that are special in a sed replacement: backslash,
    # '&' (whole match) and the '|' delimiter used below.
    TOPOLOGY_SED_SAFE=$(printf '%s' "$OVERRIDE_TOPOLOGY" | sed -e 's/[\\&|]/\\&/g')
    # The pattern accepts exactly the lines the grep guard accepts (including
    # a bare "topology_type:" with nothing after the colon), and \1 keeps the
    # key's original indentation.
    # Portable in-place edit (creates a .bak backup; works on GNU/BSD sed).
    sed -E -i.bak \
      's|^([[:space:]]*)topology_type:.*$|\1topology_type: "'"$TOPOLOGY_SED_SAFE"'"|' \
      "$DEST_CFG"
  else
    # Key absent: append it as a new top-level entry.
    printf '\n%s\n' 'topology_type: "'"$OVERRIDE_TOPOLOGY"'"' >> "$DEST_CFG"
  fi
fi


echo "➜ Experiment directory: $EXP_DIR"
echo "➜ Launching ${#PROGRAM_NAMES[@]} experiment(s)…"

########## ── helper to run a single program ────────────────────────────
#######################################
# Run one program through evaluator.py and store its metrics as JSON.
# Globals:
#   DEST_CFG (read) - config file handed to the evaluator; when it is set and
#                     non-empty it takes precedence over $2
# Arguments:
#   $1 - program name; the file used is <program_folder>/<name>.py
#   $2 - config path, used only when DEST_CFG is unset or empty
#   $3 - folder that contains the program files
#   $4 - experiment directory (its logs/ sub-directory must already exist)
# Outputs:
#   <exp_dir>/logs/<name>.log      - full evaluator output (stdout + stderr)
#   <exp_dir>/<name>_results.json  - JSON printed after the marker line, with
#                                    config_path/program_path under "metadata"
#   A progress line on stdout; diagnostics on stderr.
# Returns:
#   0 on success; non-zero if the program file is missing, the evaluator
#   pipeline fails, or no valid JSON follows the marker line.
#######################################
run_one() {
  local prog_name=$1
  local cfg_path=${DEST_CFG:-$2}
  local prog_folder=$3
  local exp_dir=$4

  local prog_path="${prog_folder}/${prog_name}.py"
  [[ -f "$prog_path" ]] || { echo "Program '$prog_path' not found" >&2; return 1; }

  local log_file="${exp_dir}/logs/${prog_name}.log"
  local out_tmp="${exp_dir}/${prog_name}.json.tmp"
  local out_final="${exp_dir}/${prog_name}_results.json"
  local out_placement="${exp_dir}/${prog_name}_placement"

  # ❶ run evaluator, capture full log; awk keeps only what follows the marker.
  # Checked explicitly so a failure is reported even where `set -e` is
  # suppressed. (Seeing the evaluator's own exit status through the pipeline
  # relies on the script-level `pipefail`.)
  if ! python evaluator.py --config_path "$cfg_path" --program_path "$prog_path" \
        --save_placement_to_file "$out_placement" 2>&1 \
      | tee "$log_file" \
      | awk '/^=== Output Results ===/{flag=1;next}flag' > "$out_tmp"; then
    rm -f -- "$out_tmp"
    echo "  ✘ $prog_name: evaluator failed (see $log_file)" >&2
    return 1
  fi

  # ❷ post-process JSON (append metadata).
  # Paths travel as argv and the heredoc is quoted, so quotes or backslashes
  # in a path can never be interpreted as Python source.
  local rc=0
  python - "$out_tmp" "$out_final" "$cfg_path" "$prog_path" <<'PY' || rc=$?
import json
import pathlib
import sys

tmp_path, final_path, cfg_path, prog_path = sys.argv[1:5]
tmp = pathlib.Path(tmp_path)
if tmp.stat().st_size == 0:
    sys.exit(f"No JSON for {tmp}")
with tmp.open() as src:
    data = json.load(src)
data.setdefault("metadata", {}).update({"config_path": cfg_path,
                                        "program_path": prog_path})
with open(final_path, "w") as dst:
    json.dump(data, dst, indent=2)
PY
  # The temp file only repeats part of the log, so drop it on every path.
  rm -f -- "$out_tmp"
  if (( rc != 0 )); then
    echo "  ✘ $prog_name: no usable JSON results (see $log_file)" >&2
    return "$rc"
  fi
  echo "  ✔ $prog_name finished"
}

########## ── launch every program in background ────────────────────────
# Honour --max-procs: with a positive limit, hold back the next launch while
# that many jobs are still running (0 = start everything at once).
# Polling `jobs -pr` keeps this independent of `wait -n`.
JOB_PIDS=()
for name in "${PROGRAM_NAMES[@]}"; do
  if (( MAX_PROCS > 0 )); then
    while (( $(jobs -pr | wc -l) >= MAX_PROCS )); do
      sleep 1
    done
  fi
  run_one "$name" "$CONFIG_PATH" "$PROGRAM_FOLDER" "$EXP_DIR" &
  JOB_PIDS+=("$!")
done

########## ── wait until all jobs are done ──────────────────────────────
# A bare `wait` discards exit statuses, so each PID is waited on individually;
# JOB_PIDS and PROGRAM_NAMES share indices because both follow launch order.
FAILED_NAMES=()
for i in "${!JOB_PIDS[@]}"; do
  if ! wait "${JOB_PIDS[$i]}"; then
    FAILED_NAMES+=("${PROGRAM_NAMES[$i]}")
  fi
done

if (( ${#FAILED_NAMES[@]} > 0 )); then
  echo "✗ ${#FAILED_NAMES[@]} of ${#PROGRAM_NAMES[@]} experiment(s) failed: ${FAILED_NAMES[*]}" >&2
else
  echo "✓ All experiments completed"
fi

########## ── optional tiny summary file ────────────────────────────────
# Aggregate every <name>_results.json into summary.json, keyed by program
# name in CLI order. The names are passed to Python as argv rather than as
# hand-built JSON, so names containing quotes or backslashes are safe.
export EXP_DIR

# Exit status 3 from the helper means "summary written, but at least one
# program had no results file"; any other non-zero status is a hard failure.
SUMMARY_RC=0
python - "${PROGRAM_NAMES[@]}" <<'PY' || SUMMARY_RC=$?
import json
import os
import pathlib
import sys

exp_dir = pathlib.Path(os.environ["EXP_DIR"])
summary = {}
missing = []
for name in sys.argv[1:]:  # preserves CLI order
    try:
        with open(exp_dir / f"{name}_results.json") as f:
            summary[name] = json.load(f)
    except FileNotFoundError:
        missing.append(name)
with open(exp_dir / "summary.json", "w") as f:
    json.dump(summary, f, indent=2)
if missing:
    print("No results file for: " + ", ".join(missing), file=sys.stderr)
    sys.exit(3)
PY

if (( SUMMARY_RC == 0 )); then
  echo "✓ summary.json created"
elif (( SUMMARY_RC == 3 )); then
  echo "⚠ summary.json created, but it is incomplete" >&2
else
  exit "$SUMMARY_RC"
fi

echo "Results stored under: $EXP_DIR"

# Keep the run's exit status non-zero when results are missing.
(( SUMMARY_RC == 0 )) || exit "$SUMMARY_RC"