#!/usr/bin/env bash
# Run output_score.py for every SAE, pairing each dl_local_dir with the matching features file.
# NAME = folder basename under each BASE (e.g., batch_topk_50, gated_1070, jumprelu_327, topk_80)
# Will search features files using several common prefixes to avoid naming mismatches.

# Strict mode: exit on errors, treat unset variables as errors, fail a pipeline
# when any stage fails, and let ERR traps be inherited by functions/subshells.
set -o errtrace -o errexit -o nounset -o pipefail
# Split words on newline and tab only, so values containing spaces stay whole.
IFS=$'\n\t'

# -------- Config --------
# Device passed to output_score.py: the caller's DEVICE when it is set and
# non-empty, otherwise cuda:5.
: "${DEVICE:=cuda:5}"
MODEL_TYPE='gemma2_9b'

# Absolute locations of this script's directory and of its parent directory.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
REPO_ROOT=$(cd "${SCRIPT_DIR}/.." && pwd)

# Directory that holds the per-SAE features JSON files.
FEATURES_DIR="${REPO_ROOT}/data/features/gemma2-9b-l20"

# Directories whose immediate sub-directories are the SAEs to score.
BASES=()
BASES+=("/home/dslabra5/sae4steer/SAEBench/sae_bench/custom_saes/downloaded_saes/trained_saes___google_gemma-2-9b_gated_top_k/resid_post_layer_20")

# One log file per SAE is written here; create the directory up front.
LOG_DIR="${SCRIPT_DIR}/../logs/output_score_gemma2_9b"
mkdir -p "${LOG_DIR}"

# A glob that matches nothing expands to nothing instead of to itself.
shopt -s nullglob

# -------- Helpers --------
#######################################
# Locate the features JSON file that belongs to one SAE.
# Probes a fixed, preference-ordered list of literal file names inside the
# given directory and reports the first one that exists as a regular file.
# Arguments:
#   $1 - SAE name (the basename of the SAE directory)
#   $2 - directory expected to contain the features files
# Outputs:
#   Writes the path of the first existing candidate to stdout.
# Returns:
#   0 if a candidate was found, 1 otherwise (nothing is printed then).
#######################################
find_features_file() {
  local name="$1"
  local dir="$2"

  # Candidate file names, most preferred first.
  local -a candidates=(
    "$dir/gemma2-9b_${name}_features.json"
    "$dir/gemma2_9b_${name}_features.json"
    "$dir/gemma_9b_${name}_features.json"
    "$dir/${name}_features.json"
  )

  local cand
  for cand in "${candidates[@]}"; do
    # Candidates are literal paths. Keeping the expansion quoted prevents
    # word splitting and filename expansion, so spaces or glob characters
    # ([, *, ?) in the directory or name can neither hide an existing file
    # (under nullglob) nor match an unrelated one.
    if [[ -f "$cand" ]]; then
      printf '%s\n' "$cand"
      return 0
    fi
  done

  # Not found
  return 1
}

# -------- Main --------
# Without the features directory no SAE can be paired with a features file,
# so stop before doing any work.
if [[ ! -d "$FEATURES_DIR" ]]; then
  {
    echo "[error] FEATURES_DIR not found: $FEATURES_DIR"
    echo "        Please verify the repository layout or adjust FEATURES_DIR."
  } >&2
  exit 1
fi

# Names of SAEs whose scoring run exited non-zero. The loops keep going after
# a failure so one bad SAE does not block the rest; this list decides the
# script's final exit status once every SAE has been attempted.
FAILED=()

for BASE in "${BASES[@]}"; do
  if [[ ! -d "$BASE" ]]; then
    echo "[warn] Base not found: $BASE" >&2
    continue
  fi

  # Each sub-directory of BASE is treated as one SAE.
  for SAE_DIR in "$BASE"/*; do
    [[ -d "$SAE_DIR" ]] || continue

    NAME="$(basename "$SAE_DIR")"  # e.g., batch_topk_50 / gated_1070 / jumprelu_327 / topk_80

    # An SAE without a features file is skipped; it is not counted as a failure.
    if ! FEATURES_FILE="$(find_features_file "$NAME" "$FEATURES_DIR")"; then
      {
        echo "[skip] Missing features for $NAME"
        echo "       Looked in: $FEATURES_DIR"
        echo "       Tried: gemma2-9b_${NAME}_features.json | gemma2_9b_${NAME}_features.json | gemma_9b_${NAME}_features.json | ${NAME}_features.json"
      } >&2
      continue
    fi

    echo "===================="
    echo "Scoring SAE: $NAME"
    echo "  dl_local_dir  : $SAE_DIR"
    echo "  features_file : $FEATURES_FILE"
    echo "  device/model  : $DEVICE / $MODEL_TYPE"
    echo "===================="

    # Run the scorer, mirroring stdout+stderr to the terminal and to a per-SAE
    # log. errexit is switched off around the pipeline so a failing run does
    # not abort the whole batch; PIPESTATUS[0] is python's status, not tee's.
    set +e
    python "$SCRIPT_DIR/output_score.py" \
      --device "$DEVICE" \
      --model_type "$MODEL_TYPE" \
      --features_file "$FEATURES_FILE" \
      --dl_local_dir "$SAE_DIR" \
      |& tee "$LOG_DIR/${NAME}.log"
    STATUS=${PIPESTATUS[0]}
    set -e

    if [[ $STATUS -ne 0 ]]; then
      echo "[error] Failed on $NAME (exit $STATUS)" >&2
      FAILED+=("$NAME")
    else
      echo "[done] $NAME"
    fi
    echo
  done
done

# Report failures through the exit status so callers (CI, wrapper scripts) can
# detect them; previously the script exited 0 even when every run had failed.
if (( ${#FAILED[@]} > 0 )); then
  {
    echo "[error] ${#FAILED[@]} scoring run(s) failed:"
    printf '        %s\n' "${FAILED[@]}"
  } >&2
  exit 1
fi
