#!/usr/bin/env bash
# run_probers.sh — generate many configs, run prober.py on each, then clean up.
# Usage: bash examples/run_probers.sh
# Optional: DRY_RUN=1 bash examples/run_probers.sh   # only print what would run

set -euo pipefail

# ---------- MATRIX (edit as needed) ----------
# Run i reads SAMPLES_PATHS[i] and uses RESULT_DIRS[i] as its results directory.
# Both arrays are grown together, one task family at a time, so the entry added
# to one list sits at the same index as its partner in the other.
SAMPLES_PATHS=()
RESULT_DIRS=()

# -- lie --
SAMPLES_PATHS+=(
  "artifacts/lie/llmscan_questions_1000_all.jsonl"
  "artifacts/lie/llmscan_wikidata.jsonl"
  "artifacts/lie/sciq.jsonl"
)
RESULT_DIRS+=(
  "results/lie/questions_1000/"
  "results/lie/wikidata/"
  "results/lie/sciq/"
)

# -- toxicity --
SAMPLES_PATHS+=(
  "artifacts/toxicity/surge_ai_toxicity.jsonl"
  "artifacts/toxicity/real_toxicity_prompts.jsonl"
)
RESULT_DIRS+=(
  "results/toxicity/surge_ai_toxicity/"
  "results/toxicity/real_toxicity_prompts/"
)

# -- backdoor --
SAMPLES_PATHS+=(
  "artifacts/backdoor/sleeper_agent.jsonl"
  "artifacts/backdoor/vpi.jsonl"
  "artifacts/backdoor/mtba.jsonl"
)
RESULT_DIRS+=(
  "results/backdoor/sleeper_agent/"
  "results/backdoor/vpi/"
  "results/backdoor/mtba/"
)

# -- jailbreaking --
SAMPLES_PATHS+=(
  "artifacts/jailbreaking/llmscan_gcg.jsonl"
  "artifacts/jailbreaking/pap.jsonl"
  "artifacts/jailbreaking/autodan.jsonl"
)
RESULT_DIRS+=(
  "results/jailbreaking/gcg/"
  "results/jailbreaking/pap/"
  "results/jailbreaking/autodan/"
)

# ---------- STATIC CONFIG (shared across runs) ----------
# These settings are the same for every run; each one is substituted as-is into
# the YAML config generated for that run.

# Other model identifiers kept for reference:
#   deepseek-ai/DeepSeek-R1-Distill-Llama-8B
#   Qwen/Qwen2.5-14B-Instruct
#
# Disabled presets (left commented out):
# MODEL_NAME="deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
# MODEL_NAME="meta-llama/CodeLlama-13b-Instruct-hf"
# MODEL_NAME="meta-llama/Llama-3.3-70B-Instruct"
# MODEL_NAME="Qwen/Qwen2.5-14B-Instruct"
# HIDDEN_LAYERS="[0, 14, 27]"
# HEADS="[0, 12, 23]"
#
# MODEL_NAME="meta-llama/Llama-3.1-8B-Instruct"
# HIDDEN_LAYERS="[0, 18, 35]"
# HEADS="[0, 12, 23]"

# Active model. HIDDEN_LAYERS and HEADS are written unquoted into the YAML, so
# they must already be valid YAML list literals.
MODEL_NAME='meta-llama/Llama-3.2-3B-Instruct'
HIDDEN_LAYERS='[0, 14, 27]'
HEADS='[0, 12, 23]'

# Run sizing.
BATCH_SIZE=4
MAX_SAMPLES=1000

# Feature switches, emitted as bare true/false values.
COMBINE_CAUSAL_EFFECTS=true
USE_MICROSACCADE_INTERVENTION=true
USE_ATTENTION_INTERVENTION=false  # not yet supported
USE_LAYER_INTERVENTION=false      # not yet supported
USE_GAUSSIAN_NOISE=false
USE_RANDOM_NOISE=true

# Interpreter used to launch the prober: a non-empty PYTHON_BIN from the
# environment wins, otherwise python3 is used.
: "${PYTHON_BIN:=python3}"

# Prober entry point, given relative to the current working directory.
PROBER_PY='examples/prober.py'

# Scratch directory for the generated configs; an EXIT trap deletes it again.
CFG_TMPDIR="$(mktemp -d -t probercfgs.XXXX)"
remove_cfg_tmpdir() {
  rm -rf "$CFG_TMPDIR"
}
trap remove_cfg_tmpdir EXIT

# ---------- VALIDATION ----------
# Fail fast, before any config is generated, if the environment is unusable.
# Diagnostics are written to stderr so they never mix with regular stdout output.

# The interpreter is configurable through PYTHON_BIN, so report the name that
# was actually looked up instead of a hard-coded "python3".
command -v "$PYTHON_BIN" >/dev/null || {
  echo "Error: Python interpreter '$PYTHON_BIN' not found (override with PYTHON_BIN)." >&2
  exit 1
}

# PROBER_PY is a relative path, so the script only works from the repo root.
[[ -f "$PROBER_PY" ]] || {
  echo "Error: $PROBER_PY not found. Run from repo root." >&2
  exit 1
}

# The two matrix arrays are consumed pairwise by index and must be equally long.
[[ ${#SAMPLES_PATHS[@]} -eq ${#RESULT_DIRS[@]} ]] || {
  echo "Error: SAMPLES_PATHS and RESULT_DIRS length mismatch." >&2
  exit 1
}

#######################################
# Run the prober on one config file, trying both CLI styles the project
# might use:
#   Style A: <python> <prober> --config_path <cfg>   (GNU long option)
#   Style B: <python> <prober> config_path <cfg>     (key value, no dashes)
# Style B is attempted only when style A exits non-zero. Because that retry
# starts a second full run, a notice with style A's exit status is printed to
# stderr first, so the log shows why the prober was launched twice.
# Globals:
#   PYTHON_BIN (read) - interpreter used to launch the prober
#   PROBER_PY  (read) - path of the prober script
#   DRY_RUN    (read) - when "1", only print the style-A command
# Arguments:
#   $1 - path of the config file to pass to the prober
# Outputs:
#   Dry run: the style-A command line on stdout.
#   Fallback: a one-line notice on stderr before style B starts.
# Returns:
#   0 on dry run or when either style succeeds; otherwise style B's status.
#######################################
run_prober() {
  local cfg="$1"
  local first_status=0

  if [[ "${DRY_RUN:-0}" == "1" ]]; then
    echo "   $PYTHON_BIN $PROBER_PY --config_path \"$cfg\""
    return 0
  fi

  # Style A. The `||` both captures the exit status and keeps `set -e` in the
  # calling script from aborting before the fallback can be tried.
  "$PYTHON_BIN" "$PROBER_PY" --config_path "$cfg" || first_status=$?
  if (( first_status == 0 )); then
    return 0
  fi

  printf 'run_prober: "--config_path" style exited with status %d; retrying with "config_path" style\n' \
    "$first_status" >&2

  # Style B. Its exit status becomes the function's return value.
  "$PYTHON_BIN" "$PROBER_PY" config_path "$cfg"
}

# ---------- GENERATE, RUN, CLEAN ----------
# For each index of the matrix: render a YAML config into the scratch
# directory, then pass that config to run_prober.
echo "Creating configs in: $CFG_TMPDIR"

for run_idx in "${!SAMPLES_PATHS[@]}"; do
  samples_file="${SAMPLES_PATHS[$run_idx]}"
  results_dir="${RESULT_DIRS[$run_idx]}"
  cfg_file="${CFG_TMPDIR}/config_${run_idx}.yaml"

  # Emit the config one key per line. String values are wrapped in double
  # quotes; list, number and boolean values are written bare.
  {
    printf '%s\n' '# Auto-generated by run_probers.sh'
    printf 'samples_path: "%s"\n' "$samples_file"
    printf 'model_name: "%s"\n' "$MODEL_NAME"
    printf 'hidden_layers: %s\n' "$HIDDEN_LAYERS"
    printf 'heads: %s\n' "$HEADS"
    printf 'combine_causal_effects: %s\n' "$COMBINE_CAUSAL_EFFECTS"
    printf 'batch_size: %s\n' "$BATCH_SIZE"
    printf 'max_samples: %s\n' "$MAX_SAMPLES"
    printf 'results_dir: "%s"\n' "$results_dir"
    printf 'use_microsaccade_intervention: %s\n' "$USE_MICROSACCADE_INTERVENTION"
    printf 'use_attention_intervention: %s\n' "$USE_ATTENTION_INTERVENTION"
    printf 'use_layer_intervention: %s\n' "$USE_LAYER_INTERVENTION"
    printf 'use_gaussian_noise: %s\n' "$USE_GAUSSIAN_NOISE"
    printf 'use_random_noise: %s\n' "$USE_RANDOM_NOISE"
  } > "$cfg_file"

  echo "==> Running prober with: $cfg_file for samples $samples_file, results in $results_dir"
  run_prober "$cfg_file"
done

echo "All runs finished."
echo "Cleaning up: $CFG_TMPDIR"
# The scratch directory itself is removed by the EXIT trap installed earlier.
