#!/usr/bin/env bash
# Measure hand-tuned Pallas vs XLA baseline on tpu-node-2 using the full-50 harness.
# Produces apples-to-apples speedups for the 8 priority kernels that ship with optimized.py.
#
# What this script does, in order:
#   1. Activates the `autocomp` conda environment from $HOME/miniconda3.
#   2. Changes into the autocomp checkout (relative paths below depend on it).
#   3. Exports AUTOCOMP_TPU_NAME and AUTOCOMP_JAXBENCH_PROFILE for the harness.
#   4. Runs `python -m autocomp.baselines.measure_handtuned --output_dir output/handtuned`.
# Any failing step aborts the script (set -e).
set -euo pipefail

# Print a message framed by separator lines on stdout.
# Arguments: $1 - the message line.
banner() {
  echo "============================================"
  echo "$1"
  echo "============================================"
}

readonly CONDA_SH="${HOME}/miniconda3/etc/profile.d/conda.sh"
if [[ ! -r "$CONDA_SH" ]]; then
  printf 'error: conda shell hook not found or unreadable: %s\n' "$CONDA_SH" >&2
  exit 1
fi

# conda's shell hook and environment activate.d scripts may reference unset
# variables, which would abort the script under nounset. Relax -u only for
# the activation step and restore it immediately afterwards.
set +u
# shellcheck source=/dev/null
source "$CONDA_SH"
conda activate autocomp
set -u

cd /path/to/autocomp

export AUTOCOMP_TPU_NAME=tpu-node-2
export AUTOCOMP_JAXBENCH_PROFILE=1

# Relative to the checkout directory entered above.
OUTDIR="output/handtuned"
mkdir -p "$OUTDIR"

banner "Hand-tuned Pallas measurement on $AUTOCOMP_TPU_NAME"

python -m autocomp.baselines.measure_handtuned \
    --output_dir "$OUTDIR"

# NOTE(review): summary.json is presumably written by measure_handtuned into
# --output_dir; this script itself does not create or verify it.
banner "DONE -- see $OUTDIR/summary.json"
