#!/bin/bash -l
#SBATCH --output=scripts/logs/baseline.out
#SBATCH -w rlab7
#SBATCH -G 4
#
# SLURM batch job: run the LongBench baseline (dense attention) over a fixed
# dataset list, invoking run_benchmark.py once per dataset.
#
# Exit status: 0 when every dataset run succeeds; 1 when the environment
# cannot be set up or at least one dataset run fails.

# --- Environment -----------------------------------------------------------

# `conda init` only rewrites shell startup files; it does not make
# `conda activate` usable in the already-running shell. Load the shell hook
# into this shell instead, and stop early if conda is unavailable.
if ! command -v conda >/dev/null 2>&1; then
  echo "error: conda not found in this shell" >&2
  exit 1
fi
eval "$(conda shell.bash hook)"
if ! conda activate pqcache; then
  echo "error: failed to activate conda environment 'pqcache'" >&2
  exit 1
fi

# Must be exported: a plain assignment is only inherited by child processes
# if the variable already happened to be exported by the environment.
export CUDA_VISIBLE_DEVICES=0,1,2,3
export PYTHONNOUSERSITE=1

# --- Configuration ---------------------------------------------------------

# Run LongBench baseline (dense attention) over a fixed dataset list.
MODEL_KEY=llama-3.1
EXP_NAME=baseline

# Datasets to iterate; uncomment an entry to include it in the run.
DATASETS=(
  # multifieldqa_en
  # multifieldqa_zh
  # trec
  lcc
  # gov_report
  # narrativeqa
)

# Repo root of long_context_eval; run_benchmark.py is invoked relative to it.
REPO_ROOT=/filer/tmp1/WIRED/sampling/long_context_eval

# --- Run -------------------------------------------------------------------

# Abort rather than run the benchmark from an unexpected working directory.
if ! cd "${REPO_ROOT}"; then
  echo "error: cannot cd to ${REPO_ROOT}" >&2
  exit 1
fi

echo "Running baseline on: ${DATASETS[*]} (model=${MODEL_KEY}, exp=${EXP_NAME})"

# Keep going after a failed dataset so the remaining ones still run, but
# remember the failures so the job's exit status reflects them.
failed=()
for ds in "${DATASETS[@]}"; do
  echo "[baseline] dataset=${ds}"
  if ! python run_benchmark.py \
      --model_key "${MODEL_KEY}" \
      --exp_name "${EXP_NAME}" \
      --datasets "${ds}"; then
    echo "[baseline] dataset=${ds} FAILED" >&2
    failed+=("${ds}")
  fi
done

if (( ${#failed[@]} > 0 )); then
  echo "Baseline runs finished with failures: ${failed[*]}" >&2
  exit 1
fi

echo "Baseline runs completed."


