#!/usr/bin/env bash
set -euo pipefail

# Simple runner for embedding geometry analysis supporting multiple models.

# EleutherAI/pythia-160m,  -- 
# EleutherAI/pythia-410m,
# EleutherAI/pythia-1b,
# EleutherAI/pythia-1.4b,
# EleutherAI/pythia-2.8b,
# EleutherAI/pythia-6.9b,
# Qwen/Qwen2.5-0.5B,        --
# Qwen/Qwen2.5-1.5B,        -- 
# meta-llama/Llama-3.2-1B,  -- 
# meta-llama/Llama-3.2-3B,
# meta-llama/Llama-3.1-8B, --
# google/gemma-3-1b-pt
# google/gemma-3-270m
# Usage: <this script> [MODELS] [DATASET] [CUDA_DEVICE]
#   MODELS       whitespace-separated model ids passed as ONE quoted argument
#                (default: the full list below)
#   DATASET      path forwarded to --dataset
#   CUDA_DEVICE  value exported as CUDA_VISIBLE_DEVICES for the run (default: 1)
MODELS=${1:-"EleutherAI/pythia-160m EleutherAI/pythia-410m EleutherAI/pythia-1b  EleutherAI/pythia-1.4b EleutherAI/pythia-2.8b EleutherAI/pythia-6.9b Qwen/Qwen2.5-0.5B Qwen/Qwen2.5-1.5B meta-llama/Llama-3.2-1B meta-llama/Llama-3.2-3B meta-llama/Llama-3.1-8B google/gemma-3-1b-pt google/gemma-3-270m"}
# MODELS=${1:-"google/gemma-3-1b-pt google/gemma-3-270m EleutherAI/pythia-410m EleutherAI/pythia-1.4b"}
DATASET=${2:-"hidden_capacity/data/fanfics_1k_chunks.csv"}
CUDA_DEVICE=${3:-1}

#######################################
# Splits a whitespace-separated string into the global array model_list.
# Unlike an unquoted ${MODELS} expansion, this never performs pathname
# expansion, so characters such as '*' or '?' are kept literally.
# Globals:   model_list (written)
# Arguments: $1 - whitespace-separated list of model ids
#######################################
split_models() {
  model_list=()
  # -d '' makes read consume the whole string (newlines included); it then
  # reports EOF with a non-zero status, which must not trip `set -e`.
  IFS=$' \t\n' read -r -d '' -a model_list <<<"$1" || true
}

split_models "${MODELS}"

# A whitespace-only MODELS argument would otherwise yield a bare `--models`.
if ((${#model_list[@]} == 0)); then
  printf 'error: no models specified\n' >&2
  exit 1
fi

printf '%s\n' "Models:  ${MODELS}"
printf '%s\n' "Dataset: ${DATASET}"

# Each model id is passed to --models as its own argument.
CUDA_VISIBLE_DEVICES="${CUDA_DEVICE}" python embedding_geometry.py \
  --models "${model_list[@]}" \
  --dataset "${DATASET}" \
  --text-column text \
  --max-samples 1000 \
  --batch-size 4 \
  --device "cuda"

