#!/usr/bin/env bash
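# run_v5_lenaware_and_validation.sh
# v5: strict length-aware augmentation + upgraded validation
#
# Runs augment_v5_semantic_lenaware.py and then kl_validation_v2.py.
# Both scripts and the ../data directory are referenced by relative path,
# so invoke this from the directory that contains the two Python scripts.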

set -euo pipefail

# Root of all inputs and outputs, relative to the current working directory.
DATA_DIR='../data'

# Source sheets (passed as --sheet1 / --sheet2).
SHEET1="${DATA_DIR}/Sheet_1.csv"
SHEET2="${DATA_DIR}/Sheet_2.csv"

# Augmented v5 length-aware sheets (passed as --aug1 / --aug2).
AUG1="${DATA_DIR}/10KSheet_1_augmented_v5_lenaware.csv"
AUG2="${DATA_DIR}/10KSheet_2_augmented_v5_lenaware.csv"

# Destination for the validation reports; created (with parents) if missing.
OUTDIR_VAL="${DATA_DIR}/validation_reports_v5"
mkdir -p "${OUTDIR_VAL}"

# Stage 1: run the v5 length-aware augmentation.
#
# The sheet and output paths come from SHEET1/SHEET2/AUG1/AUG2 rather than
# being repeated as literals, so the files written here are exactly the ones
# the validation stage reads back, even if DATA_DIR or a filename changes.
#
# The remaining flags are passed through unchanged; their meaning is defined
# by augment_v5_semantic_lenaware.py.
augment_args=(
  # Inputs and outputs.
  --sheet1 "$SHEET1"
  --sheet2 "$SHEET2"
  --out1 "$AUG1"
  --out2 "$AUG2"

  # Tuning parameters (values unchanged from the previous invocation).
  --n 10000
  --lo 0.74 --hi 0.99 --min_lo 0.72
  --nearest_tol 0.10
  --len_word_tol 0.25 --len_char_tol 0.25
  --batch_size 128
  --max_tries_per_item 12
  --k_max 2
  --swaps_max 12
  --max_per_seed 8
  --per_seed_dupe_times 3
  --hard_attempt_budget 1500
)

python augment_v5_semantic_lenaware.py "${augment_args[@]}"

# Stage 2: run the validation script on the source and augmented sheets,
# writing its reports into OUTDIR_VAL.
python kl_validation_v2.py \
  --sheet1 "${SHEET1}" --sheet2 "${SHEET2}" \
  --aug1 "${AUG1}" --aug2 "${AUG2}" \
  --outdir "${OUTDIR_VAL}" \
  --k 5 --bins 30 --svd_dim 50 --seed 7

# Final status: one line per message, pointing at the validation output dir.
printf '%s\n' \
  'All done.' \
  "Validation -> ${OUTDIR_VAL} (kl_validation_report.json + kl_validation_summary.md)"
