#!/usr/bin/env bash
set -euo pipefail

# Create a cleaned copy of the repo and zip it for anonymous supplementary material.
#
# What it does:
# - Copies the repo excluding VCS and cache files (.git, __pycache__, etc.)
# - Removes conda "prefix:" lines from any env.yml files
# - Removes a third-party pyproject.toml that contains an author email, if present (safe to delete)
# - Produces a zip in the parent directory
# - Prints a quick grep to double-check no emails remain
#
# Usage (run from the repository root; the current directory is what gets copied):
#   bash scripts/make_supp_zip.sh

# Output naming: both artifacts share the stem "<repo_name>-supp-<timestamp>"
# and are placed in the parent of the directory the script is run from.
repo_name='acr-wmdp'
timestamp=$(date '+%Y%m%d-%H%M%S')
src_root=$(pwd)
out_parent="${src_root}/.."
bundle_name="${repo_name}-supp-${timestamp}"
# Directory that receives the cleaned copy.
clean_dir="${out_parent}/${bundle_name}"
# Archive produced from that directory (same stem plus ".zip").
zip_path="${clean_dir}.zip"

echo "[1/5] Making cleaned copy at: ${clean_dir}"
mkdir -p "${clean_dir}"

# Patterns left out of the cleaned copy: VCS metadata, Python/test caches,
# notebooks and their checkpoints, and editor/OS clutter.
skip_patterns=(
  '.git'
  '.gitignore'
  '.gitattributes'
  '.github'
  '__pycache__'
  '*.pyc'
  '.DS_Store'
  '*.ipynb'
  '*.ipynb_checkpoints*'
  '.pytest_cache'
  '.mypy_cache'
  '.vscode'
  '.idea'
)

# Archive-mode copy of the current directory, one --exclude per pattern above.
rsync_args=(-a)
for pattern in "${skip_patterns[@]}"; do
  rsync_args+=("--exclude=${pattern}")
done
rsync "${rsync_args[@]}" ./ "${clean_dir}/"

echo '[2/5] Stripping conda "prefix:" lines from env files'
# find emits NUL-terminated paths so names containing spaces or newlines
# are read back intact.
while IFS= read -r -d '' env_file; do
  scrubbed="${env_file}.tmp"
  # Keep every line except those that start with 'prefix:'; the filtered copy
  # replaces the original only when awk succeeded.
  awk '!/^prefix:/' "${env_file}" >"${scrubbed}" && mv "${scrubbed}" "${env_file}"
done < <(find "${clean_dir}" -name 'env.yml' -type f -print0)

echo "[3/5] Removing third-party author metadata file (safe): flrt_repo/pyproject.toml"
third_party_meta="${clean_dir}/src/enhanced_gcg/flrt_repo/pyproject.toml"
# -f tolerates a missing file; '|| true' additionally keeps any other rm
# failure from aborting the script under 'set -e'.
rm -f "${third_party_meta}" || true

echo "[4/5] Creating zip: ${zip_path}"
# Zip from the directory that contains the cleaned copy so the archive holds a
# single top-level folder; the subshell keeps the cd from leaking out.
bundle_dir_name="$(basename "${clean_dir}")"
(
  cd "${clean_dir}/.." && zip -qr "${zip_path}" "${bundle_dir_name}"
)

#######################################
# Recursively list lines that look like they contain an e-mail address.
# Uses ripgrep when installed, otherwise grep. ripgrep is told to include
# hidden and ignore-listed files so both tools look at the same set of files
# (by default ripgrep would skip them and could miss a leak).
# Arguments: $1 - directory to scan
# Outputs:   matching lines (with file and line number) on stdout
# Returns:   0 if the scan ran (with or without matches);
#            2 if $1 is not a directory;
#            otherwise the search tool's own error status (> 1)
#######################################
scan_for_emails() {
  local target_dir=${1-}
  local email_re='[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+'
  local status=0

  if [[ ! -d "${target_dir}" ]]; then
    printf 'scan_for_emails: not a directory: %s\n' "${target_dir}" >&2
    return 2
  fi

  if command -v rg >/dev/null 2>&1; then
    # No --pcre2: the pattern is plain regex syntax, and requesting PCRE2
    # makes rg fail on builds compiled without it.
    rg -n --hidden --no-ignore -e "${email_re}" -- "${target_dir}" || status=$?
  else
    grep -RInE -e "${email_re}" -- "${target_dir}" || status=$?
  fi

  # Both tools exit 1 for "no matches", which is the desired outcome here;
  # only statuses above 1 indicate that the scan itself went wrong.
  if ((status > 1)); then
    return "${status}"
  fi
  return 0
}

echo "[5/5] Quick sanity scan for e-mails in cleaned copy (should be empty)"
# Best-effort check: a failed scan does not abort the script, but it is reported
# loudly so that empty output is not mistaken for a clean result.
if ! scan_for_emails "${clean_dir}"; then
  echo "WARNING: e-mail scan failed; empty output above does NOT mean the copy is clean" >&2
fi

# Final report: where the results are, plus optional metadata-stripping commands
# the user can run by hand. One printf argument per output line.
printf '%s\n' \
  "Done." \
  "" \
  "Result directory: ${clean_dir}" \
  "Result zip:       ${zip_path}" \
  "" \
  "Optional extra hardening (if you have exiftool/ImageMagick):" \
  "  exiftool -overwrite_original -all= ${clean_dir}/figures/* 2>/dev/null || true" \
  "  mogrify -strip ${clean_dir}/figures/*.png 2>/dev/null || true" \
  "  # Re-zip after additional stripping if desired"
