#!/bin/bash

# Usage:
# ./merge_and_push_all.sh -c /path/to/parent_folder -r repo_id

#######################################
# Parse command line arguments.
# Globals:   PARENT_DIR (written)   - value of -c, folder holding the checkpoints
#            REPO_ID_BASE (written) - value of -r, prefix for per-checkpoint repo ids
# Arguments: the script's command line ("$@")
# Outputs:   usage message on stderr when an unknown option (or an option
#            without its argument) is given
# Returns:   0 on success; exits the script with status 1 on a bad option
#######################################
parse_args() {
  # OPTIND is made local so the function always starts parsing at the first
  # argument, independent of any earlier getopts use in this shell.
  local opt OPTIND=1
  while getopts "c:r:" opt; do
    case "$opt" in
      c) PARENT_DIR="$OPTARG" ;;
      r) REPO_ID_BASE="$OPTARG" ;;
      *)
        # Diagnostics belong on stderr so they never mix with regular output.
        echo "Usage: $0 -c <CHECKPOINTS_ROOT> [-r <REPO_ID_BASE>]" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# Start from a clean module environment, then load the arch/h100 and
# cuda/12.4.1 modules.
module purge
module load arch/h100 cuda/12.4.1

# Activate the Python virtual environment. The path is relative, so the
# script has to be started from the directory that contains .venv.
# Abort when activation fails: otherwise the later `python` call would
# silently run with whatever interpreter happens to be on PATH.
if ! source .venv/bin/activate; then
  echo "Error: could not source .venv/bin/activate (run this script from the directory containing .venv)" >&2
  exit 1
fi

# Cluster-specific settings: give the OpenMP, MKL and NumExpr thread-count
# variables the same value.
export \
  OMP_NUM_THREADS=64 \
  MKL_NUM_THREADS=64 \
  NUMEXPR_NUM_THREADS=64

# We run on an offline partition, so set the offline flags for the
# datasets and transformers libraries.
export HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1

# Run merge_and_push.py on every checkpoint directory named opt_step-*
# directly under PARENT_DIR.
#
# Reads:  PARENT_DIR   (-c) folder that contains the opt_step-* directories
#         REPO_ID_BASE (-r) prefix used to build the per-checkpoint REPO_ID
# Exits with status 1 when PARENT_DIR is missing or not a directory, or when
# at least one merge_and_push.py run failed (all checkpoints are still tried).

# Without this check an empty PARENT_DIR would make the glob below expand
# against the filesystem root ("/opt_step-*").
if [[ -z "${PARENT_DIR:-}" ]]; then
  echo "Error: no checkpoints folder given (use -c <CHECKPOINTS_ROOT>)" >&2
  exit 1
fi
if [[ ! -d "$PARENT_DIR" ]]; then
  echo "Error: '$PARENT_DIR' is not a directory" >&2
  exit 1
fi

failed_models=()
processed_count=0

for dir in "$PARENT_DIR"/opt_step-*; do
  # Skip entries whose name ends in __merged
  # (presumably output of an earlier merge -- TODO confirm).
  if [[ "$dir" == *"__merged" ]]; then
    continue
  fi
  # Only directories are processed; this also drops the literal, unexpanded
  # pattern that the glob leaves behind when nothing matches.
  if [ -d "$dir" ]; then
    MODEL_NAME=$(basename "$dir")
    # NOTE(review): REPO_ID is only used in the log line below and is not
    # passed to merge_and_push.py -- confirm that this is intended.
    REPO_ID="${REPO_ID_BASE}-${MODEL_NAME}"
    processed_count=$((processed_count + 1))

    echo "Processing $REPO_ID"
    # merge_and_push.py is looked up relative to the current directory.
    if python merge_and_push.py "$dir" --overwrite; then
      echo "Done with $MODEL_NAME"
    else
      # Keep going with the remaining checkpoints, but remember the failure
      # so the script does not report success at the end.
      echo "Error: merge_and_push.py failed for $MODEL_NAME" >&2
      failed_models+=("$MODEL_NAME")
    fi
    echo "--------------------------------------------"
  fi
done

if (( processed_count == 0 )); then
  echo "Warning: no opt_step-* checkpoint directories found in '$PARENT_DIR'" >&2
fi

if (( ${#failed_models[@]} > 0 )); then
  echo "Failed checkpoints (${#failed_models[@]}): ${failed_models[*]}" >&2
  exit 1
fi
