#!/bin/bash

# Default number of samples
DEFAULT_TRAIN_SAMPLES=100000
DEFAULT_TEST_SAMPLES=1000

# Sample counts used for generation; -t / -e override the defaults.
NUM_TRAIN_SAMPLES=$DEFAULT_TRAIN_SAMPLES
NUM_TEST_SAMPLES=$DEFAULT_TEST_SAMPLES

#######################################
# Print the one-line usage message.
# Outputs: usage text on stdout (callers redirect to stderr on errors).
#######################################
usage() {
  echo "Usage: $0 [-t TRAIN_SAMPLES] [-e TEST_SAMPLES]"
}

#######################################
# Check that a value is a non-negative decimal integer.
# Arguments: $1 - value to check
# Returns:   0 if $1 consists only of digits, 1 otherwise
#######################################
is_count() {
  [[ $1 =~ ^[0-9]+$ ]]
}

#######################################
# Parse command-line options.
# Globals:   NUM_TRAIN_SAMPLES, NUM_TEST_SAMPLES (written),
#            OPTIND (reset, then left pointing past the parsed options)
# Arguments: the script's command-line arguments
# Outputs:   usage on stdout for -h; diagnostics and usage on stderr on error
# Returns:   0 when parsing succeeds; exits 0 for -h and 1 on a bad
#            option, a missing argument, or a non-integer sample count
#######################################
parse_args() {
  local opt
  # Reset so the function can be called more than once.
  OPTIND=1
  # Leading ':' selects silent mode, so missing arguments and unknown
  # options are reported by the arms below instead of by getopts itself.
  while getopts ':t:e:h' opt; do
    case "$opt" in
      t)
        if ! is_count "$OPTARG"; then
          echo "Invalid value for -t: '$OPTARG' (expected a non-negative integer)" 1>&2
          usage 1>&2
          exit 1
        fi
        NUM_TRAIN_SAMPLES="$OPTARG"
        ;;
      e)
        if ! is_count "$OPTARG"; then
          echo "Invalid value for -e: '$OPTARG' (expected a non-negative integer)" 1>&2
          usage 1>&2
          exit 1
        fi
        NUM_TEST_SAMPLES="$OPTARG"
        ;;
      h)
        # Help was requested explicitly, so this is not an error.
        usage
        exit 0
        ;;
      :)
        echo "Option -$OPTARG requires an argument" 1>&2
        usage 1>&2
        exit 1
        ;;
      *)
        echo "Invalid option: -$OPTARG" 1>&2
        usage 1>&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# Drop the parsed options from the script's own positional parameters.
shift $((OPTIND - 1))

# Seeds forwarded to the generator (kept fixed so runs are repeatable)
SEED_TEST=43
SEED_TRAIN=42

# Root directory; each (n, m) pair gets its own sub-directory below it
BASE_OUTPUT_DIR='data/data/small/index'

# Values of n to sweep over
# Alternative sweep: N_VALUES=(20 50 100 200 1000)
N_VALUES=( 13 )

# Values of m to sweep over (example)
M_VALUES=( 1 2 4 8 )


#######################################
# Generate the dataset for a single (n, m) pair.
# Globals:   BASE_OUTPUT_DIR, NUM_TRAIN_SAMPLES, NUM_TEST_SAMPLES,
#            SEED_TRAIN, SEED_TEST (read)
# Arguments: $1 - n value, $2 - m value
# Outputs:   progress line on stdout; diagnostics on stderr
# Returns:   0 on success, 1 if the output directory cannot be created
#            or the generator exits with a non-zero status
#######################################
generate_dataset() {
  local n=$1
  local m=$2
  # Output directory name is derived from n and m
  local output_dir="${BASE_OUTPUT_DIR}/n=${n}_m=${m}"

  # Create the directory in advance
  if ! mkdir -p "$output_dir"; then
    echo "Error: could not create output directory $output_dir" 1>&2
    return 1
  fi

  echo "Generating dataset for n=$n, m=$m (Train: $NUM_TRAIN_SAMPLES, Test: $NUM_TEST_SAMPLES) into $output_dir..."
  if ! python3 src/data/indexdataset.py \
    --n "$n" \
    --m "$m" \
    --num_train_samples "$NUM_TRAIN_SAMPLES" \
    --num_test_samples "$NUM_TEST_SAMPLES" \
    --seed_train "$SEED_TRAIN" \
    --seed_test "$SEED_TEST" \
    --output_dir "$output_dir"; then
    echo "Error: dataset generation failed for n=$n, m=$m" 1>&2
    return 1
  fi
}

# Generate a dataset for each n and m. A failing pair does not stop the
# sweep; it is recorded so the script can report it and exit non-zero
# instead of claiming success.
failed_pairs=()
for n in "${N_VALUES[@]}"; do
  for m in "${M_VALUES[@]}"; do
    generate_dataset "$n" "$m" || failed_pairs+=("n=${n}_m=${m}")
  done
done

if (( ${#failed_pairs[@]} > 0 )); then
  echo "Dataset generation failed for: ${failed_pairs[*]}" 1>&2
  exit 1
fi

echo "All datasets generated."