#!/bin/bash

# Default parameters (each can be overridden by the matching --option below).
TRAIN_SAMPLES=100000
TEST_SAMPLES=1000
SEQ_LENGTH=45
# OUTPUT_DIR is filled in by parse_args: unless --output_dir is given, it is
# derived from the *final* SEQ_LENGTH so that --sequence_length is reflected
# in the default path.
OUTPUT_DIR=""
TRAIN_SEED=42
TEST_SEED=123

#######################################
# Parse command line options into the global parameters.
# Globals:
#   TRAIN_SAMPLES, TEST_SAMPLES, SEQ_LENGTH, OUTPUT_DIR,
#   TRAIN_SEED, TEST_SEED (written)
# Arguments:
#   The script's arguments: any of --train_samples, --test_samples,
#   --sequence_length, --output_dir, --train_seed, --test_seed, each
#   followed by a value.
# Outputs:
#   Writes an error message to stderr on an unknown option or a missing value.
# Returns:
#   0 on success; exits the shell with status 1 on invalid arguments.
#######################################
parse_args() {
    local output_dir_given=0

    while [[ $# -gt 0 ]]; do
        # Validate first: every known option takes exactly one value.
        case "$1" in
            --train_samples|--test_samples|--sequence_length|--output_dir|--train_seed|--test_seed)
                # Without this check, `shift 2` fails when only one argument
                # is left, nothing is shifted, and the loop never terminates.
                if [[ $# -lt 2 ]]; then
                    echo "Missing value for option: $1" >&2
                    exit 1
                fi
                ;;
            *)
                echo "Unknown option: $1" >&2
                exit 1
                ;;
        esac

        case "$1" in
            --train_samples)   TRAIN_SAMPLES="$2" ;;
            --test_samples)    TEST_SAMPLES="$2" ;;
            --sequence_length) SEQ_LENGTH="$2" ;;
            --output_dir)
                OUTPUT_DIR="$2"
                output_dir_given=1
                ;;
            --train_seed)      TRAIN_SEED="$2" ;;
            --test_seed)       TEST_SEED="$2" ;;
        esac
        shift 2
    done

    # Derive the default only after parsing, so it uses the requested length.
    if [[ "$output_dir_given" -eq 0 ]]; then
        OUTPUT_DIR="data/data/small/square_mod19/n=${SEQ_LENGTH}"
    fi
}

parse_args "$@"

# Create the output directory; abort if that fails rather than launching the
# generator with a destination that does not exist.
if ! mkdir -p -- "$OUTPUT_DIR"; then
    echo "Failed to create output directory: $OUTPUT_DIR" >&2
    exit 1
fi

# Generate dataset. The generator's exit status is checked so the success
# summary below is printed only when it actually succeeded, and so the
# script's own exit status reflects the failure.
python3 src/data/squaremoddataset.py \
    --train_samples "$TRAIN_SAMPLES" \
    --test_samples "$TEST_SAMPLES" \
    --sequence_length "$SEQ_LENGTH" \
    --output_dir "$OUTPUT_DIR" \
    --train_seed "$TRAIN_SEED" \
    --test_seed "$TEST_SEED"
gen_status=$?
if [[ "$gen_status" -ne 0 ]]; then
    echo "Dataset generation failed (exit status ${gen_status})." >&2
    exit "$gen_status"
fi

# Summary of the expected outputs. NOTE(review): the file names are taken
# from this script's messages; confirm they match what the generator writes.
echo "Dataset generation is complete."
echo "Output file:"
echo "  - $OUTPUT_DIR/data.train     (${TRAIN_SAMPLES} samples)"
echo "  - $OUTPUT_DIR/data.test      (${TEST_SAMPLES} samples)"
echo "  - $OUTPUT_DIR/data-inv.train (${TRAIN_SAMPLES} samples)"
echo "  - $OUTPUT_DIR/data-inv.test  (${TEST_SAMPLES} samples)"
