#!/bin/bash
# Make the current working directory importable by Python. The ':' separator
# is only emitted when PYTHONPATH already has a value, so an unset or empty
# PYTHONPATH does not produce a leading empty path entry.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}$(pwd)"

# Ensure reproducibility at system level: set a fixed PYTHONHASHSEED and pin
# each of the thread-count variables below to a single thread.
export PYTHONHASHSEED=42
export GRB_NUM_THREADS=1
export OMP_NUM_THREADS=1
export MKL_NUM_THREADS=1
export NUMEXPR_NUM_THREADS=1
export OPENBLAS_NUM_THREADS=1

# Experiment configuration. These values are forwarded to scripts/run_glue.py
# by the per-seed loop further down.
MODEL_NAME="roberta-base"
TASK_NAME="stsb"
# OUTPUT_DIR is intentionally not set here: it embeds the seed, which is only
# known inside the per-seed loop, where it is (re)computed for every run.

# Comma-separated values passed to --lora_r_values.
LORA_R_VALUES="0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16"
# Value passed to --lora_budget.
# NOTE(review): this was previously described as "approx 5% of BERT-base
# parameters", but MODEL_NAME is roberta-base and 1,000,000 is roughly 1% of a
# ~110-125M-parameter base model -- confirm the intended budget.
LORA_BUDGET=1000000
BATCH_SIZE=32
LEARNING_RATE=1e-4
NUM_EPOCHS=20
RECOVERY_STEPS=100  # Recovery steps after pruning
EXTENDED_RECOVERY_STEPS=200

# One independent training run is launched per seed.
SEEDS=(42 2025 777)

# Run the training script once per seed in SEEDS. Each run gets its own
# timestamped output directory, and a reproducibility_info.txt file recording
# the completion time, the training script's exit code, and the seed used.
# Seeds whose run fails are collected so the final status reflects reality.
failed_seeds=()

for SEED in "${SEEDS[@]}"; do
    echo "========================================"
    echo "Starting training with seed ${SEED}"
    echo "========================================"

    # Create unique output directory for this seed
    OUTPUT_DIR="output/${TASK_NAME}_optimal_lora_seed${SEED}_$(date +%Y%m%d_%H%M%S)"

    if ! mkdir -p -- "$OUTPUT_DIR"; then
        echo "Failed to create output directory ${OUTPUT_DIR}" >&2
        failed_seeds+=("$SEED")
        continue
    fi

    # Run the training script with current seed. The exit status is captured
    # immediately: any later command (even an echo) would overwrite $?.
    exit_code=0
    python scripts/run_glue.py \
        --model_name_or_path "$MODEL_NAME" \
        --task_name "$TASK_NAME" \
        --output_dir "$OUTPUT_DIR" \
        --lora_r_values "$LORA_R_VALUES" \
        --lora_budget "$LORA_BUDGET" \
        --per_device_train_batch_size "$BATCH_SIZE" \
        --per_device_eval_batch_size "$((BATCH_SIZE * 4))" \
        --learning_rate "$LEARNING_RATE" \
        --num_train_epochs "$NUM_EPOCHS" \
        --weight_decay 0.01 \
        --num_warmup_steps 100 \
        --max_seq_length 128 \
        --lr_scheduler_type linear \
        --seed "$SEED" \
        --overwrite_output_dir \
        --apply_pruning \
        --pruning_target_reduction 0.7 \
        --pruning_steps 5 \
        --importance_ema_decay 0.99 \
        --momentum_penalty_weight 0.05 \
        --recovery_steps "$RECOVERY_STEPS" \
        --extended_recovery_steps "$EXTENDED_RECOVERY_STEPS" || exit_code=$?

    # Save final reproducibility verification
    {
        echo "Execution completed: $(date)"
        echo "Exit code: ${exit_code}"
        echo "Used seed: ${SEED}"
    } >> "$OUTPUT_DIR/reproducibility_info.txt"

    if (( exit_code != 0 )); then
        echo "Training with seed ${SEED} failed (exit code ${exit_code})" >&2
        failed_seeds+=("$SEED")
    else
        echo "Completed training with seed ${SEED}"
    fi
    echo "========================================"
done

# Only claim success when every run actually succeeded.
if (( ${#failed_seeds[@]} > 0 )); then
    echo "Training failed for seed(s): ${failed_seeds[*]}" >&2
    exit 1
fi

echo "All training runs completed successfully!"