#!/bin/bash
#Set job requirements

#SBATCH -n 1
#SBATCH -t 07:00:00
#SBATCH --job-name=run_projections_BERT_ours
#SBATCH --partition=gpu_a100
#SBATCH --gpus=1
#SBATCH --gpus-per-node=1
#SBATCH --mem=40G
#SBATCH --mail-type=BEGIN,END
#SBATCH --mail-user=f.g.holstege@uva.nl

# Set directory for code.
# calc_projections.py and the models/... paths used further down are relative
# to this directory, so abort the job if it cannot be entered instead of
# running every configuration from the wrong place.
cd "$HOME/opt-separation" || {
    echo "Failed to change directory to $HOME/opt-separation" >&2
    exit 1
}

# Loading modules and activating environment
module load 2023
# Abort if the virtual environment cannot be activated; otherwise the runs
# would silently use whatever python happens to be first on PATH.
source ~/JHS_installations/venvs/env_opt_separation/bin/activate || {
    echo "Failed to activate virtual environment: ~/JHS_installations/venvs/env_opt_separation" >&2
    exit 1
}

# ---------------------------------------------------------------------------
# Sweep configuration
# ---------------------------------------------------------------------------

# Model family, forwarded as --model_type
MODEL_TYPE=bert

# Axes of the sweep: every combination of these values is run once.
PROJECTION_METHODS=(
    opt-sep-proj
)
DATASETS=(
    bios
)
LAYERS=(
    7 8 9 10 11 12
)
SEEDS=(
    1 2 3
)
# Different p_y_z values to loop over
P_Y_Z_VALUES=(
    0.5 0.6 0.7 0.8 0.9
)

# ---------------------------------------------------------------------------
# Fixed settings shared by every run
# ---------------------------------------------------------------------------

# Server GPU (changed from mps to cuda)
DEVICE=cuda
TORCH_DTYPE=float16
BATCH_SIZE=128
MAX_LENGTH=512

# Projections are first written under TMPDIR (temporary storage)
OUTPUT_DIR="${TMPDIR}/projections"

APPLY_STRATEGY_PROJ=cls
EMBEDDING_STRATEGY=cls
CALCULATE_COV=True

# True selects the sampled data, False the full data
SAMPLE_DATA=True

# NOTE(review): APPLY is not forwarded to calc_projections.py by the command
# built in this script - confirm whether it is still needed.
APPLY=True

# Ensure the output directory exists before any run starts; stop the job
# with exit code 1 when it is still missing after the mkdir attempt.
mkdir -p "$OUTPUT_DIR"
[ -d "$OUTPUT_DIR" ] || {
    echo "Failed to create output directory: $OUTPUT_DIR"
    exit 1
}

#######################################
# Run calc_projections.py for a single configuration and, if it succeeds,
# copy the dataset's output directory to permanent storage.
#
# The command is built as an argv array and executed directly (no eval), so
# every value reaches Python as exactly one argument even if it contains
# whitespace or shell metacharacters.
#
# Globals (read):
#   MODEL_TYPE, DEVICE, BATCH_SIZE, TORCH_DTYPE, OUTPUT_DIR, MAX_LENGTH,
#   APPLY_STRATEGY_PROJ, EMBEDDING_STRATEGY, CALCULATE_COV, SAMPLE_DATA, HOME
# Arguments:
#   $1 - dataset name
#   $2 - projection method
#   $3 - layer
#   $4 - seed
#   $5 - p_y_z value
# Outputs:
#   Progress messages on stdout, failures on stderr.
# Returns:
#   0 on success; the exit status of calc_projections.py if it failed;
#   1 if creating the destination or copying the output failed.
#######################################
run_projection() {
    local dataset=$1 method=$2 layer=$3 seed=$4 p_y_z=$5
    local dest_root="$HOME/opt-separation/projections"
    local status=0

    # define the model name based on sampling and seed
    # NOTE(review): the model path is fixed to models/bios/ regardless of the
    # dataset argument - confirm before adding other datasets.
    local model="models/bios/BERT_base_sampled_pyz${p_y_z}_bs16_lr0.0005_e2_seed${seed}"

    local -a cmd=(
        python calc_projections.py
        --model_name "$model"
        --model_type "$MODEL_TYPE"
        --dataset "$dataset"
        --projection_method "$method"
        --layers "$layer"
        --device "$DEVICE"
        --batch_size "$BATCH_SIZE"
        --torch_dtype "$TORCH_DTYPE"
        --output_dir "$OUTPUT_DIR"
        --seed "$seed"
        --max_length "$MAX_LENGTH"
        --apply_strategy "$APPLY_STRATEGY_PROJ"
        --embedding_strategy "$EMBEDDING_STRATEGY"
        --calculate_cov "$CALCULATE_COV"
        --sample_data "$SAMPLE_DATA"
        --p_y_z "$p_y_z"
    )

    # Log the command shell-quoted so it can be copy-pasted for a re-run.
    printf 'Running command:'
    printf ' %q' "${cmd[@]}"
    printf '\n'

    "${cmd[@]}" || status=$?
    if (( status != 0 )); then
        echo "calc_projections.py failed with exit code $status; skipping copy" >&2
        return "$status"
    fi

    # Copy projections to the expected location immediately after calculation
    echo "Copying projections to permanent storage..."
    # Create subdirectory for this p_y_z value and seed.
    # NOTE(review): the cp below copies the whole dataset directory into
    # $dest_root rather than into this subdirectory - presumably the Python
    # script already writes into a matching layout; confirm.
    if ! mkdir -p "$dest_root/$dataset/seed${seed}/p_y_z_${p_y_z}"; then
        echo "Failed to create $dest_root/$dataset/seed${seed}/p_y_z_${p_y_z}" >&2
        return 1
    fi
    if ! cp -r "$OUTPUT_DIR/$dataset" "$dest_root"; then
        echo "Failed to copy $OUTPUT_DIR/$dataset to $dest_root" >&2
        return 1
    fi
}

# Sweep over every dataset / method / layer / seed / p_y_z combination.
# A failing configuration is reported on stderr and the sweep continues, so
# one bad run does not discard the rest of the job's time allocation.
for DATASET in "${DATASETS[@]}"; do
    echo "Processing dataset: $DATASET"

    for PROJECTION_METHOD in "${PROJECTION_METHODS[@]}"; do
        for LAYER in "${LAYERS[@]}"; do
            for SEED in "${SEEDS[@]}"; do
                for P_Y_Z in "${P_Y_Z_VALUES[@]}"; do
                    echo "Running projections for method: $PROJECTION_METHOD, layer: $LAYER, seed: $SEED, p_y_z: $P_Y_Z"

                    if ! run_projection "$DATASET" "$PROJECTION_METHOD" "$LAYER" "$SEED" "$P_Y_Z"; then
                        echo "Warning: run failed for dataset: $DATASET, method: $PROJECTION_METHOD, layer: $LAYER, seed: $SEED, p_y_z: $P_Y_Z" >&2
                    fi

                    # Optional: Add sleep between runs to let GPU cool down
                    sleep 5
                done
            done
        done
    done
done

echo "All projection calculations completed for all seeds, models, datasets, and p_y_z values!"

#######################################
# Copy the whole output directory back to the home directory.
#
# Globals (read): OUTPUT_DIR, HOME
# Outputs:
#   A confirmation on stdout only when the copy actually succeeded; a warning
#   on stdout when the output directory is missing; an error on stderr when
#   cp fails.
# Returns:
#   0 if the copy succeeded or the output directory does not exist,
#   1 if cp failed.
#######################################
copy_results_home() {
    local dest="$HOME/opt-separation/results"

    if [ ! -d "$OUTPUT_DIR" ]; then
        echo "Warning: Output directory not found: $OUTPUT_DIR"
        return 0
    fi

    # NOTE(review): with cp -r, the resulting layout depends on whether $dest
    # already exists (copy placed inside it) or not (copy becomes $dest) -
    # confirm which layout downstream code expects.
    if ! cp -r "$OUTPUT_DIR" "$dest"; then
        echo "Error: failed to copy $OUTPUT_DIR to $dest" >&2
        return 1
    fi
    echo "Copied results to $dest"
}

# Copy results back to home directory.
# This is the last command, so a failed copy becomes the script's exit status
# instead of being reported as success.
copy_results_home