#!/bin/bash
#Set job requirements

#SBATCH -n 1
#SBATCH -t 00:30:00
#SBATCH --job-name=run_projections
#SBATCH --partition=gpu_a100
#SBATCH --gpus=1
#SBATCH --gpus-per-node=1
#SBATCH --mem=30G
#SBATCH --mail-type=BEGIN,END
#SBATCH --mail-user=f.g.holstege@uva.nl

# Run from the repository root: the Python scripts below are invoked by
# relative path and `config` is imported from the working directory.
# Abort early if the directory is missing instead of failing on every run.
cd "$HOME/opt-separation" || {
    echo "Error: cannot change directory to $HOME/opt-separation" >&2
    exit 1
}

# Load the `2023` module, then activate the project virtual environment.
# Without the environment the Python steps cannot run, so stop right away.
module load 2023
source ~/JHS_installations/venvs/env_opt_separation/bin/activate || {
    echo "Error: cannot activate virtual environment env_opt_separation" >&2
    exit 1
}

# Experiment grid. Every combination of model x dataset x projection method
# x layer spec is run by the main loop.
# MODELS and MODEL_TYPES are paired by index (entry i of one belongs to
# entry i of the other), so keep both lists the same length and order.
MODELS=("meta-llama/Llama-3.1-8B-Instruct")
MODEL_TYPES=("llama")

PROJECTION_METHODS=(
    "opt-sep-proj"
    "LEACE"
    "SAL"
)
LAYERS=("last_9")
DATASETS=("winobias")

# Run-wide settings passed to the Python entry points.

# Hardware and numerics
DEVICE=cuda                 # server GPU (was mps before)
TORCH_DTYPE=float16
BATCH_SIZE=64
SEED=0

# Tokenisation / embedding extraction
MAX_LENGTH=40
EMBEDDING_STRATEGY=last_non_pad

# Projection behaviour
APPLY=True                  # forwarded as --apply_projection
INDEPENDENT=False           # forwarded as --independent_layers
APPLY_STRATEGY_PROJ=all     # --apply_strategy for calc_projections.py
APPLY_STRATEGY_CALC=all     # --apply_strategy for calc_pred_LLM.py
EVAL_TYPE=prediction_winobias

# Scratch locations: everything is written below TMPDIR first and copied
# to permanent storage afterwards.
OUTPUT_DIR_PRED="${TMPDIR}"
OUTPUT_DIR_PROJ="${TMPDIR}/projections"

# Print the execution context so a failed job is easier to diagnose from
# its log.
echo "TMPDIR is set to: $TMPDIR"
echo "Current working directory: $(pwd)"
echo "User: $(whoami)"

# Validate TMPDIR before creating anything: both output paths are derived
# from it, so with an empty value they would resolve to "/projections"
# and "".
if [ -z "$TMPDIR" ]; then
    echo "Error: TMPDIR is not set" >&2
    exit 1
fi

# Create each output directory and verify that it exists and is writable.
for OUT_DIR in "$OUTPUT_DIR_PROJ" "$OUTPUT_DIR_PRED"; do
    if ! mkdir -p "$OUT_DIR" || [ ! -d "$OUT_DIR" ]; then
        echo "Failed to create output directories" >&2
        exit 1
    fi

    # Probe with a uniquely named file so an existing file in the
    # directory is never overwritten or removed by the check.
    if ! PROBE_FILE=$(mktemp "$OUT_DIR/.write_test.XXXXXX" 2>/dev/null); then
        echo "Error: Cannot write to $OUT_DIR" >&2
        exit 1
    fi
    rm -f -- "$PROBE_FILE"
done
unset OUT_DIR PROBE_FILE

# Loop over models, datasets, projection methods, and layers.
# For every combination:
#   1. run calc_projections.py, writing below $OUTPUT_DIR_PROJ
#   2. copy the dataset's projections to $HOME/opt-separation/projections
#   3. run calc_pred_LLM.py, writing below $OUTPUT_DIR_PRED
#   4. copy the resulting CSV to $HOME/opt-separation/data/result_data
# Commands are built as arrays rather than eval'd strings, so each argument
# reaches Python as exactly one word whatever characters it contains.
PROJ_DEST_DIR="$HOME/opt-separation/projections"
RESULT_DEST_DIR="$HOME/opt-separation/data/result_data"

# cp does not create missing destination directories; make sure they exist.
mkdir -p "$PROJ_DEST_DIR" "$RESULT_DEST_DIR" ||
    echo "Warning: could not create $PROJ_DEST_DIR or $RESULT_DEST_DIR" >&2

for i in "${!MODELS[@]}"; do
    MODEL_NAME="${MODELS[$i]}"
    MODEL_TYPE="${MODEL_TYPES[$i]}"

    # MODELS and MODEL_TYPES are paired by index; without a type there is
    # no valid command line for this model, so skip it.
    if [ -z "$MODEL_TYPE" ]; then
        echo "Error: no MODEL_TYPES entry for model: $MODEL_NAME" >&2
        continue
    fi

    echo "Processing model: $MODEL_NAME"

    # Short model name used in the result file names. It only depends on
    # the model, so resolve it once per model. The name is passed via argv
    # instead of being pasted into the Python source.
    MODEL_NAME_SHORT=$(python -c 'import sys; from config import map_model_name; print(map_model_name(sys.argv[1]))' "$MODEL_NAME") ||
        echo "Warning: could not resolve short name for model: $MODEL_NAME" >&2

    for DATASET in "${DATASETS[@]}"; do
        echo "Processing dataset: $DATASET"

        for PROJECTION_METHOD in "${PROJECTION_METHODS[@]}"; do
            for LAYER in "${LAYERS[@]}"; do
                echo "Running projections for method: $PROJECTION_METHOD, layer: $LAYER"

                # Run projections
                CMD=(
                    python calc_projections.py
                    --model_name "$MODEL_NAME"
                    --model_type "$MODEL_TYPE"
                    --dataset "$DATASET"
                    --projection_method "$PROJECTION_METHOD"
                    --layers "$LAYER"
                    --device "$DEVICE"
                    --batch_size "$BATCH_SIZE"
                    --torch_dtype "$TORCH_DTYPE"
                    --output_dir "$OUTPUT_DIR_PROJ"
                    --seed "$SEED"
                    --max_length "$MAX_LENGTH"
                    --apply_strategy "$APPLY_STRATEGY_PROJ"
                    --embedding_strategy "$EMBEDDING_STRATEGY"
                    --independent_layers "$INDEPENDENT"
                    --calculate_cov=True
                )
                echo "Running command: ${CMD[*]}"
                if ! "${CMD[@]}"; then
                    # Without fresh projections the prediction step would
                    # run on missing or stale files, so skip this
                    # combination entirely.
                    echo "Error: calc_projections.py failed (method: $PROJECTION_METHOD, layer: $LAYER); skipping prediction" >&2
                    continue
                fi

                # Copy projections to the expected location immediately after calculation
                echo "Copying projections to permanent storage..."
                if ! cp -r "$OUTPUT_DIR_PROJ/$DATASET" "$PROJ_DEST_DIR/"; then
                    echo "Warning: could not copy $OUTPUT_DIR_PROJ/$DATASET to $PROJ_DEST_DIR/" >&2
                fi

                # Run gender scores calculation
                PRED_CMD=(
                    python calc_pred_LLM.py
                    --dataset "$DATASET"
                    --model_name "$MODEL_NAME"
                    --model_type "$MODEL_TYPE"
                    --batch_size "$BATCH_SIZE"
                    --max_length "$MAX_LENGTH"
                    --apply_projection "$APPLY"
                    --projection_method "$PROJECTION_METHOD"
                    --layers "$LAYER"
                    --apply_strategy "$APPLY_STRATEGY_CALC"
                    --embedding_strategy "$EMBEDDING_STRATEGY"
                    --torch_dtype "$TORCH_DTYPE"
                    --independent_layers "$INDEPENDENT"
                    --output_folder "$OUTPUT_DIR_PRED"
                    --device "$DEVICE"
                )
                echo "Running command: ${PRED_CMD[*]}"
                if ! "${PRED_CMD[@]}"; then
                    echo "Error: calc_pred_LLM.py failed (method: $PROJECTION_METHOD, layer: $LAYER)" >&2
                fi

                # Rebuild the file-name suffix of the prediction CSV so it
                # can be copied from temporary to permanent storage.
                SUFFIX=""
                if [ "$APPLY" = "True" ]; then
                    case "$LAYER" in
                        lm_head | all | last_*)
                            SUFFIX="_${PROJECTION_METHOD}_${LAYER}"
                            ;;
                    esac
                    if [ "$INDEPENDENT" = "True" ]; then
                        SUFFIX="${SUFFIX}_${EMBEDDING_STRATEGY}_independent"
                        echo "Independent layers: $SUFFIX"
                    else
                        SUFFIX="${SUFFIX}_${EMBEDDING_STRATEGY}_dependent"
                        echo "Dependent layers: $SUFFIX"
                    fi
                fi

                RESULT_NAME="${DATASET}_professions_${MODEL_NAME_SHORT}${SUFFIX}.csv"
                SOURCE_FILE="$OUTPUT_DIR_PRED/$RESULT_NAME"
                DEST_FILE="$RESULT_DEST_DIR/$RESULT_NAME"

                if [ ! -f "$SOURCE_FILE" ]; then
                    echo "Warning: Source file not found: $SOURCE_FILE"
                    echo "Checking directory contents:"
                    ls -l "$OUTPUT_DIR_PRED"
                elif cp "$SOURCE_FILE" "$DEST_FILE"; then
                    echo "Copied results to data/result_data/$RESULT_NAME"
                else
                    # Only report success when the copy really succeeded.
                    echo "Error: could not copy $SOURCE_FILE to $DEST_FILE" >&2
                fi

                # Optional: Add sleep between runs to let GPU cool down
                sleep 5
            done
        done
    done
done

echo "All projection and predictions completed for all models and datasets!"

# Copy the whole prediction output directory to permanent storage.
# Note: with `cp -r`, the directory is copied *as* results/ when that path
# does not exist yet, and *into* results/ when it already exists.
if [ -d "$OUTPUT_DIR_PRED" ]; then
    # Report success only if the copy actually worked.
    if cp -r "$OUTPUT_DIR_PRED" "$HOME/opt-separation/results"; then
        echo "Copied results to $HOME/opt-separation/results"
    else
        echo "Error: failed to copy $OUTPUT_DIR_PRED to $HOME/opt-separation/results" >&2
    fi
else
    echo "Warning: Output directory not found: $OUTPUT_DIR_PRED"
fi