#!/bin/bash

# ---------------------------------------------------------------------------
# Sweep configuration
# ---------------------------------------------------------------------------

# Fixed arguments forwarded to eval_last_layer.py on every run.
DATASET='multilingual'            # --dataset
EMBEDDING_TYPE='mean'             # --embedding_type
SAVE='True'                       # --save
SAMPLE='True'                     # --sample_data
STANDARDIZE='False'               # --standardize ("True" to standardize the data)
ADD_SEED='True'                   # --add_seed

# Model selection. BASE_MODEL is only used when FINETUNED_SAMPLE is not "True";
# otherwise the sweep derives a finetuned model name from the current P_Y_Z.
BASE_MODEL='multilingual-e5-base'
#BASE_MODEL='bert-base-uncased'  # Use this for testing with a smaller model
FINETUNED_SAMPLE='True'

# Swept dimensions: every combination of the four arrays below is evaluated.
SEEDS=(
    1
    2
    3
    4
    5
)

PROJ_METHODS=(
    'balanced-LEACE'
)

LAMBDA_VALUES=(
    1
    0.1
    0.01
    0.001
    0.0001
    0.00001
)

# P_Y_Z = probability of Y=1 given Z=1.
P_Y_Z_VALUES=(
    0.5
    0.6
    0.7
    0.8
    0.9
)

# Name of the CSV file handed to --result_file_name. The name is fixed (no
# timestamp is added), so every run of this script uses the same file name.
RESULT_FILE='lambda_sweep_multi_seed_pyz_multilingual_ME5_v4.csv'

# Run eval_last_layer.py once for every combination of P_Y_Z value, seed,
# projection method and lambda from the configuration above.
#
# A run that exits non-zero is reported on stderr and counted; the sweep keeps
# going so one bad configuration does not discard the remaining runs. If any
# run failed, the script exits with status 1 after the sweep.
echo "Starting lambda sweep across multiple projection methods, seeds, and P_Y_Z values"
echo "Results will be saved to: results/last_layer/$RESULT_FILE"

failed_runs=0

# Outermost loop through P_Y_Z values
for p_y_z in "${P_Y_Z_VALUES[@]}"; do
    echo "==============================================="
    echo "Processing P_Y_Z value: $p_y_z"
    echo "==============================================="

    # The model name depends only on P_Y_Z (and the FINETUNED_SAMPLE switch),
    # so it is resolved once here instead of inside the innermost loop.
    if [[ "$FINETUNED_SAMPLE" == "True" ]]; then
        MODEL="ME5_base_sampled_pyz${p_y_z}_bs16_lr5e-05_e5"
    else
        MODEL="$BASE_MODEL"
    fi

    # Loop through seeds
    for seed in "${SEEDS[@]}"; do
        echo "============================================="
        echo "Processing seed: $seed"
        echo "============================================="

        # Loop through projection methods
        for proj_method in "${PROJ_METHODS[@]}"; do
            echo "============================================"
            echo "Processing projection method: $proj_method"
            echo "============================================"

            # causal-LEACE runs with the "estimate_y" variant, every other
            # method with "balance". The flag is now passed exactly once;
            # earlier both values were passed and the later one was expected
            # to override the first.
            # NOTE(review): assumes eval_last_layer.py keeps the last
            # occurrence of a repeated flag (argparse default) — confirm.
            if [[ "$proj_method" == "causal-LEACE" ]]; then
                causal_variant="estimate_y"
            else
                causal_variant="balance"
            fi

            # Loop through lambda values for each projection method
            for lambda in "${LAMBDA_VALUES[@]}"; do
                echo "Running with lambda = $lambda"

                # Every expansion is quoted so an empty or space-containing
                # value cannot shift the following flags.
                if python eval_last_layer.py \
                    --dataset "$DATASET" \
                    --model_name "$MODEL" \
                    --embedding_type "$EMBEDDING_TYPE" \
                    --proj_method "$proj_method" \
                    --device cpu \
                    --seeds "$seed" \
                    --add_seed "$ADD_SEED" \
                    --sample_data "$SAMPLE" \
                    --p_y_z "$p_y_z" \
                    --lambda_val "$lambda" \
                    --save "$SAVE" \
                    --result_file_name "$RESULT_FILE" \
                    --standardize "$STANDARDIZE" \
                    --causal_LEACE_variant "$causal_variant"
                then
                    echo "Completed run with P_Y_Z = $p_y_z, seed = $seed, projection method = $proj_method, lambda = $lambda"
                else
                    run_status=$?
                    echo "FAILED run (exit status $run_status) with P_Y_Z = $p_y_z, seed = $seed, projection method = $proj_method, lambda = $lambda" >&2
                    failed_runs=$((failed_runs + 1))
                fi
                echo "---------------------------------"
            done
        done
    done
done

if (( failed_runs > 0 )); then
    echo "Lambda sweep finished with $failed_runs failed run(s); see the FAILED messages above." >&2
    exit 1
fi

echo "Lambda sweep completed for all P_Y_Z values, seeds, projection methods, and lambda values."
echo "Results saved to: results/last_layer/$RESULT_FILE"


