#!/bin/bash

# BHRA Merge and Evaluation Script
# Uses the new block_hadamard_hira.py and merge_save_block_hadamard_hira.py implementation

# ---------------------------------------------------------------------------
# User configuration
# ---------------------------------------------------------------------------

# Base model handed to the merge script through --base_model.
MODEL='meta-llama/Llama-3.2-1B'

# GPU index exported as CUDA_VISIBLE_DEVICES for the merge and evaluation runs.
GPU_ID='0'

# BHRA experiment directories to merge and evaluate. The processing loop looks
# for a "block_hadamard_hira_adapter" sub-directory inside every entry.
declare -a RUN_DIRS=(
    # List one quoted path per line, for example:
    # "experiments/block_hadamard_hira_commonsense_reasoning/Llama-3.2-1B/20250101_120000_rank_32_blocks4_lr0.001_square"
)

# Datasets every merged model is evaluated on. Each name is passed to the
# evaluation script as --dataset and is also used to build the path
# data/commonsense/<dataset>/test.json.
EVAL_DATASETS=(
    "ARC-Challenge"
    "ARC-Easy"
    "boolq"
    "hellaswag"
    "openbookqa"
    "piqa"
    "social_i_qa"
    "winogrande"
)

#######################################
# Write a short description of the processed run to <run_dir>/run_info.txt.
# Globals:   MODEL (read)
# Arguments: $1 - run directory
#            $2 - method name
# Returns:   exit status of the file write
#######################################
write_run_info() {
    local run_dir=$1
    local method=$2

    # The delimiter is deliberately unquoted so that $(date) and the variables
    # are expanded. Only text belongs between here and EOF: any command placed
    # inside the heredoc is written to the file instead of being executed.
    cat << EOF > "$run_dir/run_info.txt"

BHRA Run processed at: $(date)
Method: $method (BHRA)
Base model: $MODEL
Model directory: $run_dir
Model type: BHRA (Block-wise Hadamard product adaptation)
Configuration: BHRA with block-wise adaptation

Description:
This model uses BHRA (Block-wise Hadamard product high-rank adaptation)
using the formula: ΔW^{(i,j)} = W_0^{(i,j)} ⊙ (A^{(i,j)} · B^{(i,j)})
- Block adaptation: Divides weight matrix into blocks for fine-grained control
- Hadamard product: Element-wise multiplication for efficient adaptation
- Independent blocks: Each block adapts independently with its own low-rank matrices

Parameters:
- r = rank per block (BHRA rank)
- num_blocks = number of blocks per dimension
- block_arrangement = block arrangement pattern (square/linear)
- alpha = scaling factor

EOF
}

#######################################
# Merge the BHRA adapter of one run into the base model, evaluate the merged
# model on every entry of EVAL_DATASETS, delete the merged model again and
# record the run in run_info.txt.
# Globals:   MODEL, GPU_ID, EVAL_DATASETS (read)
# Arguments: $1 - run directory containing "block_hadamard_hira_adapter"
# Outputs:   progress on stdout, errors and warnings on stderr
# Returns:   0 when the run was processed (individual evaluation failures are
#            only reported as warnings), 1 on a fatal error (empty argument,
#            missing adapter, failed merge, missing merged model)
#######################################
process_run_dir() {
    local run_dir=$1
    local method="Block_Hadamard_HiRA"
    local adapter_path merged_model_path dataset

    # An empty entry would make every derived path resolve against "/".
    if [[ -z "$run_dir" ]]; then
        echo "Error: empty run directory entry in RUN_DIRS" >&2
        return 1
    fi

    adapter_path="$run_dir/block_hadamard_hira_adapter"
    merged_model_path="$run_dir/merged_model"

    echo "=== BHRA Model Evaluation Script ==="
    echo "=== Processing: $run_dir ==="
    echo "Method: $method"

    echo "=== Starting BHRA Merging ==="
    if [[ ! -d "$adapter_path" ]]; then
        echo "Error: Adapter not found at $adapter_path" >&2
        return 1
    fi

    # Reuse an existing merged model instead of merging again.
    if [[ -d "$merged_model_path" ]]; then
        echo "Merged model already exists at $merged_model_path, skipping merge step"
    else
        echo "Merged model not found, proceeding with BHRA merge"

        if ! CUDA_VISIBLE_DEVICES="$GPU_ID" python merge_save_block_hadamard_hira.py \
            --base_model "$MODEL" \
            --adapter_path "$adapter_path" \
            --output_path "$merged_model_path"; then
            echo "Error: BHRA merge failed" >&2
            return 1
        fi
    fi

    echo "=== Starting Evaluation ==="
    # The merge step may report success without having produced the directory.
    if [[ ! -d "$merged_model_path" ]]; then
        echo "Error: Merged model not found at $merged_model_path" >&2
        return 1
    fi

    for dataset in "${EVAL_DATASETS[@]}"; do
        echo "=== Evaluating BHRA model on $dataset ==="

        # Uncomment to run wandb in offline mode during evaluation:
        # export WANDB_MODE=offline

        # A failed dataset is reported but does not stop the remaining ones.
        if CUDA_VISIBLE_DEVICES="$GPU_ID" python instruction_tuning_eval/commonsense_eval.py \
            --model "$merged_model_path" \
            --dataset "$dataset" \
            --data_file "data/commonsense/$dataset/test.json" \
            --batch_size 128 \
            --tensor_parallel_size 1 \
            --run_dir "$run_dir"; then
            echo "✅ Successfully evaluated on $dataset"
        else
            echo "Warning: Evaluation failed for dataset $dataset" >&2
        fi
    done

    # The merged model is removed after evaluation, whether it was created by
    # this invocation or already present.
    echo "=== Cleaning up merged model ==="
    if [[ -d "$merged_model_path" ]]; then
        echo "Removing merged model directory: $merged_model_path"
        if rm -rf -- "$merged_model_path"; then
            echo "Successfully removed merged model directory"
        else
            echo "Warning: Failed to remove merged model directory" >&2
        fi
    else
        echo "Merged model directory not found - nothing to clean up"
    fi

    echo "=== Processing Complete for BHRA Model ==="

    echo "Saving run information..."
    if ! write_run_info "$run_dir" "$method"; then
        echo "Warning: Failed to write $run_dir/run_info.txt" >&2
    fi

    echo "=== BHRA Evaluation Complete ==="
}

# Make an empty configuration visible instead of exiting silently.
if (( ${#RUN_DIRS[@]} == 0 )); then
    echo "Warning: RUN_DIRS is empty - nothing to process" >&2
fi

# Process each run directory; a fatal error in one run aborts the whole script.
for RUN_DIR in "${RUN_DIRS[@]}"; do
    process_run_dir "$RUN_DIR" || exit 1
done