#!/bin/bash

# GLMP Pipeline Runner

# Enable errexit: stop the script at the first command that fails.
set -o errexit

# Opening banner: a rule line, the runner title, and the rule line again.
banner_rule="=========================================="
printf '%s\n' "$banner_rule" "GLMP Pipeline Runner" "$banner_rule"

# Prepare Sample Data
# Downloads the sample's histology image to Samples/<folder>/data/histology.tif
# and records the slide's pixel size next to it.
echo "Preparing sample data..."
folder="TXG_0001879_07142022"
source_url="https://cf.10xgenomics.com/samples/xenium/1.0.1/Xenium_FFPE_Human_Breast_Cancer_Rep1/Xenium_FFPE_Human_Breast_Cancer_Rep1_he_image.tif"
target_path="Samples/${folder}/data/histology.tif"
partial_path="${target_path}.part"

mkdir -p "$(dirname "$target_path")"

# wget -O truncates its output file before transferring, so download into a
# side file and rename it only after a complete transfer. A failed download
# then never leaves a truncated image at the path the later stages read.
if ! wget --quiet --show-progress "$source_url" -O "$partial_path"; then
    echo "ERROR: failed to download ${source_url}" >&2
    rm -f -- "$partial_path"
    exit 1
fi
mv -f -- "$partial_path" "$target_path"

# Add Pixel Size (MPP) Information
# The value is written as a single line to pixel-size.txt in the data folder.
echo "Adding pixel size information..."
MPP="0.3638" # Provide the MPP value for the WSI
echo "$MPP" > "Samples/${folder}/data/pixel-size.txt"

# Tissue patch grouping
# Runs the foreground detector on the sample directory, then renders a
# visualization of the result with the overlay settings given below.
echo "Grouping tissue patches..."
MIN_FOREGROUND_PERCENTAGE="0.001"

# Arguments for the detection step.
detect_args=(
    --sample_dir "Samples/${folder}"
    --min_foreground_percentage "$MIN_FOREGROUND_PERCENTAGE"
)
python detect_foreground.py "${detect_args[@]}"

# Arguments for the visualization step; the image is saved under --output_name.
visualize_args=(
    --sample_dir "Samples/${folder}"
    --overlay_color "255,0,0"
    --overlay_alpha 100
    --output_name "custom_visualization.jpg"
)
python visualize_foreground.py "${visualize_args[@]}"

# Feature extraction and clustering
# Settings handed to extract_and_cluster.py.
MODEL_NAME="USERNAME/MODELNAME"
NUM_CLUSTERS=10
NUM_SAMPLES_PER_CLUSTER=25
# The login token is taken from HF_TOKEN; it is empty when HF_TOKEN is unset.
HUGGING_FACE_HUB_TOKEN="${HF_TOKEN:-}"

cluster_args=(
    --sample_dir "Samples/${folder}"
    --login_token "$HUGGING_FACE_HUB_TOKEN"
    --model_name "$MODEL_NAME"
    --k "$NUM_CLUSTERS"
    --n_patches "$NUM_SAMPLES_PER_CLUSTER"
)
python extract_and_cluster.py "${cluster_args[@]}"

# Generate Biological Descriptions with MLLM
echo "Generating biological descriptions with MLLM..."
# Set the name of the analysis, which should match the prefix of the clustering results folder.
ANALYSIS_NAME="mymethod"
# Set the number of times to repeat the MLLM analysis.
NUM_RUNS=10

# Choose the API provider from whichever credentials are present.
# GOOGLE_API_KEY takes precedence when both are set. The ${VAR:-} form keeps
# these checks working even if the script is run with `set -u`.
if [[ -n "${GOOGLE_API_KEY:-}" ]]; then
    echo "Using Google GenAI authentication..."
    mllm_api_provider="google-genai"
elif [[ -n "${GOOGLE_APPLICATION_CREDENTIALS:-}" ]]; then
    echo "Using Vertex AI authentication..."
    # Only a warning here: this step does not pass PROJECT_ID/LOCATION on the
    # command line, so it is left to run_mllm.py to decide whether they matter.
    if [[ -z "${PROJECT_ID:-}" || -z "${LOCATION:-}" ]]; then
        echo "WARNING: For Vertex AI, you may need to set PROJECT_ID and LOCATION environment variables" >&2
    fi
    mllm_api_provider="vertex-ai"
else
    # Diagnostics go to stderr so they are not lost when stdout is redirected.
    {
        echo "ERROR: Please set either GOOGLE_API_KEY or GOOGLE_APPLICATION_CREDENTIALS"
        echo "For Google GenAI: export GOOGLE_API_KEY='your_api_key'"
        echo "For Vertex AI: export GOOGLE_APPLICATION_CREDENTIALS='path/to/credentials.json'"
    } >&2
    exit 1
fi

# Single invocation; only --api_provider differs between the two providers.
python run_mllm.py \
    --sample_dir "Samples/${folder}" \
    --analysis_name "$ANALYSIS_NAME" \
    --prompt_txt_path "prompt.txt" \
    --num_runs "$NUM_RUNS" \
    --api_provider "$mllm_api_provider"

# Generate Weighted MLLM Embeddings
echo "Generating weighted MLLM embeddings..."

# Arguments common to both providers; provider-specific ones are appended below.
embedding_args=(
    --sample_dir "Samples/${folder}"
    --analysis_name "$ANALYSIS_NAME"
)

# Provider selection mirrors the credentials check used for the MLLM step:
# GOOGLE_API_KEY takes precedence over GOOGLE_APPLICATION_CREDENTIALS.
if [[ -n "${GOOGLE_API_KEY:-}" ]]; then
    echo "Using Google GenAI for embeddings..."
    embedding_args+=(--api_provider "google-genai")
elif [[ -n "${GOOGLE_APPLICATION_CREDENTIALS:-}" ]]; then
    echo "Using Vertex AI for embeddings..."
    # PROJECT_ID and LOCATION are passed on the command line, so they are required.
    if [[ -z "${PROJECT_ID:-}" || -z "${LOCATION:-}" ]]; then
        {
            echo "ERROR: For Vertex AI embeddings, PROJECT_ID and LOCATION are required"
            echo "Set them like: export PROJECT_ID='your-project' LOCATION='us-central1'"
        } >&2
        exit 1
    fi
    embedding_args+=(
        --api_provider "vertex-ai"
        --project_id "$PROJECT_ID"
        --location "$LOCATION"
    )
else
    # Without credentials no embeddings can be produced; fail loudly instead of
    # skipping the step and still reporting success.
    echo "ERROR: Please set either GOOGLE_API_KEY or GOOGLE_APPLICATION_CREDENTIALS" >&2
    exit 1
fi

# Actually run the generator (previously the Google GenAI path only echoed the
# command line and never executed it).
python generate_mllm_embeddings.py "${embedding_args[@]}"

echo "=========================================="
echo "Pipeline completed successfully!"
echo "=========================================="
