#!/usr/bin/env python3
"""
Simple example of the density-based adaptive k approach.

This shows how a single density metric is used to determine the optimal k
for k-NN calculations across different embedding types.
"""

import numpy as np
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler

def create_sample_embeddings():
    """Build two synthetic embedding sets with contrasting cluster structure.

    Both sets are drawn with ``make_blobs`` (500 samples, ``random_state=42``)
    and then standardized per feature with ``StandardScaler``.

    Returns:
        A ``(dense_embeddings, sparse_embeddings)`` tuple:

        * ``dense_embeddings`` -- 5 tight clusters (``cluster_std=0.5``) in
          128 dimensions, standing in for dense, well-clustered features
          (e.g. DINO-like).
        * ``sparse_embeddings`` -- 10 wide clusters (``cluster_std=2.0``) in
          256 dimensions, standing in for sparse, spread-out features
          (e.g. some model-based features).
    """

    def _scaled_blobs(n_centers, spread, dim):
        # Cluster labels are not needed for the density demo, so drop them.
        points, _labels = make_blobs(
            n_samples=500,
            centers=n_centers,
            cluster_std=spread,
            n_features=dim,
            random_state=42,
        )
        # A fresh scaler per set keeps the two embedding spaces independent.
        return StandardScaler().fit_transform(points)

    tight_clusters = _scaled_blobs(5, 0.5, 128)
    wide_clusters = _scaled_blobs(10, 2.0, 256)
    return tight_clusters, wide_clusters

def test_density_metric():
    """Print the density metric and adaptive k for both sample embedding sets.

    For each set returned by ``create_sample_embeddings`` this calls
    ``calculate_embedding_density_metric`` and ``get_adaptive_k_for_embedding``
    (both with ``max_size=50``; the latter also with ``base_k=5``), prints the
    results, and finishes with a short textual explanation of the approach.
    Nothing is returned; all output goes to stdout.
    """
    # Imported at call time, so merely importing this module does not require
    # the project-local helper module to be available.
    # NOTE(review): the module name is spelled "SelecetionStrategy" -- confirm
    # this matches the actual file name on disk.
    from SelecetionStrategy import calculate_embedding_density_metric, get_adaptive_k_for_embedding

    dense_embeddings, sparse_embeddings = create_sample_embeddings()

    print("=== Single Density Metric Approach ===\n")

    # Each case is (section heading, embeddings); both are reported identically.
    cases = (
        ("1. Dense, well-clustered embeddings:", dense_embeddings),
        ("2. Sparse, spread-out embeddings:", sparse_embeddings),
    )
    for heading, embeddings in cases:
        print(heading)
        metric = calculate_embedding_density_metric(embeddings, max_size=50)
        adaptive_k = get_adaptive_k_for_embedding(embeddings, max_size=50, base_k=5)
        print(f"   Density metric: {metric:.3f}")
        print(f"   Adaptive k: {adaptive_k}")
        print()

    explanation = (
        "=== How it works ===",
        "- Single metric: coefficient of variation of local densities",
        "- Higher metric = more dense space = SMALLER k (smaller delta)",
        "- Lower metric = more sparse space = LARGER k (larger delta)",
        "- Reason: In dense areas, ProbCover maintains diverse selection",
        "  so we want smaller balls to avoid over-selecting from same neighborhood",
        "- Same approach works for all embedding types (DINO, SimCLR, model-based, etc.)",
    )
    for line in explanation:
        print(line)
    print()

# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_density_metric()
