#!/usr/bin/env python3
"""
Complete Annoy Demo Script

This script demonstrates how to:
1. Build an Annoy index
2. Save it to disk
3. Load it and perform searches
4. Use different distance metrics
"""

import sys
import os
import random
import time

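# Add the local Annoy checkout to the Python path so we can import annoy.
# NOTE: the path below is hard-coded and machine-specific; adjust or remove it
# if annoy is installed in the active environment.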
sys.path.insert(0, '/home/name/panorama/annoy')

from annoy import AnnoyIndex

def demo_basic_usage():
    """Basic usage example with angular distance (cosine similarity).

    Builds a 1000-item index of random 40-dimensional vectors with 10 trees,
    saves it to ``demo.ann``, loads it into a second index object and runs
    nearest-neighbor queries by item id and by vector.

    The temporary index file is removed when the demo ends, whether or not
    one of the steps raised.
    """
    print("=== Basic Usage Demo ===")

    # Create index for 40-dimensional vectors using angular distance
    f = 40  # vector dimension
    t = AnnoyIndex(f, 'angular')

    print(f"Creating index with {f} dimensions using angular distance...")

    # Add 1000 random vectors
    for i in range(1000):
        v = [random.gauss(0, 1) for _ in range(f)]
        t.add_item(i, v)

    # Build the index with 10 trees
    print("Building index with 10 trees...")
    t.build(10)

    index_file = 'demo.ann'
    # Reader index; created before the try so the cleanup below can always
    # unload it, even when saving or loading fails.
    u = AnnoyIndex(f, 'angular')
    try:
        # Save to disk
        t.save(index_file)
        print(f"Index saved to {index_file}")

        # Load index from disk (this is very fast - just mmaps the file)
        u.load(index_file)
        print("Index loaded from disk")

        # Find nearest neighbors
        item_id = 0
        n_neighbors = 10
        neighbors = u.get_nns_by_item(item_id, n_neighbors)
        print(f"Top {n_neighbors} neighbors of item {item_id}: {neighbors}")

        # Search by vector
        query_vector = [random.gauss(0, 1) for _ in range(f)]
        neighbors_by_vector = u.get_nns_by_vector(query_vector, n_neighbors)
        print(f"Top {n_neighbors} neighbors of random vector: {neighbors_by_vector}")

        # Get distances as well
        neighbors_with_distances = u.get_nns_by_item(item_id, n_neighbors, include_distances=True)
        print(f"Neighbors with distances: {neighbors_with_distances}")
    finally:
        # Clean up. Release the indexes' hold on the file first (per the Annoy
        # docs, save() loads the saved file too); deleting a file that is
        # still memory-mapped fails on some platforms.
        t.unload()
        u.unload()
        if os.path.exists(index_file):
            os.remove(index_file)
    print()

def demo_different_metrics():
    """Show how the neighbors of one item differ across distance metrics.

    For each metric a fresh 3-dimensional index is filled with the same five
    hand-picked vectors, built with 10 trees, and queried for the 3 nearest
    neighbors of item 0 together with their distances.
    """
    print("=== Different Distance Metrics Demo ===")

    # Tiny 3-D data set so the printed distances are easy to check by hand
    points = (
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
        [1, 1, 0],
        [0.5, 0.5, 0.5],
    )

    for metric_name in ('angular', 'euclidean', 'manhattan', 'dot'):
        print(f"\n--- Using {metric_name} distance ---")

        # One independent index per metric
        metric_index = AnnoyIndex(3, metric_name)
        for item_id, point in enumerate(points):
            metric_index.add_item(item_id, point)
        metric_index.build(10)

        # Query around item 0, i.e. the vector [1, 0, 0]
        result = metric_index.get_nns_by_item(0, 3, include_distances=True)
        ids = result[0]
        distances = result[1]
        print(f"Neighbors of [1,0,0]: items {ids} with distances {distances}")

def demo_performance_tuning():
    """Demonstrate performance tuning parameters.

    Times ``build`` for several tree counts and ``get_nns_by_vector`` for
    several ``search_k`` values. The same vectors and the same query are
    used for every configuration so the printed timings are comparable.
    """
    print("=== Performance Tuning Demo ===")

    f = 100
    # Number of vectors actually indexed; kept small so the demo runs quickly.
    n_items = 1000

    print(f"Creating index with {n_items} items of {f} dimensions...")

    # Generate the data set and the query once, up front, so each tree count
    # is measured against identical input.
    vectors = [[random.gauss(0, 1) for _ in range(f)] for _ in range(n_items)]
    query_vector = [random.gauss(0, 1) for _ in range(f)]

    # Build with different number of trees
    for n_trees in [10, 50, 100]:
        print(f"\nBuilding with {n_trees} trees...")

        # A new index is filled for each tree count
        temp_index = AnnoyIndex(f, 'angular')
        for i, v in enumerate(vectors):
            temp_index.add_item(i, v)

        # perf_counter is the clock intended for measuring short durations
        start_time = time.perf_counter()
        temp_index.build(n_trees)
        build_time = time.perf_counter() - start_time

        print(f"Build time: {build_time:.2f} seconds")

        # Test search performance with different search_k values
        for search_k in [100, 1000, 5000]:
            start_time = time.perf_counter()
            temp_index.get_nns_by_vector(query_vector, 10, search_k=search_k)
            search_time = time.perf_counter() - start_time
            print(f"  Search time (search_k={search_k}): {search_time*1000:.2f}ms")

def demo_persistent_index():
    """Demonstrate saving and loading indexes for production use.

    Loads ``production.ann`` when it already exists; otherwise builds a
    5000-item, 50-tree index of random 50-dimensional vectors and saves it.
    The index is then queried once with a random vector.

    The index file is a demo artifact and is always removed at the end,
    including when one of the steps raises.
    """
    print("=== Persistent Index Demo ===")

    index_file = 'production.ann'
    f = 50

    index = AnnoyIndex(f, 'angular')
    try:
        # Check if index already exists
        if os.path.exists(index_file):
            print(f"Loading existing index from {index_file}...")
            index.load(index_file)
            print(f"Loaded index with {index.get_n_items()} items and {index.get_n_trees()} trees")
        else:
            print(f"Creating new index and saving to {index_file}...")

            # Add items
            for i in range(5000):
                v = [random.gauss(0, 1) for _ in range(f)]
                index.add_item(i, v)

            # Build and save
            index.build(50)
            index.save(index_file)
            print(f"Created and saved index with {index.get_n_items()} items")

        # Use the index
        query_vector = [random.gauss(0, 1) for _ in range(f)]
        neighbors = index.get_nns_by_vector(query_vector, 5, include_distances=True)
        print(f"Query results: {neighbors}")
    finally:
        # Clean up. Unload first: both load() and save() leave the index
        # backed by the file (per the Annoy docs), and deleting a file that
        # is still memory-mapped fails on some platforms.
        index.unload()
        if os.path.exists(index_file):
            os.remove(index_file)

def main():
    """Run all demos.

    Returns:
        int: 0 when every demo completes, 1 when any demo raises an
        exception (the error is reported on stderr).
    """
    print("Annoy Library Demo")
    print("=" * 50)

    # Set random seed for reproducible results
    random.seed(42)

    try:
        demo_basic_usage()
        demo_different_metrics()
        demo_performance_tuning()
        demo_persistent_index()
    except Exception as e:
        # Top-level boundary: report the failure on stderr so it is not mixed
        # into the demo output, and signal it through the return code.
        print(f"Error running demo: {e}", file=sys.stderr)
        return 1

    print("\n" + "=" * 50)
    print("All demos completed successfully!")
    print("\nKey takeaways:")
    print("1. Use 'angular' for cosine similarity, 'euclidean' for L2 distance")
    print("2. More trees = better accuracy but larger index and slower build")
    print("3. Higher search_k = better accuracy but slower queries")
    print("4. Indexes can be saved/loaded for production use")
    print("5. Building is separate from querying - build once, query many times")

    return 0

if __name__ == "__main__":
    # Use sys.exit rather than the bare exit() helper: exit() is added by the
    # site module and is not guaranteed to exist (e.g. when run with -S).
    sys.exit(main())
