#!/usr/bin/env python3
"""
Statistics generator - produces optimization hints and performance considerations.
Synthesized from all three agents' approaches to query optimization.
"""

import sqlite3
import json
from pathlib import Path

def _quote_identifier(name):
    """Return *name* as a double-quoted SQLite identifier, doubling embedded quotes."""
    return '"' + str(name).replace('"', '""') + '"'


def generate_statistics(conn):
    """Generate table statistics, optimization hints and query patterns.

    Args:
        conn: An open ``sqlite3`` connection (anything exposing ``cursor()``
            with the DB-API ``execute``/``fetchone``/``fetchall`` methods).

    Returns:
        A JSON-serializable dict with three keys:

        * ``"table_statistics"``: per-table row count, column count, sorted
          list of indexed column names, and whether a primary key exists.
        * ``"optimization_hints"``: large tables (> 10000 rows), indexed
          columns per table, columns that look pre-aggregated, columns that
          make efficient filters, and an index assessment of each foreign key.
        * ``"query_patterns"``: human-readable recommendations derived from
          the hints above.

    Raises:
        sqlite3.Error: Propagated unchanged if any catalog query fails.
    """
    cursor = conn.cursor()

    # Get all tables; ordering by name keeps the output stable between runs.
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
    tables = [row[0] for row in cursor.fetchall()]

    table_stats = {}
    optimization_hints = {
        "large_tables": [],
        "indexed_columns": {},
        "common_aggregations": [],
        "efficient_filters": [],
        "join_optimization": []
    }

    # Internal helper data for the join analysis, keyed by lower-cased table
    # name because SQLite resolves identifiers case-insensitively. It is not
    # part of the returned structure.
    lookup_info = {}

    for table in tables:
        # Identifiers cannot be bound as parameters, so quote them instead;
        # unquoted names break on keywords, spaces and embedded quotes.
        quoted_table = _quote_identifier(table)

        # Get row count
        cursor.execute(f"SELECT COUNT(*) FROM {quoted_table}")
        row_count = cursor.fetchone()[0]

        # Get column info: rows are (cid, name, type, notnull, dflt_value, pk)
        cursor.execute(f"PRAGMA table_info({quoted_table})")
        columns = cursor.fetchall()

        # Get indexes: rows are (seq, name, unique, origin, partial)
        cursor.execute(f"PRAGMA index_list({quoted_table})")
        indexes = cursor.fetchall()

        indexed_cols = set()
        for idx in indexes:
            cursor.execute(f"PRAGMA index_info({_quote_identifier(idx[1])})")
            # The column name is NULL for expression (and rowid) index terms;
            # those are not real column names, so leave them out.
            indexed_cols.update(
                row[2] for row in cursor.fetchall() if row[2] is not None
            )

        # Primary-key columns in key order (table_info's pk field is the
        # 1-based position within the key, 0 for non-key columns).
        pk_columns = [
            col[1]
            for col in sorted((c for c in columns if c[5]), key=lambda c: c[5])
        ]

        # Sorted so the JSON output does not depend on set iteration order.
        sorted_indexed = sorted(indexed_cols)

        table_stats[table] = {
            "row_count": row_count,
            "column_count": len(columns),
            "indexed_columns": sorted_indexed,
            "has_primary_key": bool(pk_columns)
        }

        # An INTEGER PRIMARY KEY aliases the rowid and therefore has no entry
        # in index_list, yet lookups on it are as fast as any index. Count
        # primary-key columns as fast-lookup columns for the join analysis.
        lookup_info[table.lower()] = {
            "pk": pk_columns,
            "fast": {name.lower() for name in indexed_cols}
            | {name.lower() for name in pk_columns},
        }

        # Generate optimization hints
        if row_count > 10000:
            optimization_hints["large_tables"].append({
                "table": table,
                "row_count": row_count,
                "hint": f"Filter {table} early in queries to reduce result set"
            })

        if indexed_cols:
            optimization_hints["indexed_columns"][table] = list(sorted_indexed)

        for col in columns:
            col_name = col[1].lower()

            # Detect columns whose name suggests a pre-computed aggregate
            if 'count' in col_name or 'total' in col_name or 'sum' in col_name:
                optimization_hints["common_aggregations"].append({
                    "table": table,
                    "column": col[1],
                    "suggested_function": "SUM" if 'sum' in col_name or 'total' in col_name else "COUNT"
                })

            # Identify efficient filter columns (first matching reason wins)
            if col[5]:  # Primary key
                reason = "Primary key - most selective filter"
            elif col[1] in indexed_cols:
                reason = "Indexed column - fast filtering"
            elif col_name in ('year', 'date', 'status', 'type', 'category'):
                reason = "Common filter column - likely selective"
            else:
                reason = None

            if reason is not None:
                optimization_hints["efficient_filters"].append({
                    "table": table,
                    "column": col[1],
                    "reason": reason
                })

    # Analyze join optimization
    for table in tables:
        # Rows are (id, seq, table, from, to, on_update, on_delete, match)
        cursor.execute(f"PRAGMA foreign_key_list({_quote_identifier(table)})")
        child = lookup_info[table.lower()]

        for fk in cursor.fetchall():
            seq, to_table, from_col, to_col = fk[1], fk[2], fk[3], fk[4]
            parent = lookup_info.get(to_table.lower())

            # "REFERENCES parent" without a column list leaves `to` NULL and
            # implicitly targets the parent's primary key; resolve it so the
            # join is neither rendered as "parent.None" nor treated as
            # un-indexed.
            if to_col is None and parent is not None and seq < len(parent["pk"]):
                to_col = parent["pk"][seq]

            # Check if both sides are indexed
            from_indexed = from_col.lower() in child["fast"]
            to_indexed = (
                parent is not None
                and to_col is not None
                and to_col.lower() in parent["fast"]
            )

            if from_indexed and to_indexed:
                status = "Optimal - both sides indexed"
            elif from_indexed or to_indexed:
                status = "Good - one side indexed"
            else:
                status = "Consider indexing for better performance"

            optimization_hints["join_optimization"].append({
                "join": f"{table}.{from_col} → {to_table}.{to_col}",
                "status": status
            })

    # Generate query pattern recommendations
    query_patterns = []

    # Pattern 1: Large table filtering
    for large_table in optimization_hints["large_tables"]:
        query_patterns.append({
            "pattern": f"Queries on {large_table['table']}",
            "recommendation": f"Always include WHERE clause to filter {large_table['table']} (has {large_table['row_count']} rows)"
        })

    # Pattern 2: Aggregation optimization (sorted: set order is not stable)
    aggregation_tables = {agg["table"] for agg in optimization_hints["common_aggregations"]}
    for table in sorted(aggregation_tables):
        query_patterns.append({
            "pattern": f"Aggregations on {table}",
            "recommendation": "Use GROUP BY with aggregate functions on pre-calculated columns when available"
        })

    # Pattern 3: Join order optimization
    optimal_joins = [j for j in optimization_hints["join_optimization"] if "Optimal" in j["status"]]
    if optimal_joins:
        query_patterns.append({
            "pattern": "Multi-table joins",
            "recommendation": f"Start joins with indexed relationships like {optimal_joins[0]['join']}"
        })

    return {
        "table_statistics": table_stats,
        "optimization_hints": optimization_hints,
        "query_patterns": query_patterns
    }

def main(db_path="./database.sqlite"):
    """Print statistics for a SQLite database to stdout as JSON.

    Args:
        db_path: Location of the database file. Defaults to
            ``./database.sqlite`` in the current working directory.

    Returns:
        None. On success the statistics are printed as indented JSON. If the
        file is missing, or any exception is raised while connecting,
        analyzing or serializing, a JSON object of the form
        ``{"error": "<message>"}`` is printed instead.
    """
    db_path = Path(db_path)

    # Check first: sqlite3.connect() would silently create an empty database.
    if not db_path.exists():
        print(json.dumps({"error": "Database file not found"}))
        return

    try:
        conn = sqlite3.connect(str(db_path))
        try:
            results = generate_statistics(conn)
        finally:
            # Release the connection even when the analysis raises.
            conn.close()

        # Output as JSON
        print(json.dumps(results, indent=2))

    except Exception as e:
        # Top-level boundary: report any failure in the same JSON channel
        # rather than letting a traceback reach the consumer of stdout.
        print(json.dumps({"error": str(e)}))

# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()