#!/usr/bin/env python3
"""
Schema Validator - Enhanced from iter10 and iter8
Provides hierarchical validation with comprehensive column mapping
"""

import sqlite3
import os

# Keywords used to bucket columns in the "COLUMN LOCATION INDEX" section.
# Order matters: a column is filed under the first keyword its name contains.
_GROUP_KEYWORDS = ('id', 'name', 'date', 'time', 'amount', 'count', 'type', 'status', 'description')

# Maximum number of locations listed per keyword before truncating.
_MAX_LOCATIONS_PER_GROUP = 15


def _quote_identifier(name):
    """Return *name* as a double-quoted SQLite identifier.

    Quoting unconditionally (and doubling embedded quotes) keeps the
    generated SQL valid for names that are reserved words or contain
    spaces, dashes, dots, or quote characters.
    """
    return '"' + name.replace('"', '""') + '"'


def _format_foreign_key(table, fk):
    """Render one ``PRAGMA foreign_key_list`` row as ``table.col → parent.col``."""
    # The "to" field (fk[4]) is NULL when the constraint names only the
    # parent table, i.e. it references the parent's primary key implicitly.
    to_col = fk[4] if fk[4] is not None else "(primary key)"
    return f"  {table}.{fk[3]} → {fk[2]}.{to_col}"


def _append_table_section(cursor, table, output, column_index):
    """Append the row count, columns, foreign keys and indexes of *table*.

    Lines are appended to *output* in place. Every column is recorded in
    *column_index*, which maps a lower-cased column name to the list of
    ``table.column`` locations where it occurs.

    Raises:
        sqlite3.Error: if any query against the table fails. Lines appended
            before the failure are left in *output*.
    """
    quoted = _quote_identifier(table)

    # Row count for context
    cursor.execute(f"SELECT COUNT(*) FROM {quoted}")
    output.append(f"Rows: {cursor.fetchone()[0]}")

    # Columns with full details
    cursor.execute(f"PRAGMA table_info({quoted})")
    columns = cursor.fetchall()

    output.append("")
    output.append("**Column Reference:**")

    for _cid, col_name, col_type, not_null, default, is_pk in columns:
        location = f"{table}.{col_name}"
        # Keep every location: the same column name commonly exists in
        # several tables and the index must list all of them.
        column_index.setdefault(col_name.lower(), []).append(location)

        desc = f"  {location} [{col_type or 'UNKNOWN'}]"

        constraints = []
        if is_pk:
            constraints.append("PRIMARY KEY")
        if not_null:
            constraints.append("NOT NULL")
        if default is not None:
            constraints.append(f"DEFAULT={default}")
        if constraints:
            desc += f" ({', '.join(constraints)})"

        output.append(desc)

    # Foreign keys
    cursor.execute(f"PRAGMA foreign_key_list({quoted})")
    fks = cursor.fetchall()
    if fks:
        output.append("")
        output.append("**Foreign Keys:**")
        output.extend(_format_foreign_key(table, fk) for fk in fks)

    # Indexed columns (performance hints)
    cursor.execute(f"PRAGMA index_list({quoted})")
    indexed_cols = set()
    for idx in cursor.fetchall():
        # Index names can contain spaces (e.g. auto-indexes of tables whose
        # name has a space), so they must be quoted as well.
        cursor.execute(f"PRAGMA index_info({_quote_identifier(idx[1])})")
        for ic in cursor.fetchall():
            # The column name is NULL for rowid/expression index entries.
            if ic[2] is not None:
                indexed_cols.add(ic[2])

    if indexed_cols:
        output.append("")
        # Sorted so the report is deterministic across runs.
        output.append(f"**Indexed Columns:** {', '.join(sorted(indexed_cols))}")

    output.append("")


def _build_report(cursor):
    """Build the full report using *cursor*.

    Returns:
        A ``(lines, table_count, column_count)`` tuple, where *lines* is the
        report as a list of strings and *column_count* is the total number
        of column locations recorded in the index.
    """
    output = [
        "# SCHEMA VALIDATION",
        "(Hierarchical column reference - prevents 'no such column' errors)",
        "",
    ]

    cursor.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
    tables = [row[0] for row in cursor.fetchall()]

    output.append(f"## Database Overview: {len(tables)} tables")
    output.append("")

    column_index = {}

    for table in tables:
        output.append(f"### TABLE: {table}")
        try:
            _append_table_section(cursor, table, output, column_index)
        except sqlite3.Error as e:
            # Best effort: report the failure and carry on with other tables.
            output.append(f"  Error reading table: {e}")
            output.append("")

    # Column search index
    output.append("## COLUMN LOCATION INDEX")
    output.append("(Quick lookup - find where each column exists)")
    output.append("")

    grouped = {}
    for col_lower, locations in column_index.items():
        keyword = next((k for k in _GROUP_KEYWORDS if k in col_lower), None)
        if keyword is not None:
            grouped.setdefault(keyword, []).extend(locations)

    for keyword in sorted(grouped):
        locations = sorted(grouped[keyword])
        output.append(f"**Columns with '{keyword}':**")
        for location in locations[:_MAX_LOCATIONS_PER_GROUP]:
            output.append(f"  {location}")
        if len(locations) > _MAX_LOCATIONS_PER_GROUP:
            output.append(f"  ... and {len(locations) - _MAX_LOCATIONS_PER_GROUP} more")
        output.append("")

    # Relationship summary
    output.append("## RELATIONSHIP MAP")
    output.append("")

    relationships = []
    for table in tables:
        try:
            cursor.execute(f"PRAGMA foreign_key_list({_quote_identifier(table)})")
        except sqlite3.Error:
            # A table that cannot be read here also failed in its own
            # section above, where the error is already reported.
            continue
        relationships.extend(_format_foreign_key(table, fk) for fk in cursor.fetchall())

    if relationships:
        output.append("**Explicit Foreign Keys:**")
        output.extend(relationships)
    else:
        output.append("No explicit foreign keys defined.")

    output.append("")

    column_count = sum(len(locations) for locations in column_index.values())
    return output, len(tables), column_count


def _write_report(output_path, lines):
    """Write *lines* to *output_path* as UTF-8, creating its directory if needed."""
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output_path, "w", encoding='utf-8') as f:
        f.write("\n".join(lines))


def validate_schema(db_path: str = "database.sqlite",
                    output_path: str = "tool_output/schema_validator_output.txt") -> None:
    """Validate schema with complete column location mapping.

    Reads every table of the SQLite database at *db_path* and writes a
    text report to *output_path* containing, per table, the row count,
    columns, foreign keys and indexed columns, followed by a keyword-grouped
    column location index and a foreign-key relationship map.

    Args:
        db_path: Path of the SQLite database file to inspect.
        output_path: Path of the report file to write.

    Raises:
        FileNotFoundError: if *db_path* does not exist. The file is checked
            first because ``sqlite3.connect`` would otherwise create an
            empty database as a side effect.
        Exception: any other failure is re-raised after an error report has
            been written to *output_path*.
    """
    try:
        if not os.path.isfile(db_path):
            raise FileNotFoundError(f"{db_path} not found")

        conn = sqlite3.connect(db_path)
        try:
            output, table_count, column_count = _build_report(conn.cursor())
        finally:
            # Release the database handle even when report building fails.
            conn.close()

        _write_report(output_path, output)

        print(f"Schema validation complete - {table_count} tables, {column_count} columns indexed")

    except Exception as e:
        # Fallback output so downstream consumers always find a report file.
        error_output = [
            "# SCHEMA VALIDATION",
            "",
            "ERROR: Could not validate schema",
            f"Reason: {e}",
            "",
            f"Please ensure {db_path} exists and is readable."
        ]

        _write_report(output_path, error_output)

        print(f"Schema validation failed: {e}")
        raise

# Script entry point: run the validation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    validate_schema()