#!/usr/bin/env python3
"""
Schema Validator - Generates exact column-to-table mappings to prevent "no such column" errors.
This is the PRIMARY tool for eliminating the #1 cause of SQL failures.
"""

import sqlite3
import json
import os

def _quote_identifier(name: str) -> str:
    """Return *name* as a double-quoted SQLite identifier.

    Embedded double quotes are doubled, so table names containing spaces,
    quotes, or reserved words (e.g. ``order``) can be used safely in PRAGMAs.
    """
    return '"' + name.replace('"', '""') + '"'


def _collect_schema(cursor) -> dict:
    """Read tables, columns, primary keys and foreign keys through *cursor*.

    Returns the ``schema_doc`` dictionary with ``tables``,
    ``column_to_table``, ``foreign_keys`` and ``primary_keys`` filled in and
    ``junction_tables`` left empty.
    """
    schema_doc = {
        "tables": {},
        "column_to_table": {},
        "foreign_keys": {},
        "primary_keys": {},
        "junction_tables": [],
    }

    # Primary-key columns per (lowercased) table name, in key order. Used only
    # to resolve foreign keys that do not name a parent column.
    pk_in_key_order = {}
    # (foreign-key entry, position within its key) for entries whose parent
    # column was omitted in the DDL; resolved once every table has been read.
    unresolved = []

    cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
    tables = [row[0] for row in cursor.fetchall()]

    for table in tables:
        quoted = _quote_identifier(table)

        table_info = {
            "columns": [],
            "column_types": {},
            "nullable": {},
            "primary_key": [],
        }
        schema_doc["tables"][table] = table_info
        pk_positions = []

        # table_info rows: (cid, name, type, notnull, dflt_value, pk)
        cursor.execute(f"PRAGMA table_info({quoted})")
        for col in cursor.fetchall():
            _cid, col_name, col_type, not_null, _default, pk_pos = col[:6]

            table_info["columns"].append(col_name)
            table_info["column_types"][col_name] = col_type
            table_info["nullable"][col_name] = not not_null

            if pk_pos:
                table_info["primary_key"].append(col_name)
                schema_doc["primary_keys"].setdefault(table, []).append(col_name)
                pk_positions.append((pk_pos, col_name))

            # Reverse mapping: lowercased column name -> every table holding it.
            schema_doc["column_to_table"].setdefault(col_name.lower(), []).append({
                "table": table,
                "column": col_name,
                "type": col_type,
                "is_primary_key": bool(pk_pos),
            })

        pk_in_key_order[table.lower()] = [name for _, name in sorted(pk_positions)]

        # foreign_key_list rows: (id, seq, table, from, to, on_update, ...)
        cursor.execute(f"PRAGMA foreign_key_list({quoted})")
        for fk in cursor.fetchall():
            entry = {
                "from_column": fk[3],
                "to_table": fk[2],
                "to_column": fk[4],
            }
            schema_doc["foreign_keys"].setdefault(table, []).append(entry)
            if entry["to_column"] is None:
                unresolved.append((entry, fk[1]))

    # "REFERENCES parent" without a column list targets the parent's primary
    # key; fill that in so the report never shows "parent.None".
    for entry, seq in unresolved:
        parent_pk = pk_in_key_order.get(entry["to_table"].lower(), [])
        if seq < len(parent_pk):
            entry["to_column"] = parent_pk[seq]

    return schema_doc


def _detect_junction_tables(schema_doc: dict) -> None:
    """Append likely linking tables to ``schema_doc["junction_tables"]``.

    A table qualifies when it has at least two foreign-key columns and those
    make up at least 40% of its columns.
    """
    for table, fks in schema_doc["foreign_keys"].items():
        if len(fks) < 2:
            continue
        column_count = len(schema_doc["tables"][table]["columns"])
        if len(fks) >= column_count * 0.4:
            schema_doc["junction_tables"].append({
                "table": table,
                "connects": [fk["to_table"] for fk in fks],
                "foreign_keys": fks,
            })


def _render_report(schema_doc: dict) -> list:
    """Return the human-readable report for *schema_doc* as a list of lines."""
    output = [
        "# SCHEMA VALIDATION REPORT",
        "# Exact Column-to-Table Mappings",
        "",
        # Section 1: column location quick reference
        "## COLUMN LOCATION REFERENCE",
        "Use this to find exactly which table contains each column:",
        "",
    ]

    for col_name in sorted(schema_doc["column_to_table"]):
        locations = schema_doc["column_to_table"][col_name]
        if len(locations) == 1:
            loc = locations[0]
            output.append(f"- **{col_name}**: {loc['table']}.{loc['column']} ({loc['type']})")
        else:
            output.append(f"- **{col_name}**: APPEARS IN MULTIPLE TABLES")
            for loc in locations:
                pk_marker = " [PK]" if loc['is_primary_key'] else ""
                output.append(f"  - {loc['table']}.{loc['column']} ({loc['type']}){pk_marker}")

    # Section 2: per-table schemas
    output.extend(["", "## TABLE SCHEMAS", ""])

    for table in sorted(schema_doc["tables"]):
        info = schema_doc["tables"][table]
        output.append(f"### Table: {table}")

        if info["primary_key"]:
            output.append(f"Primary Key: {', '.join(info['primary_key'])}")

        output.append("Columns:")
        for col in info["columns"]:
            col_type = info["column_types"][col]
            nullable = " (nullable)" if info["nullable"][col] else ""
            pk = " [PK]" if col in info["primary_key"] else ""
            output.append(f"  - {col}: {col_type}{nullable}{pk}")

        if table in schema_doc["foreign_keys"]:
            output.append("Foreign Keys:")
            for fk in schema_doc["foreign_keys"][table]:
                output.append(f"  - {fk['from_column']} → {fk['to_table']}.{fk['to_column']}")

        output.append("")

    # Section 3: junction tables
    if schema_doc["junction_tables"]:
        output.append("## JUNCTION TABLES (Many-to-Many Relationships)")
        for jt in schema_doc["junction_tables"]:
            output.append(f"- **{jt['table']}**: connects {' ↔ '.join(jt['connects'])}")
            for fk in jt['foreign_keys']:
                output.append(f"  - {fk['from_column']} → {fk['to_table']}.{fk['to_column']}")
        output.append("")

    # Section 4: column names shared by several tables
    output.append("## IMPORTANT: Column Name Conflicts")
    output.append("These columns appear in multiple tables - BE SPECIFIC:")
    output.append("")

    for col_name, locations in schema_doc["column_to_table"].items():
        if len(locations) > 1:
            output.append(f"**{col_name}** appears in:")
            for loc in locations:
                output.append(f"  - {loc['table']} (as {loc['column']})")
            output.append("")

    return output


def validate_schema(db_path: str = "database.sqlite", output_dir: str = "tool_output") -> None:
    """Generate comprehensive schema documentation with exact column locations.

    Inspects every table listed in ``sqlite_master`` of the SQLite database at
    *db_path* and writes two files into *output_dir* (created if missing):

    - ``schema_validation.txt``: human-readable report
    - ``schema_validation.json``: the same data as a JSON document

    Args:
        db_path: Path of the SQLite database file to inspect.
        output_dir: Directory that receives the two report files.

    Raises:
        sqlite3.Error: If the database cannot be read.
        OSError: If the output directory or files cannot be written.
    """
    conn = sqlite3.connect(db_path)
    try:
        os.makedirs(output_dir, exist_ok=True)

        schema_doc = _collect_schema(conn.cursor())
        _detect_junction_tables(schema_doc)
        report_lines = _render_report(schema_doc)

        # Forward slashes keep the printed path identical on every platform.
        text_path = f"{output_dir}/schema_validation.txt"
        json_path = f"{output_dir}/schema_validation.json"

        # Explicit UTF-8: the report contains "→" and "↔", which the default
        # locale encoding on some platforms cannot represent.
        with open(text_path, "w", encoding="utf-8") as f:
            f.write("\n".join(report_lines))

        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(schema_doc, f, indent=2)

        print(f"Schema validation complete - see {text_path}")

    finally:
        conn.close()

# Run the schema validation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    validate_schema()