#!/usr/bin/env python3
"""
Extracts complete, accurate schema documentation with column purposes.
Focuses on exact column names and their meanings.
"""

import json
import os
import re
import sqlite3

def _quote_identifier(name):
    """Return *name* as a double-quoted SQL identifier.

    Embedded double quotes are doubled, so table names containing spaces,
    punctuation or reserved words can be interpolated into a statement
    without producing a syntax error.
    """
    return '"' + name.replace('"', '""') + '"'


def document_schema(db_path="database.sqlite"):
    """Document every table of a SQLite database as JSON plus a text summary.

    For each table listed in ``sqlite_master`` this records the row count,
    the column names, and per column its declared type, nullability, default
    value and primary-key flag. Primary keys, foreign keys, declared types
    and name-based purpose guesses are also stored in their own top-level
    sections.

    The result is written to ``tool_output/schema_documentation.json`` and a
    human-readable summary is produced via ``generate_schema_summary``.

    Args:
        db_path: Path of the SQLite database file to inspect.

    Returns:
        None. Any exception raised while documenting is caught, printed, and
        recorded under an ``"error"`` key in the JSON output.
    """
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    documentation = {
        "tables": {},
        "column_purposes": {},
        "data_types": {},
        "primary_keys": {},
        "foreign_keys": {}
    }

    try:
        # Create the output directory up front so the error report below can
        # be written even when schema extraction fails early.
        os.makedirs("tool_output", exist_ok=True)

        cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
        tables = [row[0] for row in cursor.fetchall()]

        for table in tables:
            # PRAGMA arguments and FROM targets cannot be bound as
            # parameters, so the name is quoted as an identifier instead.
            quoted = _quote_identifier(table)

            cursor.execute(f"PRAGMA table_info({quoted})")
            columns = cursor.fetchall()

            cursor.execute(f"SELECT COUNT(*) FROM {quoted}")
            row_count = cursor.fetchone()[0]

            table_doc = {
                "row_count": row_count,
                "columns": [],
                "column_details": {}
            }
            documentation["tables"][table] = table_doc

            # (position within the key, column name) pairs for this table.
            pk_members = []

            for col in columns:
                # table_info rows: cid, name, type, notnull, dflt_value, pk
                col_name, col_type, not_null, default_val, pk_index = col[1:6]
                is_pk = pk_index > 0

                table_doc["columns"].append(col_name)
                table_doc["column_details"][col_name] = {
                    "type": col_type,
                    "nullable": not not_null,
                    "default": default_val,
                    "is_primary_key": is_pk
                }

                if is_pk:
                    pk_members.append((pk_index, col_name))

                purpose = infer_column_purpose(col_name, col_type, table)
                documentation["column_purposes"].setdefault(table, {})[col_name] = purpose
                documentation["data_types"].setdefault(table, {})[col_name] = col_type

            if pk_members:
                # `pk` is the 1-based position of the column inside the key,
                # which may differ from the column declaration order.
                documentation["primary_keys"][table] = [
                    name for _, name in sorted(pk_members)
                ]

            cursor.execute(f"PRAGMA foreign_key_list({quoted})")
            fks = cursor.fetchall()

            if fks:
                # foreign_key_list rows: id, seq, table, from, to, ...
                documentation["foreign_keys"][table] = [
                    {
                        "from": fk[3],
                        "to_table": fk[2],
                        "to_column": fk[4]
                    }
                    for fk in fks
                ]

        with open("tool_output/schema_documentation.json", "w", encoding="utf-8") as f:
            json.dump(documentation, f, indent=2)

        generate_schema_summary(documentation, cursor)

        print("Schema documentation complete - results in tool_output/")

    except Exception as e:
        # Top-level boundary: report the failure and persist whatever was
        # collected so far together with the error message.
        print(f"Error documenting schema: {e}")
        documentation["error"] = str(e)
        try:
            os.makedirs("tool_output", exist_ok=True)
            with open("tool_output/schema_documentation.json", "w", encoding="utf-8") as f:
                json.dump(documentation, f, indent=2)
        except OSError as write_error:
            # Do not let a failing error report hide the original problem.
            print(f"Could not write error report: {write_error}")

    finally:
        conn.close()

# Splits an identifier into words: acronym runs, (capitalised) lowercase
# words and digit runs. Separators such as "_" are skipped.
_COLUMN_WORD_RE = re.compile(r"[A-Z]+(?![a-z])|[A-Z]?[a-z]+|[0-9]+")


def _column_words(col_name):
    """Return the lowercase words of a snake_case or camelCase column name."""
    return {word.lower() for word in _COLUMN_WORD_RE.findall(col_name)}


def infer_column_purpose(col_name, col_type, table):
    """Guess a short description of a column from its name and declared type.

    Rules are tried in order and the first match wins: identifier columns,
    exact geographic names, demographic keywords, name columns, date/time,
    numeric, status/type/flag and description keywords. When no name rule
    matches, the declared type decides the description.

    Args:
        col_name: Column name as declared in the table.
        col_type: Declared column type; may be empty or None.
        table: Name of the owning table, used in the generic identifier text.

    Returns:
        A human-readable purpose string.
    """
    col_lower = col_name.lower()

    # ID columns
    if col_lower.endswith('_id') or col_lower == 'id':
        if 'user' in col_lower:
            return "User identifier"
        if 'product' in col_lower:
            return "Product identifier"
        if 'order' in col_lower:
            return "Order identifier"
        return f"Unique identifier for {table}"

    # Geographic columns (exact names only)
    if col_lower in ('latitude', 'lat'):
        return "Geographic latitude coordinate"
    if col_lower in ('longitude', 'lon', 'lng'):
        return "Geographic longitude coordinate"
    if col_lower in ('city', 'town'):
        return "City or town name"
    if col_lower in ('state', 'province'):
        return "State or province"
    if col_lower == 'country':
        return "Country name or code"
    if col_lower in ('zip', 'zip_code', 'postal_code'):
        return "Postal/ZIP code"
    if col_lower in ('address', 'street'):
        return "Street address"

    # Demographic columns
    if 'population' in col_lower:
        return "Population count"
    if 'household' in col_lower:
        return "Household-related data"
    if 'income' in col_lower:
        return "Income information"
    # "age" must be a whole word of the name: a plain substring test also
    # fires on "percentage", "page", "message", ... and would shadow the
    # percentage rule further down.
    if _column_words(col_name) & {'age', 'ages'}:
        return "Age or age-related data"

    # Name columns
    if col_lower in ('name', 'title'):
        return "Name or title"
    if col_lower in ('first_name', 'firstname'):
        return "Person's first name"
    if col_lower in ('last_name', 'lastname', 'surname'):
        return "Person's last name"

    # Date/Time columns
    if 'date' in col_lower or 'time' in col_lower:
        if 'create' in col_lower:
            return "Creation date/time"
        if 'update' in col_lower or 'modify' in col_lower:
            return "Last update date/time"
        return "Date/time value"

    # Numeric columns
    if 'count' in col_lower or 'total' in col_lower:
        return "Count or total"
    if 'price' in col_lower or 'cost' in col_lower:
        return "Price or cost amount"
    if 'amount' in col_lower or 'value' in col_lower:
        return "Numeric amount or value"
    if 'percent' in col_lower or 'pct' in col_lower:
        return "Percentage value"

    # Status/Type columns
    if 'status' in col_lower:
        return "Status indicator"
    if 'type' in col_lower or 'category' in col_lower:
        return "Type or category classification"
    if 'flag' in col_lower or col_lower.startswith('is_'):
        return "Boolean flag/indicator"

    # Description columns
    if col_lower in ('description', 'desc', 'notes', 'comments'):
        return "Descriptive text"

    # Fall back to the declared type; tolerate a missing declaration.
    type_upper = (col_type or "").upper()
    if 'INT' in type_upper:
        return "Numeric value"
    if 'TEXT' in type_upper or 'CHAR' in type_upper:
        return "Text value"
    if 'REAL' in type_upper or 'FLOAT' in type_upper:
        return "Decimal number"
    if 'BLOB' in type_upper:
        return "Binary data"
    return "Data value"

def generate_schema_summary(documentation, cursor):
    """Write a human-readable schema summary to tool_output/schema_summary.txt.

    The summary lists, per table, the row count, column count, primary key,
    foreign keys and every column with its type, inferred purpose and
    ``[PK]`` / ``[nullable]`` markers, followed by a section listing all
    foreign-key relationships.

    Args:
        documentation: Dict with the ``tables``, ``column_purposes``,
            ``primary_keys`` and ``foreign_keys`` sections to render.
        cursor: Unused; kept so existing callers keep working.

    Returns:
        None. The summary is written to disk as UTF-8 text.
    """
    primary_keys = documentation["primary_keys"]
    foreign_keys = documentation["foreign_keys"]
    purposes = documentation["column_purposes"]

    summary = [
        "# DATABASE SCHEMA DOCUMENTATION\n\n",
        "## Overview\n",
        f"Total tables: {len(documentation['tables'])}\n\n",
        "## Table Details\n\n",
    ]

    for table, info in documentation["tables"].items():
        summary.append(f"### {table}\n")
        summary.append(f"- **Rows**: {info['row_count']:,}\n")
        summary.append(f"- **Columns**: {len(info['columns'])}\n")

        if table in primary_keys:
            summary.append(f"- **Primary Key**: {', '.join(primary_keys[table])}\n")

        if table in foreign_keys:
            summary.append("- **Foreign Keys**:\n")
            summary.extend(
                f"  - {fk['from']} → {fk['to_table']}.{fk['to_column']}\n"
                for fk in foreign_keys[table]
            )

        summary.append("\n**Columns**:\n")

        for col in info["columns"]:
            col_detail = info["column_details"][col]
            line = f"- `{col}` ({col_detail['type']}): {purposes[table][col]}"
            if col_detail["is_primary_key"]:
                line += " [PK]"
            if col_detail["nullable"]:
                line += " [nullable]"
            summary.append(line + "\n")

        summary.append("\n")

    if foreign_keys:
        summary.append("## Table Relationships\n\n")
        for table, fks in foreign_keys.items():
            summary.extend(
                f"- {table}.{fk['from']} → {fk['to_table']}.{fk['to_column']}\n"
                for fk in fks
            )
        summary.append("\n")

    # The function may be called before anything else created the directory.
    os.makedirs("tool_output", exist_ok=True)

    # The arrows above are non-ASCII, so the encoding is fixed explicitly
    # instead of depending on the platform's locale default.
    with open("tool_output/schema_summary.txt", "w", encoding="utf-8") as f:
        f.writelines(summary)

# Script entry point: document the database at the default path
# ("database.sqlite", resolved against the current working directory).
if __name__ == "__main__":
    document_schema()