#!/usr/bin/env python3
"""
Table attribution mapper - identifies which table owns which data attributes.
Critical for avoiding column confusion and improving SQL accuracy.
Cross-pollinated from column_strict_aggregate_3a's attribution focus.
"""

import json
import re
import sqlite3
from collections import defaultdict
from pathlib import Path

# Fragments stripped from a statistics/metrics table name to recover the
# entity it describes ("player_stats" / "PlayerStats" -> "player" / "Player").
_STATS_FRAGMENT_RE = re.compile(r"_?(?:stats|metrics)", re.IGNORECASE)

# Whole "_history" / "_archive" / "_log(s)" name segments. The lookahead keeps
# names such as "user_login" or "catalog" from being mistaken for log tables.
_HISTORY_SEGMENT_RE = re.compile(
    r"_(?:history|archive|logs?)(?=_|$)", re.IGNORECASE
)

# Human-readable label used in the attribution map for each table type.
_ATTRIBUTION_LABELS = {
    "entity": "Core attributes",
    "aggregate": "Aggregated/summary data",
    "detail": "Detail/transaction records",
    "junction": "Relationships",
}

# Table types that may be attached to an entity as "related" tables.
_RELATED_TYPES = ("detail", "aggregate")


def _quote_identifier(name):
    """Return *name* as a double-quoted SQLite identifier.

    Quoting lets reserved words ("order") and names containing spaces or
    punctuation be used safely in PRAGMA / SELECT statements.
    """
    return '"' + name.replace('"', '""') + '"'


def _singularize(table):
    """Drop a single plural 's' from *table* (heuristic, never returns '')."""
    lowered = table.lower()
    if (
        len(table) > 1
        and lowered.endswith("s")
        # Words such as "address", "status" or "analysis" are already singular.
        and not lowered.endswith(("ss", "us", "is"))
    ):
        return table[:-1]
    return table


def _classify_table(table, column_count, row_count, has_foreign_key, has_aggregate):
    """Return the heuristic table type for one table."""
    if column_count <= 3 and has_foreign_key:
        return "junction"
    if row_count < 100 and not has_foreign_key:
        return "lookup"
    if has_aggregate:
        return "aggregate"
    lowered = table.lower()
    if "_" in table and any(word in lowered for word in ("detail", "line", "item")):
        return "detail"
    return "entity"


def _profile_table(cursor, table):
    """Collect columns, row count, ownership hints and type for *table*."""
    quoted = _quote_identifier(table)

    # PRAGMA table_info rows: (cid, name, type, notnull, dflt_value, pk)
    cursor.execute(f"PRAGMA table_info({quoted})")
    columns = cursor.fetchall()

    cursor.execute(f"SELECT COUNT(*) FROM {quoted}")
    row_count = cursor.fetchone()[0]

    column_info = {}
    likely_owns = []
    has_foreign_key = False
    has_aggregate = False

    for col in columns:
        name, col_type, not_null, is_pk = col[1], col[2], col[3], col[5]
        lowered = name.lower()

        column_info[name] = {
            "type": col_type,
            "nullable": not not_null,
            "primary_key": bool(is_pk),
        }

        # Name-based ownership hints; the first matching category wins.
        if "name" in lowered or "title" in lowered or "description" in lowered:
            likely_owns.append(f"{name} (descriptive)")
        elif "total" in lowered or "sum" in lowered or "count" in lowered:
            likely_owns.append(f"{name} (aggregate)")
            has_aggregate = True
        elif "date" in lowered or "time" in lowered or "year" in lowered:
            likely_owns.append(f"{name} (temporal)")
        elif lowered.endswith("_id") and not is_pk:
            # A non-primary-key "*_id" column is treated as a foreign key.
            has_foreign_key = True

    return {
        "table": table,
        "row_count": row_count,
        "columns": column_info,
        "likely_owns": likely_owns,
        "table_type": _classify_table(
            table, len(columns), row_count, has_foreign_key, has_aggregate
        ),
    }


def _group_entities(table_profiles):
    """Map each singular entity name to its core table(s) and related tables."""
    entity_groups = defaultdict(list)

    for table, profile in table_profiles.items():
        if profile["table_type"] != "entity":
            continue

        entity_name = _singularize(table)
        group = entity_groups[entity_name]
        group.append(table)

        entity_lower = entity_name.lower()
        table_lower = table.lower()
        for other, other_profile in table_profiles.items():
            if other == table or other_profile["table_type"] not in _RELATED_TYPES:
                continue
            other_lower = other.lower()
            # Match on the singular form so "orders" finds "order_items".
            if entity_lower in other_lower or other_lower in table_lower:
                if other not in group:
                    group.append(other)

    return entity_groups


def _find_common_mistakes(table_profiles):
    """Flag table pairs of different types that share more than two columns."""
    lowered_columns = {
        table: {name.lower() for name in profile["columns"]}
        for table, profile in table_profiles.items()
    }

    mistakes = []
    for table1, profile1 in table_profiles.items():
        for table2, profile2 in table_profiles.items():
            if not table1 < table2:  # Report each unordered pair once
                continue
            if profile1["table_type"] == profile2["table_type"]:
                continue
            shared = lowered_columns[table1] & lowered_columns[table2]
            if len(shared) > 2:
                # min() makes the reported example column deterministic.
                example = min(shared)
                mistakes.append(
                    f"Confusing {table1}.{example} with {table2}.{example} - different contexts"
                )
    return mistakes


def analyze_table_ownership(conn):
    """Analyze which tables own which attributes.

    Args:
        conn: An open ``sqlite3`` connection to the database to inspect.

    Returns:
        A JSON-serializable dict with three keys:

        * ``"attribution_map"``: label -> list of "<kind> → <table> table"
          hints describing where a kind of data lives.
        * ``"common_mistakes"``: messages about tables of different types
          that share more than two column names.
        * ``"table_profiles"``: table -> ``{"type", "row_count",
          "likely_owns"}``.

    Raises:
        sqlite3.Error: If the schema or a table cannot be read.
    """
    cursor = conn.cursor()

    cursor.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
    # Names starting with "sqlite_" are SQLite's own bookkeeping tables
    # (sqlite_sequence, sqlite_stat1, ...), not user data.
    tables = [
        row[0]
        for row in cursor.fetchall()
        if not row[0].lower().startswith("sqlite_")
    ]

    table_profiles = {table: _profile_table(cursor, table) for table in tables}

    # Core/related attributions per entity.
    attribution_map = {}
    for entity, member_tables in _group_entities(table_profiles).items():
        attributions = []
        for table in member_tables:
            label = _ATTRIBUTION_LABELS.get(table_profiles[table]["table_type"])
            if label is not None:
                attributions.append(f"{label} → {table} table")
        if attributions:
            attribution_map[entity.title()] = attributions

    common_mistakes = _find_common_mistakes(table_profiles)

    # Name-based patterns. An entry is only emitted when stripping the marker
    # actually yields a different, non-empty entity name; otherwise the hint
    # would point the table at itself.
    for table in table_profiles:
        entity = _STATS_FRAGMENT_RE.sub("", table)
        if entity and entity != table:
            attribution_map[f"{entity.title()} Statistics"] = [
                f"Performance metrics → {table} table",
                f"NOT in main {entity} table (if exists)",
            ]

        entity = _HISTORY_SEGMENT_RE.sub("", table)
        if entity and entity != table:
            attribution_map[f"{entity.title()} History"] = [
                f"Historical records → {table} table",
                f"Current data → {entity} table (if exists)",
            ]

    return {
        "attribution_map": attribution_map,
        "common_mistakes": common_mistakes,
        "table_profiles": {
            table: {
                "type": profile["table_type"],
                "row_count": profile["row_count"],
                "likely_owns": profile["likely_owns"],
            }
            for table, profile in table_profiles.items()
        },
    }

def main(db_path="./database.sqlite"):
    """Analyze a SQLite database and print the result as JSON on stdout.

    Args:
        db_path: Location of the SQLite database file. Defaults to
            ``./database.sqlite`` in the current working directory.

    The function never raises for analysis failures: a missing file or any
    error during analysis is reported as ``{"error": "<message>"}`` so that
    the output is always a single JSON document.
    """
    db_path = Path(db_path)

    if not db_path.exists():
        print(json.dumps({"error": "Database file not found"}))
        return

    try:
        conn = sqlite3.connect(str(db_path))
        try:
            results = analyze_table_ownership(conn)
        finally:
            # Close the connection even when the analysis fails.
            conn.close()

        # Output as JSON
        print(json.dumps(results, indent=2))

    except Exception as e:  # Top-level boundary: always answer with JSON.
        print(json.dumps({"error": str(e)}))

# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()