#!/usr/bin/env python3
"""
Attribution Mapper - Maps which table owns which data with high precision
Combines iter2's ownership focus with iter9's exact naming precision
"""

import sqlite3
import os
import json
from collections import defaultdict

# Keyword groups used to classify a column by substring match on its
# lower-cased name. Order matters: the first group that matches wins.
_PERSONAL_KEYWORDS = ('name', 'firstname', 'lastname', 'email', 'phone', 'address')
_CATEGORY_KEYWORDS = (
    ('Financial', ('price', 'cost', 'amount', 'salary', 'total', 'subtotal')),
    ('Temporal', ('date', 'time', 'year', 'month', 'created', 'updated')),
    ('Status', ('status', 'state', 'active', 'enabled', 'flag')),
    ('Quantity', ('quantity', 'count', 'number', 'qty')),
)

# Prefix applied to personal-information columns, chosen by table name.
_PERSON_TABLE_LABELS = (
    (('customer',), 'Customer'),
    (('employee',), 'Employee'),
    (('supplier',), 'Supplier'),
    (('user', 'person'), 'Person'),
)

# Table-name keywords mapped to a human-readable purpose; first match wins.
_TABLE_PURPOSES = (
    (('order',), "Order/Transaction data"),
    (('product', 'item'), "Product/Item catalog"),
    (('customer', 'client'), "Customer information"),
    (('employee', 'staff'), "Employee/Staff data"),
    (('detail',), "Detail/Line item data"),
    (('_to_', '_map', 'junction'), "Junction/Relationship mapping"),
)


def _contains_any(text, keywords):
    """Return True if any of *keywords* occurs as a substring of *text*."""
    return any(keyword in text for keyword in keywords)


def _quote_identifier(name):
    """Return *name* as a double-quoted SQLite identifier, doubling embedded quotes."""
    return '"' + name.replace('"', '""') + '"'


def _read_schema(conn):
    """Return (columns_by_table, fks_by_table) for every user table, ordered by name."""
    cursor = conn.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
    # Names starting with "sqlite_" are reserved for SQLite's own bookkeeping
    # (e.g. sqlite_sequence); including them yields bogus ownership conflicts.
    tables = [
        row[0] for row in cursor.fetchall()
        if not row[0].lower().startswith('sqlite_')
    ]

    columns_by_table = {}
    fks_by_table = {}
    for table in tables:
        quoted = _quote_identifier(table)
        cursor.execute(f"PRAGMA table_info({quoted})")
        columns_by_table[table] = cursor.fetchall()
        cursor.execute(f"PRAGMA foreign_key_list({quoted})")
        fks_by_table[table] = cursor.fetchall()
    return columns_by_table, fks_by_table


def _describe_column(table_lower, col_name):
    """Return the ownership label for one column, or None if no category matches."""
    col_lower = col_name.lower()
    if _contains_any(col_lower, _PERSONAL_KEYWORDS):
        for table_keywords, label in _PERSON_TABLE_LABELS:
            if _contains_any(table_lower, table_keywords):
                return f"{label} {col_name}"
        return f"{col_name} data"
    for label, keywords in _CATEGORY_KEYWORDS:
        if _contains_any(col_lower, keywords):
            return f"{label}: {col_name}"
    return None


def _guess_purpose(table_lower):
    """Return the likely purpose of a table from its lower-cased name, or None."""
    for keywords, purpose in _TABLE_PURPOSES:
        if _contains_any(table_lower, keywords):
            return purpose
    return None


def _build_ownership(table, columns, foreign_keys):
    """Build the ownership record of one table from its PRAGMA rows."""
    table_lower = table.lower()

    # table_info rows are (cid, name, type, notnull, dflt_value, pk); pk is the
    # 1-based position of the column inside the primary key, or 0 if absent.
    pk_columns = [
        col[1]
        for col in sorted((c for c in columns if c[5] > 0), key=lambda c: c[5])
    ]

    labels = (_describe_column(table_lower, col[1]) for col in columns)

    return {
        'table': table,
        'owns': [label for label in labels if label is not None],
        # Kept as the first key column for backward compatibility; the full
        # (possibly composite) key is in 'primary_key_columns'.
        'primary_key': pk_columns[0] if pk_columns else None,
        # foreign_key_list rows are (id, seq, table, from, to, ...).
        'foreign_keys': [
            {'from': fk[3], 'to_table': fk[2], 'to_column': fk[4]}
            for fk in foreign_keys
        ],
        'likely_purpose': _guess_purpose(table_lower),
        'primary_key_columns': pk_columns,
    }


def _find_conflicts(columns_by_table):
    """Return (ownership_conflicts, common_mistakes) for column names shared by tables."""
    column_occurrences = defaultdict(list)
    for table, columns in columns_by_table.items():
        for col in columns:
            column_occurrences[col[1].lower()].append((table, col[1]))

    ownership_conflicts = []
    common_mistakes = []
    for col_name, occurrences in column_occurrences.items():
        if len(occurrences) < 2:
            continue
        # Shared ID/key columns are expected (join columns), so skip them.
        # NOTE(review): this is a plain substring test, so names such as
        # 'paid' or 'valid_from' are skipped as well - confirm this is intended.
        if _contains_any(col_name, ('id', 'key')):
            continue

        ownership_conflicts.append({
            'column_pattern': col_name,
            'appears_in': occurrences,
            'warning': f"Column '{col_name}' appears in multiple tables - verify correct source"
        })

        if 'name' in col_name:
            common_mistakes.append(f"Name confusion: {occurrences} - use table prefix to disambiguate")
        elif 'date' in col_name:
            common_mistakes.append(f"Date ambiguity: {occurrences} - verify temporal context")
        elif 'amount' in col_name or 'total' in col_name:
            common_mistakes.append(f"Amount confusion: {occurrences} - check aggregation level")

    return ownership_conflicts, common_mistakes


def map_attribution(db_path='./database.sqlite', output_dir='./tool_output'):
    """Create a precise map of data ownership across tables.

    Reads the schema of the SQLite database at *db_path*, classifies every
    column by keyword, guesses each table's purpose from its name, flags
    non-ID column names that occur in more than one table, writes the result
    to ``attribution_map.json`` inside *output_dir* and prints a summary.
    SQLite's internal ``sqlite_*`` tables are ignored.

    Args:
        db_path: Path of the SQLite database to inspect.
        output_dir: Directory receiving ``attribution_map.json``; it is
            created when missing.

    Returns:
        None. When *db_path* does not exist an error is printed and nothing
        is written.
    """
    if not os.path.exists(db_path):
        # Checked up front: sqlite3.connect() would otherwise create an empty
        # database file at the missing path.
        print(f"ERROR: {os.path.basename(db_path)} not found")
        return

    conn = sqlite3.connect(db_path)
    try:
        columns_by_table, fks_by_table = _read_schema(conn)
    finally:
        # Release the connection even when a schema query fails.
        conn.close()

    attribution_map = {
        table: _build_ownership(table, columns, fks_by_table[table])
        for table, columns in columns_by_table.items()
    }
    ownership_conflicts, common_mistakes = _find_conflicts(columns_by_table)

    # Distinct tables that take part in at least one conflict.
    conflicting_tables = {
        table
        for conflict in ownership_conflicts
        for table, _ in conflict['appears_in']
    }

    output = {
        'attribution_map': attribution_map,
        'ownership_conflicts': ownership_conflicts,
        'common_mistakes': common_mistakes,
        'summary': {
            'total_tables': len(columns_by_table),
            'tables_with_conflicts': len(conflicting_tables),
            'potential_confusion_points': len(common_mistakes)
        }
    }

    # Save results
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, 'attribution_map.json')
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2)

    # Print summary
    print("ATTRIBUTION MAPPING COMPLETE")
    print(f"Analyzed {len(columns_by_table)} tables")
    print("\nKey Ownership Assignments:")

    for table, info in attribution_map.items():
        if not info['owns']:
            continue
        print(f"\n{table}:")
        print(f"  Purpose: {info['likely_purpose'] or 'General data'}")
        print(f"  Owns: {', '.join(info['owns'][:5])}")
        if len(info['owns']) > 5:
            print(f"        ...and {len(info['owns']) - 5} more attributes")

    # Gate on the list that is actually printed so the header never appears
    # with nothing underneath it.
    if common_mistakes:
        print("\n⚠️  POTENTIAL CONFUSION POINTS:")
        for mistake in common_mistakes[:5]:
            print(f"  - {mistake}")

    print(f"\nDetailed attribution map saved to {output_path}")

# Script entry point: run the mapper only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    map_attribution()