#!/usr/bin/env python3
"""
Evidence Reconciler - Maps common evidence naming errors to actual schema
Implements iter9's proactive name mapping with iter2's precision focus
"""

import sqlite3
import os
import json
import re
from difflib import get_close_matches

def _quote_identifier(name):
    """Return *name* as a double-quoted SQLite identifier.

    Embedded double quotes are doubled, so table names containing ``]``,
    spaces or quotes do not break the generated statement.
    """
    return '"' + name.replace('"', '""') + '"'


def _qualified(col_info):
    """Return the ``table.column`` label for one column record."""
    return f"{col_info['table']}.{col_info['column']}"


def _load_columns(db_path):
    """Read the table names and a flat list of column records from the database.

    Returns:
        A ``(tables, column_list)`` tuple. ``tables`` is the list of table
        names ordered by name; ``column_list`` holds one dict per column with
        the keys ``table``, ``column`` and ``type``.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
        tables = [row[0] for row in cursor.fetchall()]

        column_list = []
        for table in tables:
            cursor.execute(f"PRAGMA table_info({_quote_identifier(table)})")
            # table_info rows are (cid, name, type, notnull, dflt_value, pk).
            column_list.extend(
                {'table': table, 'column': col[1], 'type': col[2]}
                for col in cursor.fetchall()
            )
        return tables, column_list
    finally:
        # The connection is only needed for schema introspection; release it
        # even when a query fails.
        conn.close()


def _build_evidence_mappings(column_list):
    """Map common company / shipping evidence spellings to candidate columns."""
    company_patterns = ['CompanyName', 'Company Name', 'company_name', 'Company']
    ship_patterns = ['ShipVia', 'Ship Via', 'ship_via', 'ShippingMethod', 'Shipping Method']

    # The candidate columns do not depend on the alias spelling, so compute
    # each list once and share the result across all aliases.
    company_matches = []
    ship_matches = []
    for col_info in column_list:
        col_lower = col_info['column'].lower()
        is_party = any(key in col_lower for key in ('company', 'supplier', 'customer'))
        if is_party and 'name' in col_lower:
            company_matches.append(_qualified(col_info))
        if 'ship' in col_lower or 'carrier' in col_lower:
            ship_matches.append(_qualified(col_info))

    mappings = {}
    for patterns, matches in ((company_patterns, company_matches),
                              (ship_patterns, ship_matches)):
        if matches:
            for pattern in patterns:
                # Each alias gets its own list so later mutation cannot alias.
                mappings[pattern] = list(matches)
    return mappings


def _build_naming_variations(column_list):
    """Map alternative spellings of each column name to ``table.column`` labels."""
    naming_variations = {}
    for col_info in column_list:
        col = col_info['column']
        variations = set()

        # Space / underscore variations
        if ' ' in col:
            variations.add(col.replace(' ', ''))
            variations.add(col.replace(' ', '_'))
        elif '_' in col:
            variations.add(col.replace('_', ' '))
            variations.add(col.replace('_', ''))

        # CamelCase to underscore
        variations.add(re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', col).lower())

        # Underscore to camelCase / PascalCase
        if '_' in col:
            parts = col.split('_')
            variations.add(parts[0].lower() + ''.join(p.capitalize() for p in parts[1:]))
            variations.add(''.join(p.capitalize() for p in parts))

        # A variation equal to the real name is not an "error" spelling.
        variations.discard(col)

        # Sorted so the key order of the result does not depend on set
        # iteration order (which varies with string hash randomisation).
        for var in sorted(variations):
            naming_variations.setdefault(var, []).append(_qualified(col_info))
    return naming_variations


def _build_semantic_mappings(column_list):
    """Collect columns whose name contains, or closely matches, a semantic term."""
    terms = ['Employee Territory', 'Salary', 'Wage', 'Pay',
             'Territory', 'Region', 'Division', 'Department']
    semantic_mappings = {term: [] for term in terms}
    for term, hits in semantic_mappings.items():
        term_lower = term.lower()
        for col_info in column_list:
            col_lower = col_info['column'].lower()
            if term_lower in col_lower or get_close_matches(term_lower, [col_lower], cutoff=0.8):
                hits.append(_qualified(col_info))
    return semantic_mappings


def _find_confusable_columns(column_list):
    """Return pairs of similarly named columns that live in different tables."""
    confusable = []
    for i, col1 in enumerate(column_list):
        name1 = col1['column'].lower()
        # Starting at i + 1 visits every unordered pair exactly once, so no
        # separate "already checked" bookkeeping is needed.
        for col2 in column_list[i + 1:]:
            if col1['table'] == col2['table']:
                continue
            if get_close_matches(name1, [col2['column'].lower()], n=1, cutoff=0.7):
                confusable.append({
                    'column1': _qualified(col1),
                    'column2': _qualified(col2),
                    'warning': "Similar names - verify correct table"
                })
    return confusable


def _find_missing_data(column_list):
    """Report commonly requested terms that no column name contains."""
    common_requests = ['salary', 'email', 'phone', 'address', 'territory', 'commission', 'bonus']
    # Substitute terms searched when the requested term itself is absent.
    alternative_terms = {
        'salary': ('wage', 'pay', 'compensation'),
        'territory': ('region', 'area', 'district'),
    }
    lowered = [(col_info, col_info['column'].lower()) for col_info in column_list]

    missing_data = {}
    for request in common_requests:
        if any(request in col_lower for _, col_lower in lowered):
            continue
        substitutes = alternative_terms.get(request, ())
        alternatives = [
            _qualified(col_info)
            for col_info, col_lower in lowered
            if any(term in col_lower for term in substitutes)
        ]
        missing_data[request] = {
            'status': 'NOT FOUND',
            'alternatives': alternatives or ['No direct equivalent in database']
        }
    return missing_data


def reconcile_evidence():
    """Create mappings for common evidence naming variations.

    Reads the schema of ``./database.sqlite``, derives alias, naming-variation,
    semantic, confusable-column and missing-data maps from the column names,
    writes them to ``./tool_output/evidence_reconciliation.json`` and prints a
    short summary. Prints an error and returns without writing anything when
    the database file does not exist.

    Returns:
        None.

    Raises:
        sqlite3.Error: if the schema queries fail.
        OSError: if the output directory or file cannot be written.
    """
    db_path = './database.sqlite'
    if not os.path.exists(db_path):
        print("ERROR: database.sqlite not found")
        return

    tables, column_list = _load_columns(db_path)

    evidence_mappings = _build_evidence_mappings(column_list)
    naming_variations = _build_naming_variations(column_list)
    semantic_mappings = _build_semantic_mappings(column_list)
    confusable_columns = _find_confusable_columns(column_list)
    missing_data = _find_missing_data(column_list)

    # Generate recommendations
    recommendations = []
    if evidence_mappings:
        recommendations.append("Multiple columns could match common evidence terms - use table prefixes")
    if confusable_columns:
        recommendations.append("Similar column names across tables - always specify table name")
    if missing_data:
        recommendations.append("Some commonly requested data is not available - provide alternatives")

    output = {
        'evidence_mappings': evidence_mappings,
        'naming_variations': naming_variations,
        'semantic_mappings': {k: v for k, v in semantic_mappings.items() if v},
        # Capped at the first 10 pairs in discovery order (not ranked by similarity).
        'confusable_columns': confusable_columns[:10],
        'missing_data': missing_data,
        'recommendations': recommendations
    }

    # Save results
    os.makedirs('./tool_output', exist_ok=True)
    with open('./tool_output/evidence_reconciliation.json', 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2)

    # Print summary
    print("EVIDENCE RECONCILIATION COMPLETE")
    print(f"Analyzed {len(tables)} tables with {len(column_list)} total columns")

    if evidence_mappings:
        print("\n📝 Common Evidence Mappings:")
        for pattern, matches in list(evidence_mappings.items())[:5]:
            print(f"  '{pattern}' → {matches[:2]}")

    if missing_data:
        print("\n⚠️  Missing Common Data:")
        for request, info in list(missing_data.items())[:5]:
            # 'alternatives' is never empty: it falls back to a placeholder entry.
            print(f"  '{request}' → {info['alternatives'][0]}")

    if confusable_columns:
        print(f"\n🔄 Found {len(confusable_columns)} potentially confusable column pairs")
        for conf in confusable_columns[:3]:
            print(f"  - {conf['column1']} ↔ {conf['column2']}")

    print("\nDetailed reconciliation map saved to ./tool_output/evidence_reconciliation.json")

# Run the reconciliation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    reconcile_evidence()