#!/usr/bin/env python3
"""
Relationship Mapper Tool
Maps foreign key relationships and common join patterns.
"""

import sqlite3
import json
import os
from collections import defaultdict

# Substrings that mark a column name as a likely date/time column.
_DATE_NAME_HINTS = ('date', 'time', 'start', 'stop', 'end')


def _quote_identifier(name):
    """Return *name* as a double-quoted SQLite identifier, escaping embedded quotes."""
    return '"' + name.replace('"', '""') + '"'


def _load_schema(cursor, tables):
    """Read column types, primary-key columns and date-like columns for each table.

    Returns a tuple ``(table_columns, primary_keys, date_columns_by_table)``:
    ``table_columns`` maps table -> {column name: declared type},
    ``primary_keys`` maps table -> primary-key column names in key order, and
    ``date_columns_by_table`` maps table -> columns whose name contains one of
    ``_DATE_NAME_HINTS`` (tables without such columns are omitted).
    """
    table_columns = {}
    primary_keys = {}
    date_columns_by_table = {}

    for table in tables:
        cursor.execute(f"PRAGMA table_info({_quote_identifier(table)})")
        # Row layout: (cid, name, type, notnull, dflt_value, pk)
        columns = cursor.fetchall()
        table_columns[table] = {col[1]: col[2] for col in columns}

        # pk is the 1-based position inside the primary key, 0 for other columns.
        key_columns = sorted((col for col in columns if col[5]),
                             key=lambda col: col[5])
        primary_keys[table] = [col[1] for col in key_columns]

        # Track date columns for potential date-based joins
        date_cols = [col[1] for col in columns
                     if any(hint in col[1].lower() for hint in _DATE_NAME_HINTS)]
        if date_cols:
            date_columns_by_table[table] = date_cols

    return table_columns, primary_keys, date_columns_by_table


def _collect_foreign_keys(cursor, tables, primary_keys, relationships):
    """Append every declared foreign key (and its join path) to *relationships*."""
    # The parent name is stored as written in the REFERENCES clause, so look it
    # up case-insensitively.
    pk_lookup = {name.lower(): cols for name, cols in primary_keys.items()}

    for table in tables:
        cursor.execute(f"PRAGMA foreign_key_list({_quote_identifier(table)})")
        # Row layout: (id, seq, table, from, to, on_update, on_delete, match)
        for fk in cursor.fetchall():
            seq, to_table, from_column, to_column = fk[1], fk[2], fk[3], fk[4]

            if to_column is None:
                # "REFERENCES parent" without a column list targets the parent's
                # primary key; seq selects the matching key column.
                parent_key = pk_lookup.get(to_table.lower(), [])
                if seq < len(parent_key):
                    to_column = parent_key[seq]

            relationships["foreign_keys"].append({
                "from_table": table,
                "from_column": from_column,
                "to_table": to_table,
                "to_column": to_column,
                # The pragma exposes only a numeric id, never the declared
                # constraint name, so a descriptive name is always synthesized.
                "constraint_name": f"fk_{table}_{from_column}"
            })

            if to_column is None:
                # Parent key could not be resolved; no usable join condition.
                continue

            # Add to join paths
            join_key = tuple(sorted([table, to_table]))
            relationships["join_paths"][str(join_key)].append({
                "type": "foreign_key",
                "condition": f"{table}.{from_column} = {to_table}.{to_column}"
            })


def _find_date_join_candidates(date_columns_by_table, relationships):
    """Record table pairs that share a date-like column name (case-insensitive)."""
    # Column names are unique per table regardless of case, so a plain mapping
    # from lowercase name to actual name is lossless.
    by_lower = {table: {col.lower(): col for col in cols}
                for table, cols in date_columns_by_table.items()}
    ordered = sorted(by_lower)

    for index, table1 in enumerate(ordered):
        for table2 in ordered[index + 1:]:
            names1, names2 = by_lower[table1], by_lower[table2]
            # Sorted so the output does not depend on set iteration order.
            for col in sorted(names1.keys() & names2.keys()):
                relationships["date_join_candidates"].append({
                    "table1": table1,
                    "table2": table2,
                    "join_column": col,
                    "condition": f"{table1}.{names1[col]} = {table2}.{names2[col]}"
                })


def _infer_id_relationships(tables, table_columns, relationships):
    """Infer ``<table>_id`` / ``<table>id`` -> ``<table>.id`` links not declared as FKs."""
    known_fk_columns = {(fk["from_table"], fk["from_column"].lower())
                        for fk in relationships["foreign_keys"]}

    tables_by_lower = defaultdict(list)
    for table in tables:
        tables_by_lower[table.lower()].append(table)

    # Actual spelling of each table's "id" column, if it has one.
    id_column = {}
    for table in tables:
        for col in table_columns[table]:
            if col.lower() == 'id':
                id_column[table] = col
                break

    for table1 in tables:
        for col1 in table_columns[table1]:
            lowered = col1.lower()
            # Skip if already a known FK
            if (table1, lowered) in known_fk_columns:
                continue
            if lowered == 'id' or not lowered.endswith('id'):
                continue

            potential_table = col1[:-3] if lowered.endswith('_id') else col1[:-2]
            for table2 in tables_by_lower.get(potential_table.lower(), ()):
                id_col = id_column.get(table2)
                if id_col is None:
                    continue

                relationships["inferred_relationships"].append({
                    "from_table": table1,
                    "from_column": col1,
                    "to_table": table2,
                    "to_column": id_col,
                    "confidence": "high",
                    "reason": "Column name matches table name + _id pattern"
                })

                join_key = tuple(sorted([table1, table2]))
                relationships["join_paths"][str(join_key)].append({
                    "type": "inferred",
                    "condition": f"{table1}.{col1} = {table2}.{id_col}",
                    "confidence": "high"
                })


def map_relationships(db_path="database.sqlite"):
    """Map all relationships between tables.

    Collects declared foreign keys, date-column join candidates and
    name-based inferred relationships from the SQLite database at *db_path*,
    writes them as JSON to ``tool_output/relationships.json`` and prints a
    short summary.

    Args:
        db_path: Path of the SQLite database file to inspect.

    Returns:
        None. Results are written to the output file. If introspection fails,
        whatever was gathered so far is still written, together with an
        ``"error"`` entry holding the exception message.
    """

    os.makedirs("tool_output", exist_ok=True)

    relationships = {
        "foreign_keys": [],
        "inferred_relationships": [],
        "join_paths": defaultdict(list),
        "date_join_candidates": []
    }

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # Get all tables
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
        tables = [row[0] for row in cursor.fetchall()]

        table_columns, primary_keys, date_columns_by_table = _load_schema(cursor, tables)

        # Get explicit foreign keys
        _collect_foreign_keys(cursor, tables, primary_keys, relationships)

        # Look for date-based join candidates (for weather-type databases)
        _find_date_join_candidates(date_columns_by_table, relationships)

        # Infer relationships based on column names
        _infer_id_relationships(tables, table_columns, relationships)

    except Exception as e:
        # Best-effort tool: keep partial results and report the failure in the output.
        relationships["error"] = str(e)
    finally:
        conn.close()

    # Convert defaultdict to regular dict for JSON serialization
    relationships["join_paths"] = dict(relationships["join_paths"])

    # Write to output file
    output_path = "tool_output/relationships.json"
    with open(output_path, 'w', encoding="utf-8") as f:
        json.dump(relationships, f, indent=2)

    print(f"Relationship mapping complete - results in {output_path}")
    print(f"Found {len(relationships['foreign_keys'])} explicit foreign keys")
    print(f"Inferred {len(relationships['inferred_relationships'])} additional relationships")

    if relationships.get("date_join_candidates"):
        print(f"Found {len(relationships['date_join_candidates'])} potential date-based joins")

    if "error" in relationships:
        print(f"Warning: mapping stopped early with error: {relationships['error']}")

# Script entry point: when executed directly (not imported), map the database
# at the function's default path ("database.sqlite").
if __name__ == "__main__":
    map_relationships()