#!/usr/bin/env python3
"""
Unified analyzer that runs all tools and generates complete database documentation.
Focuses on providing all information the eval model needs.
"""

import sqlite3
import json
import os
import subprocess
import sys

def analyze_database():
    """Drive the full analysis pipeline from start to finish.

    Each helper script under ``tools/`` is launched as a child process with
    the current interpreter. A non-zero exit status or a launch failure is
    reported on stdout and the pipeline carries on with the next script.
    Afterwards the query patterns are generated and the final documentation
    is compiled.
    """
    print("Starting complete database analysis...")

    # (script file, progress message) pairs, executed in this order.
    pipeline = (
        ("schema_documenter.py", "Documenting schema"),
        ("data_sampler.py", "Sampling data"),
        ("relationship_mapper.py", "Mapping relationships"),
    )

    for script_name, banner in pipeline:
        print(f"\n{banner}...")
        command = [sys.executable, f"tools/{script_name}"]
        try:
            completed = subprocess.run(command, capture_output=True, text=True)
            failed = completed.returncode != 0
            if failed:
                print(f"Warning: {script_name} had issues: {completed.stderr}")
        except Exception as exc:
            # Best effort: one broken tool must not stop the remaining steps.
            print(f"Error running {script_name}: {exc}")

    # Derive example queries from the database and the tool output.
    generate_query_patterns()

    # Merge everything into the final report.
    compile_documentation()

def generate_query_patterns():
    """Generate common query patterns based on the database structure."""

    conn = sqlite3.connect("database.sqlite")
    cursor = conn.cursor()

    patterns = {
        "count_patterns": [],
        "join_patterns": [],
        "aggregation_patterns": [],
        "filter_patterns": []
    }

    try:
        # Load relationships
        if os.path.exists("tool_output/relationships.json"):
            with open("tool_output/relationships.json") as f:
                relationships = json.load(f)
        else:
            relationships = {}

        # Get tables
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
        tables = [row[0] for row in cursor.fetchall()]

        # Generate count patterns
        for table in tables[:5]:
            patterns["count_patterns"].append({
                "description": f"Count all rows in {table}",
                "sql": f"SELECT COUNT(*) FROM {table};"
            })

            # Get primary key if exists
            cursor.execute(f"PRAGMA table_info({table})")
            columns = cursor.fetchall()
            pk_cols = [col[1] for col in columns if col[5] > 0]

            if pk_cols:
                patterns["count_patterns"].append({
                    "description": f"Count distinct {pk_cols[0]} in {table}",
                    "sql": f"SELECT COUNT(DISTINCT {pk_cols[0]}) FROM {table};"
                })

        # Generate join patterns from relationships
        for rel in relationships.get("direct_relationships", [])[:5]:
            patterns["join_patterns"].append({
                "description": f"Join {rel['from_table']} with {rel['to_table']}",
                "sql": f"""SELECT t1.*, t2.*
FROM {rel['from_table']} t1
JOIN {rel['to_table']} t2 ON t1.{rel['from_column']} = t2.{rel['to_column']};"""
            })

        # Generate aggregation patterns
        for table in tables[:3]:
            cursor.execute(f"PRAGMA table_info({table})")
            columns = cursor.fetchall()

            # Find numeric columns
            numeric_cols = [col[1] for col in columns
                          if 'INT' in col[2].upper() or 'REAL' in col[2].upper()]

            if numeric_cols:
                col = numeric_cols[0]
                patterns["aggregation_patterns"].append({
                    "description": f"Average {col} from {table}",
                    "sql": f"SELECT AVG({col}) FROM {table};"
                })

                patterns["aggregation_patterns"].append({
                    "description": f"Sum of {col} from {table}",
                    "sql": f"SELECT SUM({col}) FROM {table};"
                })

        # Generate filter patterns
        for table in tables[:3]:
            cursor.execute(f"PRAGMA table_info({table})")
            columns = cursor.fetchall()

            for col in columns[:2]:
                col_name = col[1]
                col_type = col[2]

                if 'TEXT' in col_type.upper():
                    patterns["filter_patterns"].append({
                        "description": f"Filter {table} by {col_name}",
                        "sql": f"SELECT * FROM {table} WHERE {col_name} = 'value';"
                    })
                elif 'INT' in col_type.upper():
                    patterns["filter_patterns"].append({
                        "description": f"Filter {table} by {col_name} range",
                        "sql": f"SELECT * FROM {table} WHERE {col_name} BETWEEN 10 AND 100;"
                    })

        # Save patterns
        with open("tool_output/query_patterns.json", "w") as f:
            json.dump(patterns, f, indent=2)

    except Exception as e:
        print(f"Error generating patterns: {e}")

    finally:
        conn.close()

def _load_tool_results():
    """Load the JSON output of each analysis step from ``tool_output/``.

    Returns:
        A dict keyed by ``schema``, ``samples``, ``relationships`` and
        ``patterns``. A key is absent when its file does not exist or cannot
        be read or parsed; the latter case prints a warning.
    """
    sources = [
        ("schema_documentation.json", "schema"),
        ("data_samples.json", "samples"),
        ("relationships.json", "relationships"),
        ("query_patterns.json", "patterns"),
    ]

    results = {}
    for filename, key in sources:
        filepath = f"tool_output/{filename}"
        if not os.path.exists(filepath):
            continue
        try:
            with open(filepath) as f:
                results[key] = json.load(f)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError is a ValueError. A truncated or invalid
            # file loses only its own sections of the report.
            print(f"Warning: could not load {filepath}: {e}")
    return results


def _render_overview(schema):
    """Return the lines of section 1: table count and per-table row counts."""
    lines = [
        "## 1. DATABASE OVERVIEW\n\n",
        f"**Total Tables**: {len(schema.get('tables', {}))}\n\n",
        "### Tables and Row Counts:\n",
    ]
    for table, info in schema.get("tables", {}).items():
        lines.append(f"- **{table}**: {info['row_count']:,} rows\n")
    lines.append("\n")
    return lines


def _render_schema(schema):
    """Return the lines of section 2: keys and columns of every table."""
    lines = ["## 2. COMPLETE SCHEMA (Exact Column Names)\n\n"]
    primary_keys = schema.get("primary_keys", {})
    column_purposes = schema.get("column_purposes", {})
    foreign_keys = schema.get("foreign_keys", {})

    for table, info in schema.get("tables", {}).items():
        lines.append(f"### {table}\n\n")

        if table in primary_keys:
            lines.append(f"**Primary Key**: {', '.join(primary_keys[table])}\n\n")

        lines.append("**Columns**:\n")
        table_purposes = column_purposes.get(table, {})
        for col in info.get("columns", []):
            col_detail = info["column_details"][col]
            entry = f"- `{col}` ({col_detail['type']})"
            purpose = table_purposes.get(col, "")
            if purpose:
                entry += f": {purpose}"
            if col_detail.get("is_primary_key"):
                entry += " [PK]"
            if col_detail.get("nullable"):
                entry += " [nullable]"
            lines.append(entry + "\n")

        if table in foreign_keys:
            lines.append("\n**Foreign Keys**:\n")
            for fk in foreign_keys[table]:
                lines.append(f"- {fk['from']} → {fk['to_table']}.{fk['to_column']}\n")

        lines.append("\n")
    return lines


def _render_samples(samples):
    """Return the lines of section 3: sample rows and common column values."""
    lines = ["## 3. SAMPLE DATA (Exact Values and Format)\n\n"]
    common_values = samples.get("common_values", {})

    for table, data in samples.get("table_samples", {}).items():
        if data.get("sample_rows"):
            lines.append(f"### {table} Sample Rows:\n\n")

            # Show first 3 rows
            for i, row in enumerate(data["sample_rows"][:3], 1):
                lines.append(f"**Row {i}**:\n")
                for col, val in row.items():
                    if val is None:
                        lines.append(f"- {col}: NULL\n")
                    elif isinstance(val, str):
                        # Backticks mark string values and their exact case.
                        lines.append(f"- {col}: `{val}`\n")
                    else:
                        lines.append(f"- {col}: {val}\n")
                lines.append("\n")

        cols_with_values = common_values.get(table)
        if cols_with_values:
            lines.append(f"### {table} Common Values:\n\n")
            for col, values in cols_with_values.items():
                if not values:
                    continue
                lines.append(f"**{col}**:\n")
                # At most five values per column; NULL entries are skipped.
                for v in values[:5]:
                    if v["value"] is not None:
                        lines.append(
                            f"  - `{v['value']}` (frequency: {v['frequency']})\n"
                        )
                lines.append("\n")
    return lines


def _render_relationships(rel):
    """Return the lines of section 4: foreign keys and junction tables."""
    lines = ["## 4. TABLE RELATIONSHIPS (Exact Join Columns)\n\n"]

    if rel.get("direct_relationships"):
        lines.append("### Direct Foreign Keys:\n")
        for r in rel["direct_relationships"]:
            lines.append(
                f"- {r['from_table']}.{r['from_column']} = "
                f"{r['to_table']}.{r['to_column']}\n"
            )
        lines.append("\n")

    if rel.get("junction_tables"):
        lines.append("### Junction Tables:\n")
        for j in rel["junction_tables"]:
            lines.append(f"**{j['table']}** connects {' and '.join(j['connects'])}\n")
            for table, cols in j["join_columns"].items():
                lines.append(f"  - {j['table']}.{cols['from']} → {table}.{cols['to']}\n")
            lines.append("\n")
    return lines


def _render_patterns(patterns):
    """Return the lines of section 5: up to three example queries per category."""
    lines = ["## 5. COMMON QUERY PATTERNS\n\n"]
    sections = [
        ("count_patterns", "Count Queries"),
        ("join_patterns", "Join Queries"),
        ("aggregation_patterns", "Aggregation Queries"),
        ("filter_patterns", "Filter Queries"),
    ]
    for key, title in sections:
        if patterns.get(key):
            lines.append(f"### {title}:\n\n")
            for p in patterns[key][:3]:
                lines.append(f"**{p['description']}**:\n")
                lines.append(f"```sql\n{p['sql']}\n```\n\n")
    return lines


def compile_documentation():
    """Compile all analysis results into final documentation.

    Reads whichever JSON files exist under ``tool_output/`` and writes a
    Markdown-formatted report to ``tool_output/complete_documentation.txt``.
    Sections whose input is missing or unreadable are left out; the title and
    the closing notes are always written. The output directory is created
    when absent. The report is written as UTF-8 because it contains the
    non-ASCII arrow character.
    """

    print("\nCompiling final documentation...")

    results = _load_tool_results()

    report = ["# COMPLETE DATABASE DOCUMENTATION\n\n"]

    # Sections 1 and 2 both come from the schema output.
    if "schema" in results:
        report.extend(_render_overview(results["schema"]))
        report.extend(_render_schema(results["schema"]))

    if "samples" in results:
        report.extend(_render_samples(results["samples"]))

    if "relationships" in results:
        report.extend(_render_relationships(results["relationships"]))

    if "patterns" in results:
        report.extend(_render_patterns(results["patterns"]))

    # 6. Important Notes
    report.append("## 6. IMPORTANT NOTES\n\n")
    report.append("- Use exact column names as shown above\n")
    report.append("- Match the exact case of values shown in samples\n")
    report.append("- Use the specified join columns for relationships\n")
    report.append("- Follow the query patterns for common operations\n")

    # Save final documentation
    os.makedirs("tool_output", exist_ok=True)
    with open("tool_output/complete_documentation.txt", "w", encoding="utf-8") as f:
        f.writelines(report)

    print("Complete documentation generated - results in tool_output/")

# Script entry point: run the full analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    analyze_database()