#!/usr/bin/env python3
"""
Enhanced Query Pattern Matcher Tool
Generates SQL templates for common query patterns with column order awareness.
"""

import json
import os
import sqlite3

# Rules that hold for every database; they do not depend on the schema.
_GENERAL_RULES = [
    "Always use ORDER BY ... LIMIT for 'top/most/highest' queries",
    "Never use MAX() in WHERE clause for finding top entities",
    "Return columns in the order they appear in the question",
    "Don't return extra columns not asked for",
    "Use COUNT for counting records, SUM for numeric totals",
    "Include ALL requested information in multi-part questions",
]


def _quote_identifier(name):
    """Return *name* as a double-quoted SQL identifier, doubling embedded quotes."""
    return '"' + name.replace('"', '""') + '"'


def _find_sample_table(cursor):
    """Return ``(table, columns)`` for the first user table, or ``None`` if none exists."""
    # Filter internal tables in Python *before* picking one, so that
    # sqlite_* bookkeeping tables can never crowd out the user tables.
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
    for (name,) in cursor.fetchall():
        if not name.startswith("sqlite_"):
            cursor.execute(f"PRAGMA table_info({_quote_identifier(name)})")
            return name, [col[1] for col in cursor.fetchall()]
    return None


def _build_patterns(table, columns):
    """Build the seven pattern categories, using *table*/*columns* in the examples."""
    # Quote schema names so the examples stay valid SQL even when a name is
    # a keyword (e.g. "order") or contains spaces or punctuation.
    tbl = _quote_identifier(table)
    first_col = _quote_identifier(columns[0]) if columns else "column"

    count_patterns = [
        {
            "pattern": "How many [entities]",
            "template": "SELECT COUNT(*)",
            "example": f"SELECT COUNT(*) FROM {tbl}",
            "column_rule": "Return ONLY the count, no other columns",
        },
        {
            "pattern": "How many different/unique [column]",
            "template": "SELECT COUNT(DISTINCT column)",
            "example": f"SELECT COUNT(DISTINCT {first_col}) FROM {tbl}",
            "column_rule": "Return ONLY the distinct count",
        },
        {
            "pattern": "Number of [entities] that [condition]",
            "template": "SELECT COUNT(*) FROM table WHERE condition",
            "example": f"SELECT COUNT(*) FROM {tbl} WHERE condition",
            "column_rule": "Return ONLY the count",
        },
        {
            "pattern": "Count of [column] when [condition]",
            "template": "SELECT COUNT(CASE WHEN condition THEN 1 END)",
            "example": "SELECT COUNT(CASE WHEN status = 'active' THEN 1 END)",
            "column_rule": "Return conditional count only",
        },
    ]

    # The column names in these examples (name, value, score, cost) are
    # illustrative placeholders, not columns read from the schema.
    top_most_patterns = [
        {
            "pattern": "[Entity] with highest/most/maximum [metric]",
            "template": "SELECT entity_columns FROM table ORDER BY metric DESC LIMIT 1",
            "example": f"SELECT name FROM {tbl} ORDER BY value DESC LIMIT 1",
            "column_rule": "Return entity identifier ONLY, NOT the metric",
            "wrong": "SELECT name, MAX(value) - Don't include the metric unless asked",
        },
        {
            "pattern": "Top N [entities] by [metric]",
            "template": "SELECT entity_columns FROM table ORDER BY metric DESC LIMIT N",
            "example": f"SELECT name FROM {tbl} ORDER BY score DESC LIMIT 5",
            "column_rule": "Return entity columns in natural order",
        },
        {
            "pattern": "[Entity] with lowest/least/minimum [metric]",
            "template": "SELECT entity_columns FROM table ORDER BY metric ASC LIMIT 1",
            "example": f"SELECT name FROM {tbl} ORDER BY cost ASC LIMIT 1",
            "column_rule": "Return entity identifier, not the metric",
        },
        {
            "pattern": "Which [entity] has the most [related items]",
            "template": "SELECT entity FROM table GROUP BY entity ORDER BY COUNT(*) DESC LIMIT 1",
            "column_rule": "Return the entity that has the most, not the count",
        },
    ]

    aggregation_patterns = [
        {
            "pattern": "Total/Sum of [numeric column]",
            "template": "SELECT SUM(column)",
            "warning": "Use SUM for numeric totals, not COUNT",
            "column_rule": "Return ONLY the sum",
        },
        {
            "pattern": "Average [column]",
            "template": "SELECT AVG(column)",
            "column_rule": "Return ONLY the average",
        },
        {
            "pattern": "Percentage of [condition]",
            "template": "SELECT CAST(COUNT(CASE WHEN condition THEN 1 END) AS REAL) * 100 / COUNT(*)",
            "column_rule": "Return ONLY the percentage value",
        },
        {
            "pattern": "[Metric] by/per [grouping]",
            "template": "SELECT grouping, metric FROM table GROUP BY grouping",
            "column_rule": "Return grouping column(s) first, then metric",
            "order_matters": "Column order: GROUP BY columns, then aggregates",
        },
    ]

    yes_no_patterns = [
        {
            "pattern": "Is there any [entity] that [condition]",
            "template": "SELECT CASE WHEN EXISTS(SELECT 1 FROM table WHERE condition) THEN 'yes' ELSE 'no' END",
            "alternative": "SELECT COUNT(*) > 0 FROM table WHERE condition",
        },
        {
            "pattern": "Does [entity] have [property]",
            "template": "SELECT CASE WHEN condition THEN 'yes' ELSE 'no' END",
            "note": "Use evidence format if provided",
        },
    ]

    list_patterns = [
        {
            "pattern": "List all [entities]",
            "template": "SELECT entity_identifier FROM table",
            "column_rule": "Return human-readable identifier, not ID",
        },
        {
            "pattern": "List the [X] and [Y]",
            "template": "SELECT X, Y FROM table",
            "column_rule": "Return BOTH columns in order mentioned",
            "critical": "Must return all requested items",
        },
        {
            "pattern": "Show [entities] with their [properties]",
            "template": "SELECT entity, property1, property2 FROM table",
            "column_rule": "Entity first, then properties in order mentioned",
        },
    ]

    column_order_patterns = [
        {
            "rule": "Question Word Order",
            "description": "Columns appear in SELECT in same order as question",
            "examples": [
                {
                    "question": "What is the name and age",
                    "correct": "SELECT name, age",
                    "wrong": "SELECT age, name",
                },
                {
                    "question": "Show price, quantity and total",
                    "correct": "SELECT price, quantity, total",
                    "wrong": "SELECT total, price, quantity",
                },
            ],
        },
        {
            "rule": "Entity Before Metrics",
            "description": "Return identifying columns before measurement columns",
            "example": {
                "question": "Which store has highest sales",
                "correct": "SELECT store_name FROM ... ORDER BY sales DESC LIMIT 1",
                "note": "Return store_name, not sales (unless both requested)",
            },
        },
        {
            "rule": "Compound Requests",
            "description": "Return all parts of compound requests",
            "example": {
                "question": "name, address and phone number",
                "correct": "SELECT name, address, phone",
                "critical": "All three must be returned in that order",
            },
        },
    ]

    multi_info_patterns = [
        {
            "pattern": "What is the [X], [Y] and [Z] of [entity]",
            "template": "SELECT X, Y, Z FROM table WHERE entity_condition",
            "column_order": "Exactly as listed: X, then Y, then Z",
            "example": "What is the name, capital and population → SELECT name, capital, population",
        },
        {
            "pattern": "[Entity] along with its [property1] and [property2]",
            "template": "SELECT entity_col, property1_col, property2_col",
            "column_order": "Entity first, then properties in order mentioned",
        },
        {
            "pattern": "Provide/Show/Display [X] and [Y]",
            "template": "SELECT X, Y",
            "critical": "Must return BOTH, not just one",
        },
    ]

    return {
        "count_patterns": count_patterns,
        "top_most_patterns": top_most_patterns,
        "aggregation_patterns": aggregation_patterns,
        "yes_no_patterns": yes_no_patterns,
        "list_patterns": list_patterns,
        "column_order_patterns": column_order_patterns,
        "multi_info_patterns": multi_info_patterns,
    }


def match_query_patterns(db_path: str = "database.sqlite", output_dir: str = "tool_output") -> None:
    """Generate SQL pattern templates with column ordering rules.

    The first user table found in the database (and its first column) is
    used to make the example queries concrete. The result is written as
    JSON to ``<output_dir>/query_patterns.json`` and a short summary is
    printed.

    Args:
        db_path: Path of the SQLite database to inspect.
        output_dir: Directory that receives ``query_patterns.json``; it is
            created if it does not exist.

    If the schema cannot be read, the SQLite error message is stored under
    the ``"error"`` key of the output instead of being raised. When the
    database has no user tables the pattern categories stay empty. The
    general rules are written in every case.
    """
    os.makedirs(output_dir, exist_ok=True)
    output_path = f"{output_dir}/query_patterns.json"

    patterns = {
        "count_patterns": [],
        "top_most_patterns": [],
        "aggregation_patterns": [],
        "yes_no_patterns": [],
        "list_patterns": [],
        "column_order_patterns": [],
        "multi_info_patterns": [],
    }
    error = None

    conn = sqlite3.connect(db_path)
    try:
        sample = _find_sample_table(conn.cursor())
        if sample is not None:
            table, columns = sample
            patterns.update(_build_patterns(table, columns))
    except sqlite3.Error as exc:
        # Best effort: record the failure in the output rather than crash.
        error = str(exc)
    finally:
        conn.close()

    # The general rules are static, so emit them even if the schema lookup failed.
    patterns["general_rules"] = list(_GENERAL_RULES)
    if error is not None:
        patterns["error"] = error

    # json.dump escapes non-ASCII by default, so the file content is plain ASCII.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(patterns, f, indent=2)

    if error is not None:
        print(f"Warning: could not inspect database schema: {error}")
    print("Query pattern matching complete")
    # The total counts every list in the output, including the general rules.
    print(f"Generated {sum(len(v) for v in patterns.values() if isinstance(v, list))} pattern templates")
    print(f"Results saved to {output_path}")

if __name__ == "__main__":
    # An optional first command-line argument selects the database file;
    # with no argument the function's default path is used, as before.
    import sys

    if len(sys.argv) > 1:
        match_query_patterns(sys.argv[1])
    else:
        match_query_patterns()