#!/usr/bin/env python3
"""
Inspect actual values in database columns to prevent type and format confusion.
Fixed version with proper column quoting for columns with spaces.
"""

import sqlite3
import os
import re

class ValueInspector:
    """Sample the values stored in every column of a SQLite database.

    For each table the inspector collects the most frequent non-NULL values of
    every column, derives simple patterns from them (numeric text, date
    formats, money formatting, mixed case, padding, boolean-like values) and
    records which column names must be quoted in SQL.  The findings are
    written as a Markdown report to ``<output_dir>/value_examples.txt``.
    """

    # Keywords listed in the SQLite documentation.  SQLite tolerates some of
    # them as bare identifiers, but quoting a keyword is always valid, so any
    # column whose name matches one is reported as needing quotes.
    _SQLITE_KEYWORDS = frozenset(
        """
        ABORT ACTION ADD AFTER ALL ALTER ALWAYS ANALYZE AND AS ASC ATTACH
        AUTOINCREMENT BEFORE BEGIN BETWEEN BY CASCADE CASE CAST CHECK COLLATE
        COLUMN COMMIT CONFLICT CONSTRAINT CREATE CROSS CURRENT CURRENT_DATE
        CURRENT_TIME CURRENT_TIMESTAMP DATABASE DEFAULT DEFERRABLE DEFERRED
        DELETE DESC DETACH DISTINCT DO DROP EACH ELSE END ESCAPE EXCEPT
        EXCLUDE EXCLUSIVE EXISTS EXPLAIN FAIL FILTER FIRST FOLLOWING FOR
        FOREIGN FROM FULL GENERATED GLOB GROUP GROUPS HAVING IF IGNORE
        IMMEDIATE IN INDEX INDEXED INITIALLY INNER INSERT INSTEAD INTERSECT
        INTO IS ISNULL JOIN KEY LAST LEFT LIKE LIMIT MATCH MATERIALIZED
        NATURAL NO NOT NOTHING NOTNULL NULL NULLS OF OFFSET ON OR ORDER
        OTHERS OUTER OVER PARTITION PLAN PRAGMA PRECEDING PRIMARY QUERY RAISE
        RANGE RECURSIVE REFERENCES REGEXP REINDEX RELEASE RENAME REPLACE
        RESTRICT RETURNING RIGHT ROLLBACK ROW ROWS SAVEPOINT SELECT SET TABLE
        TEMP TEMPORARY THEN TIES TO TRANSACTION TRIGGER UNBOUNDED UNION
        UNIQUE UPDATE USING VACUUM VALUES VIEW VIRTUAL WHEN WHERE WINDOW WITH
        WITHOUT
        """.split()
    )

    # A name that can be written bare: ASCII letter or underscore followed by
    # letters, digits or underscores.  Anything else is quoted, which is
    # always safe even where SQLite would have accepted the bare form.
    _PLAIN_IDENTIFIER = re.compile(r'[A-Za-z_][A-Za-z0-9_]*')

    # Checked in order against the most frequent value; the first hit wins.
    _DATE_PATTERNS = (
        (re.compile(r'^\d{4}-\d{2}-\d{2}'), 'YYYY-MM-DD'),
        (re.compile(r'^\d{2}/\d{2}/\d{4}'), 'MM/DD/YYYY'),
        (re.compile(r'^\d{1,2}/\d{1,2}/\d{2}'), 'M/D/YY'),
        (re.compile(r'^\d{4}$'), 'YYYY'),
    )

    _BOOL_INDICATORS = frozenset({'true', 'false', 'yes', 'no', '0', '1', 't', 'f'})

    # Substrings of a declared type that mark a text column.  'CHAR' also
    # covers VARCHAR/NCHAR; 'CLOB' follows SQLite's TEXT-affinity rule.
    _TEXT_TYPE_MARKERS = ('TEXT', 'CHAR', 'CLOB')

    def __init__(self, db_path: str, output_dir: str = 'tool_output'):
        """Open the database and prepare an empty result store.

        Args:
            db_path: Path of the SQLite database file to inspect.
            output_dir: Directory that receives ``value_examples.txt``.
                Defaults to ``tool_output`` in the working directory.
        """
        self.db_path = db_path
        self.output_dir = output_dir
        # Joined with '/' so the default path is exactly
        # 'tool_output/value_examples.txt' on every platform.
        self.output_path = f"{output_dir}/value_examples.txt"
        self.conn = sqlite3.connect(db_path)
        self.cursor = self.conn.cursor()
        self.value_patterns = {}

    def inspect(self):
        """Inspect values across all tables and write the report.

        Returns:
            True when the inspection and report succeeded; False when an
            error occurred (an error report is written instead).  The
            database connection is closed in either case.
        """
        os.makedirs(self.output_dir, exist_ok=True)

        try:
            self._inspect_all_tables()
            self._save_results()
            print(f"Value inspection complete - results in {self.output_path}")
            return True
        except Exception as e:
            # Top-level boundary: report whatever went wrong instead of crashing.
            print(f"Value inspection error: {e}")
            self._save_error_results(str(e))
            return False
        finally:
            self.conn.close()

    @staticmethod
    def _quote_identifier(name: str) -> str:
        """Return *name* as a double-quoted SQL identifier, escaping any '"'."""
        return '"' + name.replace('"', '""') + '"'

    def _quote_column(self, col_name: str) -> str:
        """Quote a column name only when it cannot be written bare.

        A name is returned unchanged when it is a plain identifier and not an
        SQLite keyword; otherwise it is double-quoted with embedded double
        quotes doubled.
        """
        is_plain = self._PLAIN_IDENTIFIER.fullmatch(col_name) is not None
        if is_plain and col_name.upper() not in self._SQLITE_KEYWORDS:
            return col_name
        return self._quote_identifier(col_name)

    def _fetch_samples(self, quoted_table: str, quoted_col: str):
        """Return up to ten ``(value, count)`` pairs for one column.

        The most frequent non-NULL values come first; ties are ordered by
        value so the result is deterministic.  If the grouped query fails, a
        simpler DISTINCT query is tried and each value gets a count of 1.

        Raises:
            sqlite3.Error: when the fallback query fails as well.
        """
        try:
            # ORDER BY uses COUNT(*) rather than an alias so a column that is
            # itself named like the alias cannot hijack the ordering.
            self.cursor.execute(f"""
                SELECT {quoted_col}, COUNT(*)
                FROM {quoted_table}
                WHERE {quoted_col} IS NOT NULL
                GROUP BY {quoted_col}
                ORDER BY COUNT(*) DESC, 1
                LIMIT 10
                """)
            return self.cursor.fetchall()
        except sqlite3.Error:
            self.cursor.execute(f"""
                SELECT DISTINCT {quoted_col}
                FROM {quoted_table}
                WHERE {quoted_col} IS NOT NULL
                LIMIT 5
                """)
            return [(row[0], 1) for row in self.cursor.fetchall()]

    def _inspect_all_tables(self):
        """Inspect values in all tables with focus on common confusion points.

        Fills ``self.value_patterns`` with one entry per table that has at
        least one analysed column or one column needing quotes.
        """
        self.cursor.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
        tables = [row[0] for row in self.cursor.fetchall()]

        for table in tables:
            # Identifiers are always quoted inside queries, so tables and
            # columns with spaces, punctuation or keyword names still work.
            quoted_table = self._quote_identifier(table)
            self.cursor.execute(f"PRAGMA table_info({quoted_table})")
            columns = self.cursor.fetchall()

            table_patterns = {
                'columns_needing_quotes': [],
                'column_values': {}
            }

            for col in columns:
                col_name = col[1]
                col_type = (col[2] or '').upper()

                # Track columns needing quotes
                display_quoted = self._quote_column(col_name)
                needs_quoting = display_quoted != col_name
                if needs_quoting:
                    table_patterns['columns_needing_quotes'].append({
                        'name': col_name,
                        'quoted': display_quoted
                    })

                try:
                    values = self._fetch_samples(
                        quoted_table, self._quote_identifier(col_name)
                    )
                except sqlite3.Error as exc:
                    # Column completely failed - note it, keeping the declared
                    # type so the report does not show it as UNKNOWN.
                    table_patterns['column_values'][col_name] = {
                        "type": col_type,
                        "error": f"Could not inspect: {exc}",
                        "needs_quoting": needs_quoting
                    }
                    continue

                if values:
                    analysis = self._analyze_values(values, col_type, col_name)
                    if analysis:
                        table_patterns['column_values'][col_name] = analysis

            if table_patterns['column_values'] or table_patterns['columns_needing_quotes']:
                self.value_patterns[table] = table_patterns

    def _analyze_values(self, values, col_type, col_name):
        """Analyze values to identify patterns and potential confusion points.

        Args:
            values: Sequence of ``(value, count)`` pairs, most frequent first.
            col_type: Declared column type, upper-cased.
            col_name: Raw column name, used in warning text.

        Returns:
            A dict with ``type``, ``samples`` (up to five values as strings),
            ``patterns`` and ``warnings``; or None when there is no non-NULL
            sample.
        """
        samples = [str(val) for val, _count in values[:5] if val is not None]
        if not samples:
            return None

        analysis = {
            "type": col_type,
            "samples": samples,
            "patterns": [],
            "warnings": []
        }

        # Date and money checks look only at the most frequent value.
        first_val = str(values[0][0]) if values[0][0] is not None else ""

        # Check if numeric values stored as text
        if any(marker in col_type for marker in self._TEXT_TYPE_MARKERS):
            if all(self._is_numeric(s) for s in samples):
                analysis["patterns"].append("NUMERIC_TEXT")
                # Quote the name so the hint is valid SQL for any column.
                analysis["warnings"].append(
                    f"Text column with numeric values - use quotes: WHERE {self._quote_column(col_name)} = '123'"
                )

        # Check for date patterns
        for pattern, format_name in self._DATE_PATTERNS:
            if pattern.match(first_val):
                analysis["patterns"].append(f"DATE_FORMAT:{format_name}")
                break

        # Check for money format
        if '$' in first_val:
            analysis["patterns"].append("MONEY_FORMAT")
            analysis["warnings"].append("Contains $ - remove for numeric operations: CAST(REPLACE(REPLACE(col, '$', ''), ',', '') AS REAL)")

        # Check for case sensitivity
        if any(s != s.lower() and s != s.upper() for s in samples):
            analysis["patterns"].append("MIXED_CASE")
            analysis["warnings"].append("Case sensitive - match exactly")

        # Check for trailing/leading spaces
        if any(s != s.strip() for s in samples):
            analysis["patterns"].append("HAS_SPACES")
            analysis["warnings"].append("Contains trailing/leading spaces")

        # Check for boolean-like values
        if all(s.lower() in self._BOOL_INDICATORS for s in samples):
            analysis["patterns"].append("BOOLEAN_LIKE")

        return analysis

    def _is_numeric(self, value: str) -> bool:
        """Check if string value is numeric (i.e. accepted by ``float``)."""
        try:
            float(value)
            return True
        except (TypeError, ValueError):
            return False

    def _save_results(self):
        """Save inspection results to the report file.

        The file is written as UTF-8 because the report contains non-ASCII
        symbols that a platform-default encoding may be unable to encode.
        """
        with open(self.output_path, 'w', encoding='utf-8') as f:
            f.write("# VALUE EXAMPLES (CRITICAL FOR SQL GENERATION)\n\n")

            # First, list all columns needing quotes
            columns_needing_quotes = [
                f"{table}.{col_info['name']} → {col_info['quoted']}"
                for table, patterns in self.value_patterns.items()
                for col_info in patterns.get('columns_needing_quotes', [])
            ]

            if columns_needing_quotes:
                f.write("## ⚠️ COLUMNS REQUIRING QUOTES\n")
                f.write("These columns have spaces, special characters, or reserved names and MUST be quoted:\n\n")
                for col in columns_needing_quotes:
                    f.write(f"- {col}\n")
                f.write("\n")

            # Then show value examples by table
            for table, patterns in self.value_patterns.items():
                f.write(f"## {table}\n\n")
                column_values = patterns.get('column_values', {})

                # Show warnings first
                warnings = [
                    f"- **{col_name}**: {warning}"
                    for col_name, analysis in column_values.items()
                    if isinstance(analysis, dict)
                    for warning in analysis.get('warnings') or []
                ]

                if warnings:
                    f.write("### Warnings\n")
                    for warning in warnings:
                        f.write(f"{warning}\n")
                    f.write("\n")

                # Show column values
                for col_name, analysis in column_values.items():
                    if not isinstance(analysis, dict):
                        continue

                    if 'error' in analysis:
                        f.write(f"**{col_name}** ({analysis.get('type', 'UNKNOWN')}):\n")
                        f.write(f"- ❌ {analysis['error']}\n")
                        if analysis.get('needs_quoting'):
                            f.write("- ⚠️ Needs quoting in SQL\n")
                    else:
                        f.write(f"**{col_name}** ({analysis['type']}):\n")
                        if analysis['samples']:
                            # Show samples with proper formatting
                            sample_str = ', '.join(f"`{s}`" for s in analysis['samples'][:5])
                            f.write(f"- Examples: {sample_str}\n")

                        if analysis['patterns']:
                            patterns_str = ', '.join(analysis['patterns'])
                            f.write(f"- Patterns: {patterns_str}\n")
                    f.write("\n")

                f.write("\n")

    def _save_error_results(self, error: str):
        """Save partial results even if inspection failed.

        Writes the error message and the names of at most five tables that
        were recorded before the failure.
        """
        with open(self.output_path, 'w', encoding='utf-8') as f:
            f.write("# VALUE EXAMPLES - PARTIAL RESULTS\n\n")
            f.write("## ⚠️ Inspection Error\n")
            f.write(f"Tool encountered error: {error}\n\n")

            if self.value_patterns:
                f.write("## Partial Results\n\n")
                for table in list(self.value_patterns)[:5]:
                    f.write(f"- {table} (partially inspected)\n")

if __name__ == "__main__":
    # Script entry point: inspect "database.sqlite" in the working directory.
    ValueInspector("database.sqlite").inspect()