#!/usr/bin/env python3
"""
Query Validator
Validates query compliance with evidence and output requirements.
"""

import json
import os
import sqlite3

def _quote_identifier(name):
    """Return *name* as a double-quoted SQLite identifier (inner quotes doubled)."""
    return '"' + name.replace('"', '""') + '"'


def _read_table_columns(db_path):
    """Return a ``{table name: [column names]}`` mapping for the database.

    Tables are listed from ``sqlite_master`` and appear in the mapping in the
    order that query returns them. The connection is always closed, even when
    a query raises.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
        tables = [row[0] for row in cursor.fetchall()]

        table_columns = {}
        for table in tables:
            # Quote the name: an unquoted table such as "order items" would
            # make the PRAGMA statement fail with a syntax error.
            cursor.execute(f"PRAGMA table_info({_quote_identifier(table)})")
            # Index 1 of each table_info row is the column name.
            table_columns[table] = [col[1] for col in cursor.fetchall()]
        return table_columns
    finally:
        conn.close()


def _find_ambiguous_columns(table_columns):
    """Return one entry per column name that occurs in more than one table."""
    owners = {}
    for table, columns in table_columns.items():
        for col in columns:
            owners.setdefault(col, []).append(table)

    return [
        {
            'column': col,
            'appears_in': tables_list,
            'validation': 'Must specify table alias'
        }
        for col, tables_list in owners.items()
        if len(tables_list) > 1
    ]


def validate_query_compliance(db_path):
    """Build the query-compliance validation report for a SQLite database.

    Most of the report is a fixed catalogue of rules and checklists; only the
    ``database_validations`` section depends on the database contents.

    Args:
        db_path: Path passed to ``sqlite3.connect``.

    Returns:
        A JSON-serializable dict with the keys ``compliance_checks``,
        ``validation_rules``, ``error_prevention``, ``final_checklist``,
        ``critical_validations``, ``common_failures`` and
        ``database_validations``. The last one holds the table count, the
        table names, an always-empty ``join_validations`` list, and
        ``column_validations`` listing every column name shared by two or
        more tables.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried.
    """
    validation_report = {
        'compliance_checks': [],
        'validation_rules': [],
        'error_prevention': [],
        'final_checklist': [],
        'critical_validations': []
    }

    # Read the schema up front so the connection is released before the
    # (purely static) report sections are assembled.
    table_columns = _read_table_columns(db_path)
    tables = list(table_columns)

    # Critical validation rules
    validation_report['critical_validations'] = [
        {
            'validation': 'Evidence Constraint Compliance',
            'check': 'All WHERE conditions from evidence are applied EXACTLY',
            'priority': 'MANDATORY',
            'failure_consequence': 'Query will be WRONG'
        },
        {
            'validation': 'Output Type Match',
            'check': 'SELECT clause matches question type (COUNT vs LIST vs PERCENTAGE)',
            'priority': 'MANDATORY',
            'failure_consequence': 'Wrong result type returned'
        },
        {
            'validation': 'Column Selection Precision',
            'check': 'ONLY requested columns returned, in correct format',
            'priority': 'MANDATORY',
            'failure_consequence': 'Extra or missing columns'
        },
        {
            'validation': 'Aggregation Context',
            'check': 'GROUP BY matches aggregation requirement',
            'priority': 'HIGH',
            'failure_consequence': 'Wrong aggregation level'
        },
        {
            'validation': 'Join Completeness',
            'check': 'All necessary tables joined with correct conditions',
            'priority': 'HIGH',
            'failure_consequence': 'Missing or incorrect data'
        }
    ]

    # Compliance checks by category
    validation_report['compliance_checks'] = [
        {
            'category': 'EVIDENCE COMPLIANCE',
            'checks': [
                {
                    'item': 'Value constraints',
                    'validation': "All 'column = value' from evidence present",
                    'sql_pattern': "WHERE column = 'evidence_value'"
                },
                {
                    'item': 'Comparison operators',
                    'validation': "All >, <, >=, <= from evidence applied",
                    'sql_pattern': 'WHERE column [op] evidence_value'
                },
                {
                    'item': 'Aggregation conditions',
                    'validation': "COUNT/SUM conditions in HAVING clause",
                    'sql_pattern': 'HAVING COUNT(x) > evidence_value'
                },
                {
                    'item': 'Column mappings',
                    'validation': "Use exact column names from evidence",
                    'sql_pattern': 'Use evidence column, not intuitive name'
                },
                {
                    'item': 'Formula application',
                    'validation': "Calculation formulas applied exactly",
                    'sql_pattern': 'Use exact formula from evidence'
                }
            ]
        },
        {
            'category': 'OUTPUT COMPLIANCE',
            'checks': [
                {
                    'item': 'Return type',
                    'validation': 'Matches question intent (COUNT/LIST/PERCENTAGE)',
                    'sql_pattern': 'SELECT appropriate_type'
                },
                {
                    'item': 'Column count',
                    'validation': 'Exact number of columns requested',
                    'sql_pattern': 'No extra helpful columns'
                },
                {
                    'item': 'Column format',
                    'validation': 'Separate vs concatenated as specified',
                    'sql_pattern': 'f_name, l_name vs full_name'
                },
                {
                    'item': 'Result limit',
                    'validation': 'LIMIT 1 for "the most/least"',
                    'sql_pattern': 'ORDER BY ... LIMIT 1'
                },
                {
                    'item': 'Distinct usage',
                    'validation': 'DISTINCT when uniqueness implied',
                    'sql_pattern': 'SELECT DISTINCT when needed'
                }
            ]
        },
        {
            'category': 'AGGREGATION COMPLIANCE',
            'checks': [
                {
                    'item': 'Aggregation function',
                    'validation': 'Correct function (SUM/COUNT/AVG)',
                    'sql_pattern': 'Match aggregation keyword'
                },
                {
                    'item': 'GROUP BY presence',
                    'validation': 'GROUP BY when "per/each" mentioned',
                    'sql_pattern': 'GROUP BY entity'
                },
                {
                    'item': 'HAVING vs WHERE',
                    'validation': 'HAVING for aggregate conditions',
                    'sql_pattern': 'HAVING COUNT(*) > n'
                },
                {
                    'item': 'Aggregation scope',
                    'validation': 'Per-entity vs overall total',
                    'sql_pattern': 'GROUP BY for per-entity'
                }
            ]
        },
        {
            'category': 'JOIN COMPLIANCE',
            'checks': [
                {
                    'item': 'Join path completeness',
                    'validation': 'All intermediate tables included',
                    'sql_pattern': 'No skipped tables in path'
                },
                {
                    'item': 'Join conditions',
                    'validation': 'Correct foreign key relationships',
                    'sql_pattern': 't1.id = t2.t1_id'
                },
                {
                    'item': 'Temporal joins',
                    'validation': 'Year/date constraints when needed',
                    'sql_pattern': 'AND t1.year = t2.year'
                }
            ]
        }
    ]

    # Validation rules
    validation_report['validation_rules'] = [
        {
            'rule': 'Evidence First Rule',
            'description': 'Evidence constraints MUST be applied exactly',
            'validation_method': 'Check all WHERE/HAVING against evidence',
            'failure_action': 'Query is INVALID'
        },
        {
            'rule': 'Output Precision Rule',
            'description': 'Return ONLY what is requested',
            'validation_method': 'Match SELECT to question keywords',
            'failure_action': 'Remove extra columns'
        },
        {
            'rule': 'Percentage Calculation Rule',
            'description': 'Calculate percentage when "percentage" mentioned',
            'validation_method': 'Check for * 100 in calculation',
            'failure_action': 'Add percentage calculation'
        },
        {
            'rule': 'Count vs List Rule',
            'description': 'COUNT for "how many", values for "list"',
            'validation_method': 'Match output type to question pattern',
            'failure_action': 'Change SELECT clause'
        },
        {
            'rule': 'Aggregation Context Rule',
            'description': 'Correct aggregation level based on context',
            'validation_method': 'Check GROUP BY against question intent',
            'failure_action': 'Adjust GROUP BY clause'
        }
    ]

    # Error prevention checklist
    validation_report['error_prevention'] = [
        {
            'category': 'Pre-Query Validation',
            'checks': [
                'Read evidence constraints',
                'Identify output type from question',
                'Determine aggregation context',
                'Check for date format requirements',
                'Identify all tables needed'
            ]
        },
        {
            'category': 'Query Construction',
            'checks': [
                'Apply all evidence WHERE conditions',
                'Use correct SELECT for output type',
                'Add GROUP BY if aggregating',
                'Include all join conditions',
                'Handle date formats correctly'
            ]
        },
        {
            'category': 'Post-Query Validation',
            'checks': [
                'Verify evidence compliance',
                'Check output format matches request',
                'Validate aggregation correctness',
                'Confirm join completeness',
                'Test with sample data if possible'
            ]
        }
    ]

    # Final checklist
    validation_report['final_checklist'] = [
        {
            'step': 1,
            'check': 'EVIDENCE CHECK',
            'validation': 'Are ALL evidence constraints applied EXACTLY?',
            'required': 'YES'
        },
        {
            'step': 2,
            'check': 'OUTPUT CHECK',
            'validation': 'Does SELECT match what the question asks?',
            'required': 'YES'
        },
        {
            'step': 3,
            'check': 'COLUMN CHECK',
            'validation': 'Returning ONLY requested columns, in requested format?',
            'required': 'YES'
        },
        {
            'step': 4,
            'check': 'AGGREGATION CHECK',
            'validation': 'Is the aggregation level correct?',
            'required': 'YES'
        },
        {
            'step': 5,
            'check': 'JOIN CHECK',
            'validation': 'Are all necessary joins included?',
            'required': 'YES'
        },
        {
            'step': 6,
            'check': 'DATE CHECK',
            'validation': 'Are date comparisons using correct format?',
            'required': 'YES'
        },
        {
            'step': 7,
            'check': 'PERCENTAGE CHECK',
            'validation': 'If "percentage" mentioned, is it calculated?',
            'required': 'YES'
        },
        {
            'step': 8,
            'check': 'LIMIT CHECK',
            'validation': 'If "the most/least", is LIMIT 1 used?',
            'required': 'YES'
        }
    ]

    # Common validation failures
    validation_report['common_failures'] = [
        {
            'failure': 'Evidence value ignored',
            'example': 'Evidence says "> 3" but query uses "> 1"',
            'impact': 'CRITICAL - Wrong results',
            'fix': 'Always use evidence value'
        },
        {
            'failure': 'Wrong output type',
            'example': 'COUNT returned when LIST requested',
            'impact': 'CRITICAL - Wrong result type',
            'fix': 'Match output to question keywords'
        },
        {
            'failure': 'Extra columns added',
            'example': 'Returning title when only URL requested',
            'impact': 'HIGH - Extra data',
            'fix': 'Return ONLY requested columns'
        },
        {
            'failure': 'Missing aggregation',
            'example': 'Individual records when total requested',
            'impact': 'HIGH - Wrong level',
            'fix': 'Add appropriate aggregation'
        },
        {
            'failure': 'Wrong join path',
            'example': 'Direct join skipping intermediate table',
            'impact': 'HIGH - Missing data',
            'fix': 'Include all tables in path'
        }
    ]

    # Database-specific validations. 'join_validations' is part of the report
    # shape but nothing in this function populates it.
    validation_report['database_validations'] = {
        'table_count': len(tables),
        'tables': tables,
        'join_validations': [],
        # Columns present in several tables are ambiguous unless qualified.
        'column_validations': _find_ambiguous_columns(table_columns)
    }

    return validation_report

def main():
    """Generate the validation report and write it to disk as JSON.

    Reads ``./database.sqlite``, creates ``./tool_output`` if it does not
    exist, saves the report as ``validation_report.json`` inside it, and
    prints a short summary of what was generated.
    """
    output_dir = "./tool_output"
    db_path = "./database.sqlite"

    # Make sure the destination folder exists before any work is done.
    os.makedirs(output_dir, exist_ok=True)

    report = validate_query_compliance(db_path)

    # Persist the report as indented JSON.
    output_path = os.path.join(output_dir, "validation_report.json")
    with open(output_path, 'w') as f:
        f.write(json.dumps(report, indent=2))

    # Summary for whoever is running the script.
    summary_lines = (
        "Query validation analysis complete",
        f"Generated {len(report['critical_validations'])} critical validations",
        f"Generated {len(report['validation_rules'])} validation rules",
        f"Generated {len(report['final_checklist'])} checklist items",
        f"Results saved to {output_path}",
        "\nREMEMBER: All validations must pass for query to be correct",
    )
    for line in summary_lines:
        print(line)

# Run the report generation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()