#!/usr/bin/env python3
"""
Evidence Analyzer Tool
Extracts and parses evidence patterns to generate specific SQL rules.
Evidence always overrides general patterns.
"""

import json
import os
import re

def _build_evidence_patterns() -> dict:
    """Return the static catalog of evidence-handling guidance.

    The catalog is a dict of lists. Key order is significant only in that
    it determines the order of sections in the JSON file written by
    ``analyze_evidence_patterns``.
    """
    return {
        # How to read the common phrasings found in evidence text.
        "parsing_rules": [
            {
                "pattern": "X refers to Y",
                "meaning": "Use column/table Y when question mentions X",
                "example": "department refers to organ → Use 'organ' column for department",
                "priority": "HIGHEST - Always follow this mapping",
            },
            {
                "pattern": "X = formula",
                "meaning": "Use exact formula provided",
                "example": "percentage = COUNT(X) * 100 / COUNT(Y)",
                "priority": "HIGHEST - Copy formula exactly",
            },
            {
                "pattern": "X > value",
                "meaning": "Use > operator, NOT >=",
                "example": "income > 3000 means > not >=",
                "priority": "HIGH - Exact operator matters",
            },
            {
                "pattern": "between X and Y",
                "meaning": "Use BETWEEN or >= AND <=",
                "example": "age between 20 and 30 → age >= 20 AND age <= 30",
                "priority": "HIGH - Inclusive bounds unless specified",
            },
        ],
        # Natural-language comparison phrases and their SQL operators.
        "operator_mappings": [
            {
                "evidence": "greater than X",
                "sql": "> X",
                "warning": "NOT >= X",
            },
            {
                "evidence": "less than X",
                "sql": "< X",
                "warning": "NOT <= X",
            },
            {
                "evidence": "at least X",
                "sql": ">= X",
                "warning": "Inclusive",
            },
            {
                "evidence": "at most X",
                "sql": "<= X",
                "warning": "Inclusive",
            },
            {
                "evidence": "more than X",
                "sql": "> X",
                "warning": "Exclusive",
            },
            {
                "evidence": "over X",
                "sql": "> X",
                "warning": "Usually exclusive unless context suggests otherwise",
            },
            {
                "evidence": "under X",
                "sql": "< X",
                "warning": "Usually exclusive",
            },
        ],
        # Typical shapes of formulas that evidence may dictate.
        "formula_templates": [
            {
                "type": "Percentage",
                "patterns": [
                    "X * 100 / Y",
                    "CAST(X AS REAL) * 100 / Y",
                    "COUNT(CASE WHEN condition THEN 1 END) * 100.0 / COUNT(*)",
                ],
                "note": "Follow evidence formula exactly, including casting and decimals",
            },
            {
                "type": "Ratio",
                "patterns": [
                    "X / Y",
                    "CAST(X AS REAL) / Y",
                    "COUNT(X) / COUNT(Y)",
                ],
                "note": "Cast to REAL if decimal result needed",
            },
            {
                "type": "Difference",
                "patterns": [
                    "X - Y",
                    "MAX(X) - MIN(X)",
                    "(MAX(X) - MIN(X)) * 100 / MAX(X)",
                ],
                "note": "Follow parentheses exactly",
            },
        ],
        # How "refers to" statements translate into column selection.
        "column_mappings": [
            {
                "pattern": "name refers to FirstName, LastName",
                "action": "Return both FirstName and LastName columns",
                "sql": "SELECT FirstName, LastName",
            },
            {
                "pattern": "full name refers to First + Last",
                "action": "Concatenate columns",
                "sql": "SELECT First || ' ' || Last",
            },
            {
                "pattern": "ID refers to identifier",
                "action": "Use identifier column for ID",
                "sql": "Use specified column name",
            },
            {
                "pattern": "X refers to table.column",
                "action": "Use specific table and column",
                "sql": "Use table.column qualification",
            },
        ],
        # Literal value formats that must be matched as given.
        "value_formats": [
            {
                "type": "Boolean output",
                "patterns": [
                    "'yes'/'no'",
                    "'YES'/'NO'",
                    "1/0",
                    "true/false",
                ],
                "note": "Match exact case and format from evidence",
            },
            {
                "type": "Date formats",
                "patterns": [
                    "YYYY-MM-DD",
                    "MM/DD/YYYY",
                    "strftime format",
                ],
                "note": "Use format specified in evidence",
            },
            {
                "type": "String matching",
                "patterns": [
                    "Exact case match",
                    "LIKE with wildcards",
                    "IN list",
                ],
                "note": "Preserve case exactly as in evidence",
            },
        ],
        "critical_notes": [
            "Evidence ALWAYS overrides general patterns",
            "Copy formulas character-for-character from evidence",
            "Operator choice (> vs >=) must match evidence exactly",
            "Column names from evidence override schema names",
            "When evidence provides a formula, use it even if it seems wrong",
            "Case sensitivity in evidence values must be preserved",
        ],
        # Trigger phrases worth scanning for in evidence text.
        "common_patterns": [
            {
                "evidence_text": "refers to",
                "meaning": "Column or concept mapping",
                "action": "Map the reference exactly",
            },
            {
                "evidence_text": "formula:",
                "meaning": "Exact calculation required",
                "action": "Copy formula precisely",
            },
            {
                "evidence_text": "means",
                "meaning": "Definition or equivalence",
                "action": "Use the defined meaning",
            },
            {
                "evidence_text": "calculate as",
                "meaning": "Specific calculation method",
                "action": "Follow calculation exactly",
            },
        ],
    }


def analyze_evidence_patterns(output_dir: str = "tool_output") -> dict:
    """Generate evidence parsing rules and templates.

    Builds the static evidence-pattern catalog, writes it as indented JSON
    to ``<output_dir>/evidence_patterns.json`` (creating the directory if
    needed), and prints a short summary to stdout.

    Args:
        output_dir: Directory that receives ``evidence_patterns.json``.
            Defaults to ``"tool_output"`` relative to the current working
            directory, which is where the file was always written before
            this parameter existed.

    Returns:
        The catalog that was written, so callers can use it without
        re-reading the file.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    evidence_patterns = _build_evidence_patterns()

    os.makedirs(output_dir, exist_ok=True)

    # Joined with "/" rather than os.path.join so the path shown in the
    # summary line is the same on every platform for the default directory.
    output_path = f"{output_dir}/evidence_patterns.json"

    # Explicit encoding: do not depend on the locale's default codec.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(evidence_patterns, f, indent=2)

    print(f"Evidence analysis complete - results in {output_path}")
    print(f"Generated {len(evidence_patterns['parsing_rules'])} parsing rules")
    print(f"Created {len(evidence_patterns['operator_mappings'])} operator mappings")
    print(f"Added {len(evidence_patterns['critical_notes'])} critical notes")

    return evidence_patterns

# Generate the JSON file only when run as a script, not when imported.
if __name__ == "__main__":
    analyze_evidence_patterns()