#!/usr/bin/env python3
"""
Evidence Enforcer
Extracts and enforces exact evidence constraints as mandatory rules.
"""

import json
import os
import re

def extract_evidence_constraints() -> dict:
    """Build the static catalogue of evidence-enforcement constraints.

    The function takes no input and performs no matching itself: it only
    assembles hard-coded rule tables (regex patterns, SQL templates,
    textual rules) into one JSON-serialisable dictionary.

    Returns:
        A freshly built dict with these keys, in this order:

        * ``mandatory_rules`` -- list of numbered rule strings.
        * ``column_mappings`` -- enforcement mode, rules and the keyword
          mapping table.
        * ``value_constraints`` -- list of value-level rule descriptors.
        * ``calculation_formulas`` -- enforcement mode, formula
          translation table and rules.
        * ``conditional_logic`` -- list of condition/action descriptors.
        * ``evidence_conflicts`` -- list of conflict scenarios and their
          resolutions.
        * ``enforcement_priority`` -- the string ``'MAXIMUM'``.
        * ``enforcement_levels`` -- description of each priority level.
        * ``special_cases`` -- list of special-case descriptors.
        * ``evidence_patterns`` -- list of regex/template descriptors.
          This table used to be built and then discarded; it is now
          exported like the other pattern tables.
    """
    # Regex -> SQL-template rules.
    # NOTE(review): the ``{n}`` placeholders in each template appear to
    # index the regex capture groups from zero (e.g. ``{1}``/``{2}`` in the
    # first entry pick the 2nd and 3rd groups) -- confirm with the consumer.
    evidence_patterns = [
        {
            'pattern': r'(\w+)\s+refers to\s+(\w+)\s*=\s*["\']([^"\']+)["\']',
            'type': 'exact_value',
            'template': "WHERE {1} = '{2}'  -- MANDATORY from evidence",
            'priority': 'CRITICAL',
        },
        {
            'pattern': r'(\w+)\s+refers to\s+(\w+)',
            'type': 'column_mapping',
            'template': "Use {1} column when question mentions '{0}'",
            'priority': 'CRITICAL',
        },
        {
            'pattern': r'refers to\s+(\w+)\s*>\s*(\d+)',
            'type': 'comparison',
            'template': "WHERE {0} > {1}  -- MANDATORY from evidence",
            'priority': 'CRITICAL',
        },
        {
            'pattern': r'refers to\s+(\w+)\s*<\s*(\d+)',
            'type': 'comparison',
            'template': "WHERE {0} < {1}  -- MANDATORY from evidence",
            'priority': 'CRITICAL',
        },
        {
            'pattern': r'refers to\s+.*between\s+(\d+)\s+and\s+(\d+)',
            'type': 'range',
            'template': "BETWEEN {0} AND {1}  -- MANDATORY from evidence",
            'priority': 'CRITICAL',
        },
        {
            'pattern': r'count\((\w+)\)\s*>\s*(\d+)',
            'type': 'aggregation_condition',
            'template': "HAVING COUNT({0}) > {1}  -- MANDATORY from evidence",
            'priority': 'CRITICAL',
        },
        {
            'pattern': r'calculation\s*=\s*(.*)',
            'type': 'formula',
            'template': "Use exact formula: {0}",
            'priority': 'CRITICAL',
        },
    ]

    # Keyword-specific mappings, exported under
    # column_mappings['common_mappings'].
    keyword_mappings = {
        'full name': {
            'pattern': r'full name.*refers to\s+([\w\s,]+)',
            'extraction': 'Extract exact column list',
            'rule': 'Return columns SEPARATELY, not concatenated',
            'example': 'f_name, l_name (as two columns)',
        },
        'nickname': {
            'pattern': r'nickname.*refers to\s+(\w+)',
            'extraction': 'Use exact column name',
            'rule': 'Use the specific nickname column',
            'example': 'nameNick or nick_name',
        },
        'percentage': {
            'pattern': r'percentage\s*=\s*(.*)',
            'extraction': 'Extract exact formula',
            'rule': 'Apply formula EXACTLY as given',
            'example': 'Divide(Count(X), Count(Y)) * 100',
        },
        'rate': {
            'pattern': r'rate.*refers to\s+(.*)',
            'extraction': 'Extract calculation method',
            'rule': 'Use specified rate calculation',
            'example': 'wins / (wins + losses)',
        },
    }

    # Question-vs-evidence conflict scenarios, exported under
    # 'evidence_conflicts'.
    conflict_patterns = [
        {
            'scenario': 'Question says "more than X" but evidence says "more than Y"',
            'resolution': 'ALWAYS use evidence value Y',
            'example': 'Question: "more than 1", Evidence: "> 3" → Use > 3',
            'priority': 'EVIDENCE_WINS',
        },
        {
            'scenario': 'Question implies concatenation but evidence lists separate columns',
            'resolution': 'Return separate columns as per evidence',
            'example': 'Evidence: "full name = f_name, l_name" → Return two columns',
            'priority': 'EVIDENCE_WINS',
        },
        {
            'scenario': 'Question uses different terminology than evidence',
            'resolution': 'Use evidence column names exactly',
            'example': 'Question: "coach name", Evidence: "coachID" → Use coachID',
            'priority': 'EVIDENCE_WINS',
        },
    ]

    # Pseudo-function -> SQL translations, exported under
    # calculation_formulas['translations'].
    formula_translations = {
        'DIVIDE': {
            'pattern': r'DIVIDE\(([^,]+),\s*([^)]+)\)',
            'sql': 'CAST({0} AS REAL) / {1}',
            'note': 'Always cast numerator to REAL',
        },
        'SUBTRACT': {
            'pattern': r'SUBTRACT\(([^,]+),\s*([^)]+)\)',
            'sql': '({0} - {1})',
            'note': 'Simple subtraction',
        },
        'MULTIPLY': {
            'pattern': r'MULTIPLY\(([^,]+),\s*([^)]+)\)',
            'sql': '({0} * {1})',
            'note': 'Simple multiplication',
        },
        'COUNT': {
            'pattern': r'COUNT\(([^)]+)\)',
            'sql': 'COUNT({0})',
            'note': 'Standard count',
        },
        'SUM': {
            'pattern': r'SUM\(([^)]+)\)',
            'sql': 'SUM({0})',
            'note': 'Standard sum',
        },
        'AVG': {
            'pattern': r'AVG\(([^)]+)\)',
            'sql': 'AVG({0})',
            'note': 'Standard average',
        },
    }

    mandatory_rules = [
        "1. Evidence constraints are NON-NEGOTIABLE",
        "2. When evidence provides a value/condition, use it EXACTLY",
        "3. Evidence overrides question for WHERE conditions",
        "4. Evidence column mappings must be followed precisely",
        "5. Evidence formulas must be applied without modification",
        "6. If evidence conflicts with intuition, evidence WINS",
    ]

    column_mappings = {
        'enforcement': 'STRICT',
        'rules': [
            "If evidence says 'X refers to Y', always use Y when X is mentioned",
            "If evidence provides column list (e.g., f_name, l_name), return them separately",
            "Never concatenate columns that evidence lists separately",
            "Use exact case and spelling from evidence",
        ],
        'common_mappings': keyword_mappings,
    }

    value_constraints = [
        {
            'type': 'exact_match',
            'rule': "When evidence says column = 'value', use exactly that",
            'sql_pattern': "WHERE column = 'exact_value'",
        },
        {
            'type': 'comparison',
            'rule': "When evidence provides >, <, >=, <= conditions, apply exactly",
            'sql_pattern': "WHERE column [operator] evidence_value",
        },
        {
            'type': 'range',
            'rule': "When evidence provides BETWEEN, use exact range",
            'sql_pattern': "WHERE column BETWEEN val1 AND val2",
        },
        {
            'type': 'null_check',
            'rule': "When evidence mentions NULL or empty, check both",
            'sql_pattern': "WHERE column IS NULL OR column = ''",
        },
    ]

    calculation_formulas = {
        'enforcement': 'EXACT',
        'translations': formula_translations,
        'rules': [
            "Apply formulas EXACTLY as specified in evidence",
            "Preserve operation order from evidence",
            "Use CAST for division to ensure REAL results",
            "Multiply by 100 for percentages ONLY if evidence shows it",
        ],
    }

    conditional_logic = [
        {
            'condition': 'Evidence provides aggregation condition',
            'action': 'Use HAVING clause with exact condition',
            'example': 'HAVING COUNT(x) > evidence_value',
        },
        {
            'condition': 'Evidence specifies DISTINCT',
            'action': 'Apply DISTINCT exactly where indicated',
            'example': 'COUNT(DISTINCT column) if evidence says so',
        },
        {
            'condition': 'Evidence mentions GROUP BY',
            'action': 'Group by specified columns',
            'example': 'GROUP BY columns_from_evidence',
        },
    ]

    enforcement_levels = {
        'CRITICAL': 'Must be applied exactly as specified',
        'HIGH': 'Should be applied unless technically impossible',
        'MEDIUM': 'Apply when relevant to query',
        'LOW': 'Consider as guidance',
    }

    special_cases = [
        {
            'case': 'Percentage calculations',
            'evidence_pattern': 'percentage = formula',
            'enforcement': 'Use EXACT formula, including * 100 if shown',
            'sql_template': 'CAST(numerator AS REAL) * 100 / denominator',
        },
        {
            'case': 'Full name handling',
            'evidence_pattern': 'full name = col1, col2',
            'enforcement': 'Return as SEPARATE columns',
            'sql_template': 'SELECT col1, col2  -- NOT concatenated',
        },
        {
            'case': 'Count conditions',
            'evidence_pattern': 'count(x) > n',
            'enforcement': 'Use in HAVING clause',
            'sql_template': 'GROUP BY ... HAVING COUNT(x) > n',
        },
    ]

    # Key order is kept stable because it determines the layout of the
    # JSON file written by the caller; the new key is appended last so the
    # existing keys keep their positions.
    return {
        'mandatory_rules': mandatory_rules,
        'column_mappings': column_mappings,
        'value_constraints': value_constraints,
        'calculation_formulas': calculation_formulas,
        'conditional_logic': conditional_logic,
        'evidence_conflicts': conflict_patterns,
        'enforcement_priority': 'MAXIMUM',
        'enforcement_levels': enforcement_levels,
        'special_cases': special_cases,
        'evidence_patterns': evidence_patterns,
    }

def main(output_dir="./tool_output"):
    """Generate the evidence-constraint catalogue and write it to disk.

    Builds the constraints with ``extract_evidence_constraints()``, writes
    them as indented JSON to ``<output_dir>/evidence_constraints.json``
    (creating the directory if needed) and prints a short summary.

    Args:
        output_dir: Directory that receives the JSON file. Defaults to
            ``./tool_output``, the location that was previously hard-coded.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Extract evidence constraints
    constraints = extract_evidence_constraints()

    # Save results. The encoding is explicit so the file does not depend
    # on the platform's default locale encoding.
    output_path = os.path.join(output_dir, "evidence_constraints.json")
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(constraints, f, indent=2)

    print("Evidence enforcement analysis complete")
    # Report the level stored in the data rather than a duplicated literal,
    # so the banner cannot drift from what was written to the file.
    print(f"ENFORCEMENT LEVEL: {constraints['enforcement_priority']}")
    print(f"Generated {len(constraints['mandatory_rules'])} mandatory rules")
    print(f"Generated {len(constraints['value_constraints'])} value constraints")
    print(f"Results saved to {output_path}")
    print("\nREMEMBER: Evidence constraints are NON-NEGOTIABLE")

# Run the generator only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()