#!/usr/bin/env python3
"""
Evidence Validator Tool
Parses evidence hints and validates them against actual database values.
"""

import sqlite3
import json
import os
import re

# Evidence clause patterns, compiled once because they are applied to every line.
_REFERS_RE = re.compile(r'(\w+)\s+refers?\s+to\s+([^;,]+)', re.IGNORECASE)
_FORMULA_HEAD_RE = re.compile(
    r'(\w+)\s*=\s*(MULTIPLY|DIVIDE|SUM|COUNT|AVG|MAX|MIN)\s*\(', re.IGNORECASE)
_NESTED_CALL_RE = re.compile(r'(MULTIPLY|DIVIDE)\s*\(', re.IGNORECASE)
_VALUE_RE = re.compile(r'"([^"]+)"\s*(?:refers?\s+to|=|means?)\s*([^;,]+)')
_CONDITION_RE = re.compile(r'(\w+)\s*([<>=!]+)\s*([^;,\s]+)')
_COMPARISON_OPERATORS = ('=', '!=', '<', '>', '<=', '>=')


def _balanced_group(text, open_idx):
    """Return (inner_text, end_index) for the parenthesised group opening at open_idx.

    ``end_index`` is the position just past the matching ``)``. Returns None
    when the parentheses never balance.
    """
    depth = 0
    for idx in range(open_idx, len(text)):
        char = text[idx]
        if char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
            if depth == 0:
                return text[open_idx + 1:idx], idx + 1
    return None


def _split_top_level(args):
    """Split an argument string on commas that are not inside parentheses."""
    parts = []
    depth = 0
    start = 0
    for idx, char in enumerate(args):
        if char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
        elif char == ',' and depth == 0:
            parts.append(args[start:idx])
            start = idx + 1
    parts.append(args[start:])
    return parts


def _translate_operand(operand):
    """Translate an operand that is itself a whole MULTIPLY/DIVIDE call; else return it stripped."""
    operand = operand.strip()
    head = _NESTED_CALL_RE.match(operand)
    if head:
        group = _balanced_group(operand, head.end() - 1)
        # Only rewrite when the call spans the entire operand.
        if group is not None and group[1] == len(operand):
            nested = _translate_formula(head.group(1).upper(), group[0])
            if nested:
                return nested
    return operand


def _translate_formula(operation, params):
    """Return the SQL expression for a two-argument MULTIPLY/DIVIDE, or "" otherwise."""
    if operation not in ("MULTIPLY", "DIVIDE"):
        return ""
    parts = _split_top_level(params)
    if len(parts) != 2:
        return ""
    left = _translate_operand(parts[0])
    right = _translate_operand(parts[1])
    if operation == "DIVIDE":
        # "* 1.0" forces floating-point division in SQL.
        return f"({left} * 1.0 / {right})"
    if right == "100":
        # Percentage idiom: use a float literal so the product is not truncated.
        return f"({left} * 100.0)"
    return f"({left} * {right})"


def _parse_formulas(line):
    """Extract every ``name = OP(args)`` formula on the line, honouring nested parentheses."""
    formulas = []
    pos = 0
    while True:
        head = _FORMULA_HEAD_RE.search(line, pos)
        if head is None:
            break
        group = _balanced_group(line, head.end() - 1)
        if group is None or not group[0]:
            # Unbalanced or empty argument list: not a usable formula.
            pos = head.end()
            continue
        inner, pos = group
        operation = head.group(2).upper()
        params = inner.strip()
        formulas.append({
            "name": head.group(1).strip(),
            "operation": operation,
            "parameters": params,
            "sql_translation": _translate_formula(operation, params)
        })
    return formulas


def _parse_line(line, evidence_info):
    """Append every mapping, formula and condition found on one evidence line."""
    mappings = evidence_info["parsed_mappings"]

    # Pattern 1: column refers to table.column
    for match in _REFERS_RE.finditer(line):
        mappings.append({
            "evidence_term": match.group(1).strip(),
            "maps_to": match.group(2).strip(),
            "type": "column_mapping"
        })

    # Pattern 2: Formula definitions (e.g., percentage = MULTIPLY(...))
    evidence_info["formula_patterns"].extend(_parse_formulas(line))

    # Pattern 3: Value mappings (e.g., "low salary" = salary < 30000)
    for match in _VALUE_RE.finditer(line):
        mappings.append({
            "evidence_term": match.group(1).strip(),
            "maps_to": match.group(2).strip(),
            "type": "value_mapping"
        })

    # Pattern 4: Simple conditions (e.g., active = 1, enabled = true)
    for match in _CONDITION_RE.finditer(line):
        if match.group(2) in _COMPARISON_OPERATORS:
            mappings.append({
                "column": match.group(1).strip(),
                "operator": match.group(2).strip(),
                "value": match.group(3).strip().strip("'\""),
                "type": "condition"
            })


def _load_table_columns(cursor):
    """Return {table_name: [lower-cased column names]} for every non-internal table."""
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
    tables = [row[0] for row in cursor.fetchall() if not row[0].startswith("sqlite_")]

    table_columns = {}
    for table in tables:
        cursor.execute(f"PRAGMA table_info(`{table}`)")
        table_columns[table] = [col[1].lower() for col in cursor.fetchall()]
    return table_columns


def _resolve_column_mapping(mapping, table_columns):
    """Resolve one column_mapping against the schema; return a resolution dict or None."""
    target = mapping["maps_to"]
    term = mapping["evidence_term"]

    if '.' in target:
        # Only a plain table.column reference is handled.
        parts = target.split('.')
        if len(parts) != 2:
            return None
        table_name = parts[0].strip().strip('`"')
        col_name = parts[1].strip().strip('`"')
        col_key = col_name.lower()

        for table, columns in table_columns.items():
            if table.lower() == table_name.lower() and col_key in columns:
                return {
                    "evidence_term": term,
                    "resolved_to": f"{table}.{col_name}",
                    "confidence": "high",
                    "validated": True
                }

        # Fall back to the first table that has a column of that name.
        for table, columns in table_columns.items():
            if col_key in columns:
                return {
                    "evidence_term": term,
                    "resolved_to": f"{table}.{col_name}",
                    "confidence": "medium",
                    "note": "Table name in evidence not found, using best match"
                }
        return None

    # Just a column name - find which table(s) have it
    col_name = target.strip().strip('`"')
    found_in = [table for table, columns in table_columns.items()
                if col_name.lower() in columns]
    if not found_in:
        return None
    return {
        "evidence_term": term,
        "column": col_name,
        "found_in_tables": found_in,
        "recommendation": f"Use {found_in[0]}.{col_name}" if len(found_in) == 1 else f"Disambiguate: column exists in {', '.join(found_in)}"
    }


def _validate_condition(cursor, mapping, table_columns):
    """Check whether a condition's literal value occurs in each table that has the column.

    The literal is looked up by equality regardless of the condition's
    operator. Returns the list of validation entries; tables where the lookup
    fails with a database error are skipped.
    """
    col_name = mapping["column"]
    value = mapping["value"]
    results = []

    for table, columns in table_columns.items():
        if col_name.lower() not in columns:
            continue
        try:
            cursor.execute(f"""
                SELECT COUNT(*) FROM `{table}`
                WHERE `{col_name}` = ?
            """, (value,))
            count = cursor.fetchone()[0]

            if count > 0:
                results.append({
                    "column": col_name,
                    "value": value,
                    "table": table,
                    "occurrences": count,
                    "validated": True
                })
                continue

            # Check for case variations
            cursor.execute(f"""
                SELECT DISTINCT `{col_name}` FROM `{table}`
                WHERE LOWER(`{col_name}`) = LOWER(?)
                LIMIT 5
            """, (value,))
            variations = [row[0] for row in cursor.fetchall()]

            if variations:
                results.append({
                    "column": col_name,
                    "requested_value": value,
                    "actual_values": variations,
                    "table": table,
                    "note": "Case mismatch - use actual values",
                    "validated": False
                })
        except sqlite3.Error:
            # Best-effort probe: a table that cannot be queried is skipped,
            # but non-database errors are no longer swallowed.
            continue
    return results


def validate_evidence(db_path="database.sqlite"):
    """Parse evidence.txt and validate its hints against a SQLite database.

    Reads ``evidence.txt`` from the current directory, extracts column
    mappings, formulas, value mappings and simple conditions (every clause on
    a line, not just the first), checks them against the schema and data of
    the database, and writes the result to
    ``tool_output/evidence_validation.json``. A summary is printed to stdout.

    Args:
        db_path: Path of the SQLite database to validate against.

    Returns:
        None. If ``evidence.txt`` does not exist, a notice is printed and
        nothing is written (the ``tool_output`` directory is still created).

    Raises:
        Exception: Any error raised while reading, validating or writing is
            reported on stdout and re-raised.
    """

    os.makedirs("tool_output", exist_ok=True)

    # Check if evidence file exists
    evidence_file = "evidence.txt"
    if not os.path.exists(evidence_file):
        print("ℹ No evidence.txt file found - skipping evidence validation")
        return

    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    evidence_info = {
        "raw_evidence": "",
        "parsed_mappings": [],
        "formula_patterns": [],
        "validated_values": [],
        "column_resolutions": [],
        "sql_translations": []
    }

    try:
        # Read evidence; the encoding is explicit so decoding does not depend
        # on the platform locale.
        with open(evidence_file, 'r', encoding="utf-8") as f:
            evidence_text = f.read()
        evidence_info["raw_evidence"] = evidence_text

        # Parse evidence patterns
        lines = evidence_text.strip().split('\n') if evidence_text else []
        for line in lines:
            if line.strip():
                _parse_line(line, evidence_info)

        # Validate mapped columns against actual database
        table_columns = _load_table_columns(cursor)

        # Resolve column references
        for mapping in evidence_info["parsed_mappings"]:
            if mapping["type"] != "column_mapping":
                continue
            resolution = _resolve_column_mapping(mapping, table_columns)
            if resolution is not None:
                evidence_info["column_resolutions"].append(resolution)

        # Validate value conditions
        for mapping in evidence_info["parsed_mappings"]:
            if mapping["type"] == "condition":
                evidence_info["validated_values"].extend(
                    _validate_condition(cursor, mapping, table_columns))

        # Generate SQL pattern recommendations
        for formula in evidence_info["formula_patterns"]:
            if formula["sql_translation"]:
                evidence_info["sql_translations"].append({
                    "evidence_formula": f"{formula['name']} = {formula['operation']}({formula['parameters']})",
                    "sql_pattern": formula["sql_translation"],
                    "usage_example": f"SELECT {formula['sql_translation']} AS {formula['name']}"
                })

        # Save to file
        with open("tool_output/evidence_validation.json", "w", encoding="utf-8") as f:
            json.dump(evidence_info, f, indent=2)

        print(f"✓ Evidence validation complete: {len(evidence_info['parsed_mappings'])} mappings found")
        print(f"✓ Resolved {len(evidence_info['column_resolutions'])} column references")
        print(f"✓ Validated {len(evidence_info['validated_values'])} value conditions")

    except Exception as e:
        print(f"✗ Evidence validation failed: {str(e)}")
        raise
    finally:
        conn.close()

# Script entry point: validate against the default "database.sqlite" path.
if __name__ == "__main__":
    validate_evidence()