"""
This module defines the core abstractions for representing mathematical concepts, formulas, and entities.

It provides:

Expression Framework:
- Base Expression class and Context for variable tracking
- Propositional logic (And, Or, Not, Implies)
- First-order logic quantifiers (Forall, Exists)
- Set theory operations and predicates
- Natural number arithmetic
- Lambda calculus and function application
- Equality relations

Entity Framework:
- Example management system for tracking and validating examples/nonexamples
- Base Entity class for mathematical objects with both symbolic and computational aspects
- Concept class for representing mathematical objects, properties, functions, and relations
- Conjecture class for representing mathematical claims and hypotheses
- Type system for mathematical objects (numeric, sets, groups, functions)

Each expression type supports translation to multiple target languages:
- Lean 4 theorem prover
- Prolog logic programming
- Z3 SMT solver

Each mathematical entity supports:
- Symbolic definitions using the expression framework
- Computational implementations for concrete evaluation
- Translation to multiple target languages (Lean 4, Prolog, Z3)
- Example management with automatic validation
- Metadata and documentation

The expressions and entities defined here form the foundation for defining and manipulating
mathematical concepts in the knowledge base.
"""

from abc import ABC, abstractmethod
from enum import Enum
from functools import total_ordering
import re
import os
from typing import List, Union, Optional, Any, Tuple, Callable, Set, Literal, Dict
from contextlib import contextmanager
from frame.provers.base_prover import ProofResult
from frame.provers.proof import Proof
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
import typing
from frame.utils.logging import logger
from frame.tools.z3_template import Z3Template


# Default timeout for example verification (in seconds).
# This is only the initial value; the effective setting lives in
# _example_verification_timeout below and can be overridden (e.g. from configuration).
DEFAULT_EXAMPLE_VERIFICATION_TIMEOUT = 0.01


# Module-wide, mutable setting for the example verification timeout (in seconds).
# Starts at the default above and is rebound by set_example_verification_timeout().
_example_verification_timeout = DEFAULT_EXAMPLE_VERIFICATION_TIMEOUT

def set_example_verification_timeout(timeout_value: float) -> None:
    """Override the module-wide timeout used for example verification.

    Rebinds the module-level ``_example_verification_timeout`` and reports the
    new value through the module logger at INFO level. The value is stored
    as given; no validation is performed here.

    Args:
        timeout_value: The timeout value in seconds
    """
    # `global` is required because the assignment below rebinds the module-level name.
    global _example_verification_timeout
    _example_verification_timeout = timeout_value
    logger.info(f"Example verification timeout set to {_example_verification_timeout} seconds")

# Public names exported by `from <module> import *`.
# NOTE(review): Concept, Conjecture, Theorem, Entity and ConceptType are presumably
# defined further down in this module - confirm they all exist there.
__all__ = ['Concept', 'Conjecture', 'Theorem', 'Entity', 'ConceptType', 
           'DEFAULT_EXAMPLE_VERIFICATION_TIMEOUT', 'set_example_verification_timeout']

# =============================================================================
# Base Class for All Expressions
# =============================================================================


class Expression(ABC):
    """Abstract base for every symbolic expression in this module.

    Concrete subclasses must provide a rendering for each of the three
    target languages: Lean 4, Prolog and Z3.
    """

    @abstractmethod
    def to_lean4(self) -> str:
        """Render this expression as Lean 4 source text."""

    @abstractmethod
    def to_prolog(self) -> str:
        """Render this expression as Prolog source text."""

    @abstractmethod
    def to_z3(self) -> Z3Template:
        """Render this expression for Z3 (Python API or SMT-LIB)."""


class Var(Expression):
    """A named variable."""

    def __init__(self, name: str):
        """Store the variable's name.

        Args:
            name: Identifier used when rendering the variable.
        """
        self.name = name

    def to_lean4(self) -> str:
        """Return the name exactly as given."""
        lean_name = self.name
        return lean_name

    def to_prolog(self) -> str:
        """Return the name lower-cased, so that Prolog reads it as an atom."""
        prolog_atom = self.name.lower()
        return prolog_atom

    def to_z3(self) -> Z3Template:
        """Z3 translation is not available for variables.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("to_z3 is not yet implemented")


class ConceptApplication(Expression):
    """Application of a mathematical concept (an Entity) to positional arguments."""

    def __init__(self, concept: "Entity", *args):
        """Record the concept being applied and the arguments it is applied to.

        Args:
            concept: The entity being applied.
            *args: The arguments of the application, stored as a tuple.
        """
        self.args = args
        self.concept = concept

    def _filter_example_type(self, arg: Any) -> Any:
        """Map ExampleType values to None and pass every other value through."""
        return None if isinstance(arg, ExampleType) else arg

    def to_lean4(self) -> str:
        """Lean 4 translation is not available.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("to_lean4 is not yet implemented")

    def to_prolog(self) -> str:
        """Prolog translation is not available.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("to_prolog is not yet implemented")

    def to_z3(self) -> Z3Template:
        """Z3 translation is not available.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("to_z3 is not yet implemented")


# =============================================================================
# Propositional Logic Classes
# =============================================================================


class PropVar(Var):
    """A propositional variable (atomic proposition)."""

    def __init__(self, name: str):
        """Create a propositional variable.

        Args:
            name: Name of the proposition.
        """
        super().__init__(name)

    def to_lean4(self) -> str:
        """Return the proposition's name unchanged."""
        return self.name

    def to_prolog(self) -> str:
        """Return the lower-cased name (in Prolog, atoms are lowercase)."""
        return self.name.lower()

    def to_z3(self) -> Z3Template:
        """Z3 translation is not implemented for propositional variables.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")


class Not(Expression):
    """Logical negation of an expression."""

    def __init__(self, expr: Expression):
        """Wrap the expression to be negated.

        Args:
            expr: The operand of the negation.
        """
        self.expr = expr

    def to_lean4(self) -> str:
        """Render as ``(¬ <expr>)``."""
        return f"(¬ {self.expr.to_lean4()})"

    def to_prolog(self) -> str:
        """Render as ``not(<expr>)``."""
        return f"not({self.expr.to_prolog()})"

    def to_z3(self) -> Z3Template:
        """Z3 translation is not implemented for negation.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")


class And(Expression):
    """Logical conjunction of two expressions."""

    def __init__(self, left: Expression, right: Expression):
        """Store both conjuncts.

        Args:
            left: Left-hand conjunct.
            right: Right-hand conjunct.
        """
        self.left = left
        self.right = right

    def to_lean4(self) -> str:
        """Render as ``(<left> ∧ <right>)``."""
        left = self.left.to_lean4()
        right = self.right.to_lean4()
        return f"({left} ∧ {right})"

    def to_prolog(self) -> str:
        """Render as ``(<left>, <right>)`` (comma is Prolog conjunction)."""
        left = self.left.to_prolog()
        right = self.right.to_prolog()
        return f"({left}, {right})"

    def to_z3(self) -> Z3Template:
        """Z3 translation is not implemented for conjunction.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")


class Or(Expression):
    """Logical disjunction of two expressions."""

    def __init__(self, left: Expression, right: Expression):
        """Store both disjuncts.

        Args:
            left: Left-hand disjunct.
            right: Right-hand disjunct.
        """
        self.left = left
        self.right = right

    def to_lean4(self) -> str:
        """Render as ``(<left> ∨ <right>)``."""
        return f"({self.left.to_lean4()} ∨ {self.right.to_lean4()})"

    def to_prolog(self) -> str:
        """Render as ``(<left>; <right>)`` (semicolon is Prolog disjunction)."""
        return f"({self.left.to_prolog()}; {self.right.to_prolog()})"

    def to_z3(self) -> Z3Template:
        """Z3 translation is not implemented for disjunction.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")


class Implies(Expression):
    """Logical implication: antecedent → consequent."""

    def __init__(self, antecedent: Expression, consequent: Expression):
        """Store both sides of the implication.

        Args:
            antecedent: The hypothesis.
            consequent: The conclusion.
        """
        self.antecedent = antecedent
        self.consequent = consequent

    def to_lean4(self) -> str:
        """Render as ``(<antecedent> → <consequent>)``."""
        return f"({self.antecedent.to_lean4()} → {self.consequent.to_lean4()})"

    def to_prolog(self) -> str:
        """Render as ``implies(<antecedent>, <consequent>)``."""
        # Note: Prolog does not have a direct "implies" operator, so a
        # user-level implies/2 term is emitted instead.
        return f"implies({self.antecedent.to_prolog()}, {self.consequent.to_prolog()})"

    def to_z3(self) -> Z3Template:
        """Z3 translation is not implemented for implication.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")


# =============================================================================
# Quantifiers
# =============================================================================


class Forall(Expression):
    """Universal quantification of a body over a variable ranging over a domain."""

    def __init__(self, var: str, iterable: Expression, body: Expression):
        """
        var: variable name (as a string)
        iterable: an expression denoting the iterable (domain, set, etc.) of the variable
        body: the predicate in which var appears
        """
        self.var = var
        self.iterable = iterable
        self.body = body

    def to_lean4(self) -> str:
        """Render as ``(∀ <var> : <iterable>, <body>)``."""
        iterable = self.iterable.to_lean4()
        body = self.body.to_lean4()
        return f"(∀ {self.var} : {iterable}, {body})"

    def to_prolog(self) -> str:
        """Render as ``forall(<var>, <iterable>, <body>)``; the variable name is emitted verbatim."""
        iterable = self.iterable.to_prolog()
        body = self.body.to_prolog()
        return f"forall({self.var}, {iterable}, {body})"

    def to_z3(self) -> Z3Template:
        """Z3 translation is not implemented for universal quantification.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")


class Exists(Expression):
    """Existential quantification of a body over a variable ranging over a domain."""

    def __init__(self, var: str, domain: Expression, body: Expression):
        """
        var: variable name (as a string)
        domain: an expression denoting the domain of the variable
        body: the predicate in which var appears
        """
        self.var = var
        self.domain = domain
        self.body = body

    def to_lean4(self) -> str:
        """Render as ``(∃ <var> : <domain>, <body>)``."""
        domain = self.domain.to_lean4()
        body = self.body.to_lean4()
        return f"(∃ {self.var} : {domain}, {body})"

    def to_prolog(self) -> str:
        """Render as ``exists(<var>, <domain>, <body>)``; the variable name is emitted verbatim."""
        domain = self.domain.to_prolog()
        body = self.body.to_prolog()
        return f"exists({self.var}, {domain}, {body})"

    def to_z3(self) -> Z3Template:
        """Z3 translation is not implemented for existential quantification.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")


# =============================================================================
# Set Theory Classes
# =============================================================================


class Set(Expression):
    """
    Represents a set with a specified element type.
    Can be defined either by listing elements or by a predicate.

    A predicate is expected to expose ``var_name`` and ``body`` attributes
    (as Lambda does); both are read when rendering a predicate-defined set.

    NOTE: this class shares its name with ``typing.Set`` imported at the top of
    the module; from this definition onward ``Set`` refers to this class.
    """

    def __init__(
        self,
        domain: Expression,
        elements: Optional[
            List[Expression]
        ] = None,  # TODO: Change to Set[Expression] but Set is defined twice (once here and once in typing)
        predicate: Optional[Expression] = None,
    ):
        """Create a set from exactly one of ``elements`` or ``predicate``.

        Args:
            domain: Expression describing the type of the elements.
            elements: Explicit members of the set.
            predicate: Predicate selecting the members from the domain.

        Raises:
            ValueError: If neither or both of ``elements`` and ``predicate``
                are provided.
        """
        if elements is None and predicate is None:
            raise ValueError("Must provide either elements or predicate")
        if elements is not None and predicate is not None:
            raise ValueError("Cannot provide both elements and predicate")
        self.domain = domain
        self.elements = elements
        self.predicate = predicate

    def to_lean4(self) -> str:
        """Render as a Lean 4 set literal or set-builder expression."""
        if self.elements is not None:
            elems = ", ".join(e.to_lean4() for e in self.elements)
            # Plain string concatenation: brace doubling is only an escape
            # inside f-strings, so a literal "}}" here would emit two braces.
            return "{" + elems + "}"
        # Use Lean's set comprehension syntax, binding the predicate's own
        # variable so that occurrences in the body refer to the binder.
        var = self.predicate.var_name
        body = self.predicate.body.to_lean4()
        return f"{{{var} : {self.domain.to_lean4()} | {body}}}"

    def to_prolog(self) -> str:
        """Render as a Prolog list, or a ``findall/3`` goal for predicate sets."""
        if self.elements is not None:
            elems = ", ".join(e.to_prolog() for e in self.elements)
            return f"[{elems}]"
        # NOTE(review): the template variable is fixed to X and is not tied to
        # the predicate's var_name - confirm the intended Prolog binding.
        body = self.predicate.body.to_prolog()
        return f"findall(X, ({body}), Set)"

    def to_z3(self) -> str:
        """Z3 translation is not implemented for sets.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")

    def evaluate(self) -> Optional[set]:
        """
        Attempts to evaluate the set to a concrete Python set.
        Returns None if the set cannot be evaluated (e.g., symbolic variables).

        Raises:
            NotImplementedError: For predicate-defined sets over NatDomain,
                which would need a bounding strategy.
        """
        if self.elements is not None:
            try:
                # If elements are already Python objects (like integers), return them directly
                if all(not hasattr(e, "evaluate") for e in self.elements):
                    return set(self.elements)
                # Otherwise try to evaluate each element
                return {e.evaluate() for e in self.elements}
            except AttributeError:
                # A mix of evaluable and plain elements cannot be evaluated.
                return None
        elif isinstance(self.domain, NatDomain):
            # For natural numbers, we can try to evaluate up to some bound
            raise NotImplementedError(
                "Evaluation of predicate-defined sets requires a proper bounding strategy"
            )
        return None


class SetDomain(Expression):
    """Represents the domain/type of sets"""

    def to_lean4(self) -> str:
        """Return the Lean 4 type name ``Set``."""
        return "Set"

    def to_prolog(self) -> str:
        """Return the Prolog atom ``set``."""
        return "set"

    def to_z3(self) -> str:
        """Z3 translation is not implemented for the set domain.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")


class TupleDomain(Expression):
    """
    Represents a product domain for a tuple of quantified variables.
    For example, in Lean: D1 × D2 × ...

    NOTE: the name ``TupleDomain`` is defined a second time later in this
    module (as a SetDomain subclass); that later definition replaces this one
    once the module has finished loading.
    """

    def __init__(self, domains: Tuple["Entity", ...]):
        """Store the component domains.

        Args:
            domains: The domains forming the product, in order.
        """
        self.domains = domains

    def to_lean4(self, ctx: Optional[Any] = None) -> str:
        """Render as the component domains joined by `` × ``. ``ctx`` is unused."""
        return " × ".join(d.to_lean4() for d in self.domains)

    def to_prolog(self, ctx: Optional[Any] = None) -> str:
        """Render as ``tuple_domain(<d1>, <d2>, ...)``. ``ctx`` is unused."""
        return f"tuple_domain({', '.join(d.to_prolog() for d in self.domains)})"

    def to_z3(self, ctx: Optional[Any] = None) -> str:
        """Z3 translation is not implemented for tuple domains.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")


class In(Expression):
    """Represents set membership (x ∈ A)"""

    def __init__(self, element: Expression, set_expr: Expression):
        """Store the element and the set it is tested against.

        Args:
            element: The candidate member.
            set_expr: The set-valued expression.
        """
        self.element = element
        self.set_expr = set_expr

    def to_lean4(self) -> str:
        """Render as ``(<element> ∈ <set>)``."""
        return f"({self.element.to_lean4()} ∈ {self.set_expr.to_lean4()})"

    def to_prolog(self) -> str:
        """Render as ``member(<element>, <set>)``."""
        return f"member({self.element.to_prolog()}, {self.set_expr.to_prolog()})"

    def to_z3(self) -> str:
        """Z3 translation is not implemented for set membership.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")


class SetCardinality(Expression):
    """Represents the cardinality of a set"""

    def __init__(self, set_expr: Expression):
        """Wrap a set-valued expression.

        Args:
            set_expr: A Set or a ConceptApplication.

        Raises:
            TypeError: If ``set_expr`` is neither a Set nor a ConceptApplication.
        """
        if not isinstance(set_expr, (Set, ConceptApplication)):
            raise TypeError(
                "SetCardinality can only be applied to set-valued expressions"
            )
        self.set = set_expr

    def to_lean4(self) -> str:
        """Render as ``(Finset.card <set>)``."""
        set_str = self.set.to_lean4()
        return f"(Finset.card {set_str})"

    def to_prolog(self) -> str:
        """Render as ``length(<set>, Len)``."""
        set_str = self.set.to_prolog()
        return f"length({set_str}, Len)"

    def to_z3(self) -> str:
        """Z3 translation is not implemented for cardinality.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")

    def evaluate(self) -> Optional[int]:
        """
        Attempts to evaluate the cardinality for concrete cases.
        Returns:
        - The number of elements for concrete sets
        - None if the cardinality cannot be determined (e.g., symbolic sets or concept applications)
        """
        if isinstance(self.set, Set) and self.set.elements is not None:
            try:
                return len(self.set.elements)
            except AttributeError:
                return None
        elif isinstance(self.set, ConceptApplication):
            try:
                # Try to compute the concept and get the length of its result if possible
                result = self.set.concept.compute(
                    *[arg.evaluate() for arg in self.set.args]
                )
                if isinstance(result, set):
                    return len(result)
            except (AttributeError, TypeError):
                # Best effort: arguments or concept that cannot be evaluated
                # mean the cardinality is unknown, reported as None below.
                pass
        return None


class SetSum(Expression):
    """Represents the sum of elements in a set-valued expression"""

    def __init__(self, set_expr: Expression):
        """Wrap a set-valued expression.

        Args:
            set_expr: A Set or a ConceptApplication.

        Raises:
            TypeError: If ``set_expr`` is neither a Set nor a ConceptApplication.
        """
        if not isinstance(set_expr, (Set, ConceptApplication)):
            raise TypeError("SetSum can only be applied to set-valued expressions")
        self.set_expr = set_expr

    def to_lean4(self) -> str:
        """Render as ``(Finset.sum <set> id)``."""
        set_str = self.set_expr.to_lean4()
        return f"(Finset.sum {set_str} id)"

    def to_prolog(self) -> str:
        """Render as ``sum_list(<set>, Sum)``."""
        set_str = self.set_expr.to_prolog()
        return f"sum_list({set_str}, Sum)"

    def to_z3(self) -> str:
        """Z3 translation is not implemented for set sums.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")

    def evaluate(self) -> Optional[int]:
        """
        Attempts to evaluate the sum for concrete cases.
        Returns:
        - The sum of elements for concrete sets
        - None if the sum cannot be determined (e.g., symbolic sets or concept applications)
        """
        if isinstance(self.set_expr, Set) and self.set_expr.elements is not None:
            try:
                return sum(e.evaluate() for e in self.set_expr.elements)
            except AttributeError:
                # At least one element has no evaluate(); the sum is unknown.
                return None
        elif isinstance(self.set_expr, ConceptApplication):
            try:
                # Try to compute the concept and sum its result if possible
                result = self.set_expr.concept.compute(
                    *[arg.evaluate() for arg in self.set_expr.args]
                )
                if isinstance(result, set):
                    return sum(result)
            except (AttributeError, TypeError):
                # Best effort: arguments or concept that cannot be evaluated
                # mean the sum is unknown, reported as None below.
                pass
        return None


# =============================================================================
# Natural Numbers Classes
# =============================================================================


class Zero(Expression):
    """The natural number zero as a symbolic constant."""

    def to_lean4(self) -> str:
        """Return the Lean 4 literal ``0``."""
        return "0"

    def to_prolog(self) -> str:
        """Return the Prolog atom ``zero``."""
        return "zero"

    def to_z3(self) -> Z3Template:
        """Return the literal ``0``.

        NOTE(review): the value returned is a plain string although the
        annotation says Z3Template - confirm which one callers expect.
        """
        return "0"


class Succ(Expression):
    """The successor of a natural-number expression."""

    def __init__(self, n: Expression):
        """Store the expression whose successor is taken.

        Args:
            n: The operand expression.
        """
        self.n = n

    def to_lean4(self) -> str:
        """Render as ``(succ <n>)``."""
        return f"(succ {self.n.to_lean4()})"

    def to_prolog(self) -> str:
        """Render as ``succ(<n>)``."""
        return f"succ({self.n.to_prolog()})"

    def to_z3(self) -> Z3Template:
        """Z3 translation is not implemented for successor.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")


class Nat(Expression):
    """A concrete natural number literal."""

    def __init__(self, value: int):
        """Validate and store the number.

        Args:
            value: A non-negative integer.

        Raises:
            ValueError: If ``value`` is not an int or is negative.
        """
        is_natural = isinstance(value, int) and value >= 0
        if not is_natural:
            raise ValueError(f"Natural numbers must be non-negative integers, got {value} which has type {type(value)}")
        self.value = value

    def to_lean4(self) -> str:
        """Return the decimal literal."""
        literal = str(self.value)
        return literal

    def to_prolog(self) -> str:
        """Return the decimal literal."""
        literal = str(self.value)
        return literal

    def to_z3(self) -> Z3Template:
        """Return the value formatted as text."""
        return f"{self.value}"

    def evaluate(self) -> int:
        """Return the wrapped Python integer."""
        return self.value


class NatDomain(Expression):
    """Represents the domain/type of natural numbers"""

    def to_lean4(self) -> str:
        """Return the Lean 4 type of natural numbers."""
        return "ℕ"  # Unicode for natural numbers

    def to_prolog(self) -> str:
        """Return the Prolog atom ``nat``."""
        return "nat"

    def to_z3(self) -> str:
        """Return the name ``NatSet``.

        NOTE(review): presumably ``NatSet`` is declared on the Z3 side as
        non-negative integers - confirm where it is defined.
        """
        return "NatSet"  # Z3 doesn't have built-in naturals, we use integers


class GroupDomain(Expression):
    """Represents the domain/type of groups"""

    def to_lean4(self) -> str:
        """Return the Lean 4 name ``Group``."""
        return "Group"

    def to_prolog(self) -> str:
        """Return the placeholder text ``Unimplemented``."""
        return "Unimplemented"

    def to_z3(self) -> str:
        """Z3 translation is not implemented for the group domain.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")


class GroupElementDomain(Expression):
    """Represents the domain/type of group elements

    NOTE: the name ``GroupElementDomain`` is defined a second time later in
    this module; that later definition replaces this one once the module has
    finished loading.
    """

    def to_lean4(self) -> str:
        """Return the Lean 4 type variable ``G``."""
        return "G"  # In Lean, groups are typically defined over a type 'G'

    def to_prolog(self) -> str:
        """Return the Prolog atom ``group_element``."""
        return "group_element"  # A clear Prolog atom for group elements

    def to_z3(self) -> str:
        """Z3 translation is not implemented for the group element domain.

        Raises:
            NotImplementedError: Always.
        """
        # In Z3, group elements can be modeled as an uninterpreted sort 'G'.
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")


class TupleDomain(SetDomain):
    """
    Represents a product domain for a tuple of quantified variables.
    For example, in Lean: D1 × D2 × ...
    """

    def __init__(self, domains: "Union[List[Entity], Tuple[Entity, ...]]"):
        """Store the component domains.

        Args:
            domains: The domains forming the product, in order.
        """
        # Convert to tuple to communicate immutability
        self.domains = tuple(domains)

    def to_lean4(self) -> str:
        """Render as the component domains joined by `` × ``."""
        return " × ".join(d.to_lean4() for d in self.domains)

    def to_prolog(self) -> str:
        """Render as ``tuple_domain(<d1>, <d2>, ...)``."""
        return f"tuple_domain({', '.join(d.to_prolog() for d in self.domains)})"

    def to_z3(self) -> str:
        """Z3 translation is not implemented for tuple domains.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")


class TupleExpr(Expression):
    """
    Represents a tuple of expressions.
    For example: (x₁, x₂, ..., xₙ)
    """

    def __init__(self, elements: "Union[List[Expression], Tuple[Expression, ...]]"):
        """Store the component expressions.

        Args:
            elements: The components of the tuple, in order.
        """
        # Convert to tuple to communicate immutability
        self.elements = tuple(elements)

    def to_lean4(self) -> str:
        """Render as ``(<e1>, <e2>, ...)``."""
        # In Lean4, tuples are represented using parentheses and commas
        elements_str = ", ".join(e.to_lean4() for e in self.elements)
        return f"({elements_str})"

    def to_prolog(self) -> str:
        """Render as ``(<e1>, <e2>, ...)``."""
        # In Prolog, tuples are represented using parentheses and commas
        elements_str = ", ".join(e.to_prolog() for e in self.elements)
        return f"({elements_str})"

    def to_z3(self) -> str:
        """Z3 translation is not implemented for tuples.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")


# =============================================================================
# Function Classes
# =============================================================================


class Lambda(Expression):
    """Represents a lambda/anonymous function"""

    def __init__(self, var_name: str, body: Expression):
        """Store the bound variable and the function body.

        Args:
            var_name: Name of the bound variable.
            body: Expression forming the body of the function.
        """
        self.var_name = var_name
        self.body = body

    def to_lean4(self) -> str:
        """Render as ``(λ <var> => <body>)``."""
        body = self.body.to_lean4()
        return f"(λ {self.var_name} => {body})"

    def to_prolog(self) -> str:
        """Render as ``lambda(<var>, <body>)``; the variable name is emitted verbatim."""
        body = self.body.to_prolog()
        return f"lambda({self.var_name}, {body})"

    def to_z3(self) -> str:
        """Z3 translation is not implemented for lambdas.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")


class Apply(Expression):
    """Represents function application"""

    def __init__(self, fn: Expression, arg: Expression):
        """Store the function and its single argument.

        Args:
            fn: The function expression.
            arg: The argument expression.
        """
        self.fn = fn
        self.arg = arg

    def to_lean4(self) -> str:
        """Render as ``(<fn> <arg>)``."""
        return f"({self.fn.to_lean4()} {self.arg.to_lean4()})"

    def to_prolog(self) -> str:
        """Render as ``apply(<fn>, <arg>)``."""
        return f"apply({self.fn.to_prolog()}, {self.arg.to_prolog()})"

    def to_z3(self) -> str:
        """Z3 translation is not implemented for function application.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")


class Equals(Expression):
    """Represents mathematical equality between two expressions"""

    def __init__(self, left: Expression, right: Expression):
        """Store both sides of the equation.

        Args:
            left: Left-hand side.
            right: Right-hand side.
        """
        self.left = left
        self.right = right

    def to_lean4(self) -> str:
        """Render as ``(<left> = <right>)``."""
        left = self.left.to_lean4()
        right = self.right.to_lean4()
        return f"({left} = {right})"

    def to_prolog(self) -> str:
        """Render as ``equals(<left>, <right>)``."""
        left = self.left.to_prolog()
        right = self.right.to_prolog()
        return f"equals({left}, {right})"

    def to_z3(self) -> Z3Template:
        """Z3 translation is not implemented for equality.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")

    def evaluate(self) -> Optional[bool]:
        """
        Attempts to evaluate equality for concrete cases.
        Returns:
        - True/False for concrete cases (Nat and explicit Sets)
        - None if equality cannot be determined (e.g., symbolic expressions or predicate-defined sets)
        """
        try:
            # Handle Nat equality
            if isinstance(self.left, Nat) and isinstance(self.right, Nat):
                return self.left.evaluate() == self.right.evaluate()

            # Handle explicit Set equality
            if (
                isinstance(self.left, Set)
                and isinstance(self.right, Set)
                and self.left.elements is not None
                and self.right.elements is not None
            ):
                left_vals = self.left.evaluate()
                right_vals = self.right.evaluate()
                if left_vals is not None and right_vals is not None:
                    return left_vals == right_vals

            return None  # Cannot evaluate equality
        except AttributeError:
            # An operand without the expected evaluation interface is
            # treated as "cannot decide".
            return None


class Fold(Expression):
    """
    Represents folding/iteration over natural numbers.
    fold(n, base, f) represents applying f n times starting with base.
    For example:
    - Addition(a,b) = fold(b, a, succ)  # Add b to a by applying succ b times
    - Multiplication(a,b) = fold(b, 0, λx.add(a,x))  # Add a to itself b times
    """

    def __init__(self, count: Expression, base: Expression, step_fn: Expression):
        """Store the three components of the fold.

        Args:
            count: How many times to apply ``step_fn``.
            base: Starting value.
            step_fn: Function to apply at each step.
        """
        self.count = count  # How many times to apply step_fn
        self.base = base  # Starting value
        self.step_fn = step_fn  # Function to apply at each step

    def to_lean4(self) -> str:
        """Render as ``(fold <count> <base> <step_fn>)``."""
        return f"(fold {self.count.to_lean4()} {self.base.to_lean4()} {self.step_fn.to_lean4()})"

    def to_prolog(self) -> str:
        """Render as ``fold(<count>, <base>, <step_fn>)``."""
        return f"fold({self.count.to_prolog()}, {self.base.to_prolog()}, {self.step_fn.to_prolog()})"

    def to_z3(self) -> str:
        """Z3 translation is not implemented for folds.

        Raises:
            NotImplementedError: Always.
        """
        # Z3 doesn't have direct fold, we'd need to use recursion/quantifiers.
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")


# =============================================================================
# Group Classes
# =============================================================================


class GroupElement(Expression):
    """Represents a group element, wrapping any value and providing a consistent interface."""

    def __init__(self, value: Any):
        """
        Initialize a group element with any value.

        Parameters:
        - value: The underlying value of the group element (e.g., integer, permutation, matrix)
        """
        self.value = value

    def evaluate(self) -> Any:
        """Returns the underlying value of the group element."""
        return self.value

    def to_lean4(self) -> str:
        """Convert the group element to Lean 4 syntax.

        Delegates to the value's own ``to_lean4`` when it has one, otherwise
        falls back to ``str(value)``.
        """
        if hasattr(self.value, "to_lean4"):
            return self.value.to_lean4()
        return str(self.value)

    def to_prolog(self) -> str:
        """Convert the group element to Prolog syntax.

        Delegates to the value's own ``to_prolog`` when it has one, otherwise
        falls back to ``str(value)``.
        """
        if hasattr(self.value, "to_prolog"):
            return self.value.to_prolog()
        return str(self.value)

    def to_z3(self) -> str:
        """Convert the group element to Z3 syntax (not implemented).

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")

    def __eq__(self, other: Any) -> bool:
        """Two group elements are equal if their underlying values are equal."""
        if not isinstance(other, GroupElement):
            return NotImplemented
        return self.value == other.value

    @staticmethod
    def _freeze(value: Any) -> Any:
        """Return a hashable stand-in for common unhashable containers.

        Lists and tuples become tuples, sets become frozensets, dicts become
        frozensets of (key, frozen value) pairs and bytearrays become bytes,
        recursively. Any other value is returned unchanged.
        """
        if isinstance(value, (list, tuple)):
            return tuple(GroupElement._freeze(item) for item in value)
        if isinstance(value, (set, frozenset)):
            return frozenset(GroupElement._freeze(item) for item in value)
        if isinstance(value, dict):
            return frozenset(
                (key, GroupElement._freeze(item)) for key, item in value.items()
            )
        if isinstance(value, bytearray):
            return bytes(value)
        return value

    def __hash__(self) -> int:
        """Hash based on the underlying value.

        Hashable values hash exactly as before. Unhashable values such as
        nested lists (e.g. matrices) are hashed through a frozen copy so that
        elements wrapping them can still be stored in sets and dict keys.
        Equal values always produce equal hashes.
        """
        try:
            return hash(self.value)
        except TypeError:
            # Unhashable value (e.g. a list of lists): try its frozen form.
            pass
        try:
            return hash(self._freeze(self.value))
        except TypeError:
            # Still unhashable: a constant keeps __eq__/__hash__ consistent,
            # at the cost of collisions between such elements.
            return hash(GroupElement)


class GroupElementDomain(Expression):
    """Represents the domain/type of group elements"""

    def to_lean4(self) -> str:
        """Return the Lean 4 type variable ``G``."""
        return "G"  # In Lean4, group elements are of type G

    def to_prolog(self) -> str:
        """Return the text ``G``."""
        return "G"  # In Prolog, group elements are of type G

    def to_z3(self) -> str:
        """Z3 translation is not implemented for the group element domain.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")


class SetExpr(Expression):
    """A set expression that can be evaluated to a Python set."""

    def __init__(
        self,
        domain: Expression,
        elements: Optional[List[Expression]] = None,
        predicate: Optional[Expression] = None,
    ):
        """Store the domain and the optional element list / predicate.

        No validation is performed: both ``elements`` and ``predicate`` may be
        omitted, in which case the expression denotes the whole domain.

        Args:
            domain: Expression describing the type of the elements.
            elements: Explicit members of the set, if any.
            predicate: Predicate selecting the members, if any.
        """
        self.domain = domain
        self.elements = elements
        self.predicate = predicate

    def to_lean4(self) -> str:
        """Render as a Lean 4 set literal or set-builder expression.

        Explicit elements take precedence over a predicate.
        """
        if self.elements is not None:
            listed = ", ".join(e.to_lean4() for e in self.elements)
            return "{" + listed + "}"
        if self.predicate is not None:
            return f"{{x : {self.domain.to_lean4()} | {self.predicate.to_lean4()}}}"
        return f"{{x : {self.domain.to_lean4()}}}"

    def to_prolog(self) -> str:
        """Render as a Prolog list, or a brace term for predicate/domain sets.

        Explicit elements take precedence over a predicate.
        """
        if self.elements is not None:
            listed = ", ".join(e.to_prolog() for e in self.elements)
            return f"[{listed}]"
        if self.predicate is not None:
            return f"{{X : {self.domain.to_prolog()} | {self.predicate.to_prolog()}}}"
        return f"{{X : {self.domain.to_prolog()}}}"

    def to_z3(self) -> str:
        """Z3 translation is not implemented for set expressions.

        Raises:
            NotImplementedError: Always.
        """
        # Raise rather than return: a returned exception instance would be
        # silently treated as a translation result by callers.
        raise NotImplementedError("to_z3 is not yet implemented")

    def evaluate(self) -> Optional[set]:
        """Evaluate the set expression to a Python set.

        Returns:
            The set of evaluated elements when elements were listed
            explicitly; None for predicate-based and domain-based sets,
            which cannot be evaluated for now.
        """
        if self.elements is not None:
            return {e.evaluate() for e in self.elements}
        # Neither predicate-based nor domain-based sets can be evaluated yet.
        return None


class Group(Expression):
    """
    Represents a group structure with a carrier (set), a binary operation, an identity element,
    and an inverse function. Optionally, the group axioms can be checked if the carrier is concrete.
    """

    def __init__(
        self,
        carrier: Set,
        op: Union[
            Callable[[GroupElement, GroupElement], GroupElement],
            Dict[Tuple[GroupElement, GroupElement], GroupElement],
        ],  # can be a function or a multiplication table
        identity: GroupElement,
        inverse: Callable[[GroupElement], GroupElement],
        check_axioms: bool = False,
    ):
        """
        Initializes a Group instance.

        Parameters:
        - carrier: A set expression representing the group's carrier; it must
                   expose an ``elements`` attribute.
        - op: The group operation, either a binary function or a
              multiplication table mapping ``(a, b)`` pairs to their product.
        - identity: The identity element.
        - inverse: A function that returns the inverse of a given element.
        - check_axioms: If True and the carrier is concrete (has elements),
                        perform a computational check of the group axioms.

        Raises:
        - ValueError: If the axioms are checked and do not hold, or if the
                      operation fails during the closure check.
        """
        self.carrier = carrier
        self.op = op
        self.identity = identity
        self.inverse = inverse

        # Optionally check the axioms if the carrier has concrete elements.
        if check_axioms and self.carrier.elements is not None:
            if not self._check_axioms():
                raise ValueError(
                    "The provided structure does not satisfy the group axioms."
                )

    def _apply_op(self, a: Any, b: Any) -> Any:
        """Apply the group operation, whether it is a function or a table."""
        if isinstance(self.op, dict):
            # Multiplication table: products are looked up by operand pair.
            return self.op[(a, b)]
        return self.op(a, b)

    def _check_axioms(self) -> bool:
        """
        Performs a computational check of the group axioms on the concrete carrier.
        Returns True if all axioms hold, otherwise False.
        Note: This method only works if self.carrier.elements is a concrete iterable.

        Raises:
        - ValueError: If the carrier has no concrete elements, or if applying
                      the operation fails during the closure check.
        """
        # For brevity, we assume self.carrier.elements is a Python set or list.
        elems = self.carrier.elements
        if elems is None:
            raise ValueError("Carrier elements are not concrete; cannot check axioms.")

        # Check closure: For all a, b in carrier, op(a, b) must be in carrier.
        for a in elems:
            for b in elems:
                try:
                    result = self._apply_op(a, b)
                except Exception as e:
                    # Keep the original failure attached as the cause.
                    raise ValueError(f"Error applying the group operation: {e}") from e
                if result not in elems:
                    return False

        # Check associativity: For all a, b, c in carrier, op(op(a, b), c) == op(a, op(b, c))
        for a in elems:
            for b in elems:
                for c in elems:
                    left_assoc = self._apply_op(self._apply_op(a, b), c)
                    right_assoc = self._apply_op(a, self._apply_op(b, c))
                    if left_assoc != right_assoc:
                        return False

        # Check identity: identity must be in carrier and op(identity, a) == a == op(a, identity)
        if self.identity not in elems:
            return False
        for a in elems:
            if self._apply_op(self.identity, a) != a or self._apply_op(a, self.identity) != a:
                return False

        # Check inverses: For each a in carrier, its inverse must be in carrier and op(a, inv(a)) == identity.
        for a in elems:
            inv_a = self.inverse(a)
            if inv_a not in elems:
                return False
            if (
                self._apply_op(a, inv_a) != self.identity
                or self._apply_op(inv_a, a) != self.identity
            ):
                return False

        return True

    def to_lean4(self) -> str:
        """Render as ``Group(<carrier>, <op>, <identity>, <inverse>)``.

        Components without a ``to_lean4`` method are formatted with ``str()``.
        """
        return (
            f"Group({self.carrier.to_lean4()}, "
            f"{self.op.to_lean4() if hasattr(self.op, 'to_lean4') else self.op}, "
            f"{self.identity.to_lean4() if hasattr(self.identity, 'to_lean4') else self.identity}, "
            f"{self.inverse.to_lean4() if hasattr(self.inverse, 'to_lean4') else self.inverse})"
        )

    def to_prolog(self) -> str:
        """Render as ``group(<carrier>, <op>, <identity>, <inverse>)``.

        Components without a ``to_prolog`` method are formatted with ``str()``.
        """
        return (
            f"group({self.carrier.to_prolog()}, "
            f"{self.op.to_prolog() if hasattr(self.op, 'to_prolog') else self.op}, "
            f"{self.identity.to_prolog() if hasattr(self.identity, 'to_prolog') else self.identity}, "
            f"{self.inverse.to_prolog() if hasattr(self.inverse, 'to_prolog') else self.inverse})"
        )

    def to_z3(self) -> str:
        """Render as ``(group <carrier> <op> <identity> <inverse>)``.

        Components without a ``to_z3`` method are formatted with ``str()``.
        """
        return (
            f"(group {self.carrier.to_z3()} "
            f"{self.op.to_z3() if hasattr(self.op, 'to_z3') else self.op} "
            f"{self.identity.to_z3() if hasattr(self.identity, 'to_z3') else self.identity} "
            f"{self.inverse.to_z3() if hasattr(self.inverse, 'to_z3') else self.inverse})"
        )

    def evaluate(self) -> Any:
        """
        In a concrete setting, this could return a tuple representing the group structure.

        Returns the tuple (evaluated carrier, op, evaluated identity, inverse).
        """
        return (
            self.carrier.evaluate(),
            self.op,
            self.identity.evaluate(),
            self.inverse,
        )


class CyclicGroup(Group):
    """Cyclic group on the residues ``0 .. n-1`` under addition modulo ``n``."""

    def __init__(self, n: int):
        """
        Build the cyclic group with ``n`` elements.

        Args:
            n: The order of the group (number of elements)
        """
        domain = GroupElementDomain()
        residues = {GroupElement(k) for k in range(n)}
        super().__init__(
            carrier=SetExpr(domain, residues),
            # Group law: add the underlying integers and reduce modulo n.
            op=lambda a, b: GroupElement((a.evaluate() + b.evaluate()) % n),
            identity=GroupElement(0),
            # Additive inverse modulo n; the outer "% n" maps 0 back to 0.
            inverse=lambda a: GroupElement((n - a.evaluate()) % n),
            check_axioms=True,
        )
        self.order = n


class Z2(CyclicGroup):
    """The cyclic group of order 2 (Z₂, also written C₂).

    The smallest non-trivial group. Its elements are {0, 1}:
    - 0 is the identity element
    - 1 is the generator
    - the operation is addition modulo 2
    - every element is its own inverse

    It is isomorphic to the multiplicative group {1, -1} and to the
    symmetric group S₂.
    """

    def __init__(self):
        """Construct Z₂ as the cyclic group with two elements."""
        super().__init__(n=2)


class Z3(CyclicGroup):
    """The cyclic group of order 3 (Z₃, also written C₃).

    Its elements are {0, 1, 2} and the operation is addition modulo 3.
    """

    def __init__(self):
        """Construct Z₃ as the cyclic group with three elements."""
        super().__init__(n=3)


class Z4(CyclicGroup):
    """The cyclic group of order 4 (Z₄, also written C₄).

    Its elements are {0, 1, 2, 3} and the operation is addition modulo 4.
    """

    def __init__(self):
        """Construct Z₄ as the cyclic group with four elements."""
        super().__init__(n=4)


class KleinFourGroup(Group):
    """The Klein four-group (V₄), isomorphic to Z₂ × Z₂.

    The four elements are represented as 2x2 diagonal matrices:
    - Identity: [[1,0],[0,1]]
    - a: [[1,0],[0,-1]]
    - b: [[-1,0],[0,1]]
    - c: [[-1,0],[0,-1]]

    It is the smallest non-cyclic group.
    """

    def __init__(self):
        """Build the carrier of four diagonal matrices and register the group law."""
        # Diagonal entries of the four elements, in the order identity, a, b, c.
        diagonals = [(1, 1), (1, -1), (-1, 1), (-1, -1)]
        elements = {
            GroupElement([[top, 0], [0, bottom]]) for top, bottom in diagonals
        }

        carrier = SetExpr(GroupElementDomain(), elements)

        def matrix_multiply(a: GroupElement, b: GroupElement) -> GroupElement:
            """Return the ordinary 2x2 matrix product of the two elements."""
            lhs = a.evaluate()
            rhs = b.evaluate()
            product = [
                [sum(lhs[row][k] * rhs[k][col] for k in range(2)) for col in range(2)]
                for row in range(2)
            ]
            return GroupElement(product)

        super().__init__(
            carrier=carrier,
            op=matrix_multiply,
            identity=GroupElement([[1, 0], [0, 1]]),
            inverse=lambda a: a,  # every element of the Klein four-group is self-inverse
            check_axioms=True,
        )
        self.order = 4


class SymmetricGroup(Group):
    """Base class for symmetric groups.

    Elements are permutations of ``0 .. n-1`` stored as tuples, where the
    tuple ``p`` maps position ``i`` to ``p[i]``.
    """

    def __init__(self, n: int):
        """
        Initialize the symmetric group Sₙ on n elements.

        Args:
            n: The number of elements being permuted

        Raises:
            ValueError: If ``n`` is negative.
        """
        # Local import so this block does not depend on the module's import list.
        from itertools import permutations

        if n < 0:
            raise ValueError(
                f"SymmetricGroup requires a non-negative n (got {n})"
            )

        # All permutations of [0,1,...,n-1]; for n == 0 this is the single empty tuple.
        elements = {GroupElement(p) for p in permutations(range(n))}
        carrier = SetExpr(GroupElementDomain(), elements)

        def compose(p: GroupElement, q: GroupElement) -> GroupElement:
            """Compose two permutations: the result maps i to p[q[i]]."""
            p_val = p.evaluate()
            q_val = q.evaluate()
            return GroupElement(tuple(p_val[i] for i in q_val))

        def inverse(p: GroupElement) -> GroupElement:
            """Find the inverse of a permutation."""
            p_val = p.evaluate()
            result = [0] * len(p_val)
            # p sends i to j, so the inverse sends j back to i.
            for i, j in enumerate(p_val):
                result[j] = i
            return GroupElement(tuple(result))

        super().__init__(
            carrier=carrier,
            op=compose,
            identity=GroupElement(tuple(range(n))),
            inverse=inverse,
            check_axioms=True,
        )
        # Number of points being permuted.
        self.degree = n
        # Number of group elements (n!), matching how CyclicGroup and
        # KleinFourGroup use ``order``.
        self.order = len(elements)


class S3(SymmetricGroup):
    """The symmetric group S₃ on 3 elements.

    The smallest non-abelian group. Its 6 elements are:
    - Identity: (0,1,2)
    - Transpositions: (1,0,2), (2,1,0), (0,2,1)
    - 3-cycles: (1,2,0), (2,0,1)
    """

    def __init__(self):
        """Construct S₃ as the symmetric group on three points."""
        super().__init__(n=3)


class GroupDomain(Expression):
    """Type-level expression standing for the domain of all groups."""

    def to_lean4(self) -> str:
        """Return the Lean 4 name used for the group domain."""
        return "Group"

    def to_prolog(self) -> str:
        """Return the placeholder string; there is no Prolog rendering."""
        return "Unimplemented"

    def to_z3(self) -> str:
        """Return the placeholder string; there is no Z3 rendering."""
        return "Unimplemented"


class FunctionDomain(Expression):
    """Type-level expression for functions from one domain into another."""

    def __init__(self, domain: Expression, codomain: Expression):
        """Store the source (``domain``) and target (``codomain``) expressions."""
        self.domain = domain
        self.codomain = codomain

    def to_lean4(self) -> str:
        """Render as a Lean 4 arrow type ``domain → codomain``."""
        source = self.domain.to_lean4()
        target = self.codomain.to_lean4()
        return f"{source} → {target}"

    def to_prolog(self) -> str:
        """Render as the Prolog term ``function(domain, codomain)``."""
        source = self.domain.to_prolog()
        target = self.codomain.to_prolog()
        return f"function({source}, {target})"

    def to_z3(self) -> str:
        """Render as the Z3 sort ``(Array domain codomain)``."""
        source = self.domain.to_z3()
        target = self.codomain.to_z3()
        return f"(Array {source} {target})"


# =============================================================================
# Example Management Classes
# =============================================================================


class ExampleType(Enum):
    """Types of mathematical objects that can be examples"""

    NUMERIC = "numeric"
    SET = "set"
    GROUP = "group"
    FUNCTION = "function"
    GROUPELEMENT = "group_element"
    CONSTANT = "constant"
    ANY = "ANY"

    @staticmethod
    def is_tuple_type(*types: "ExampleType") -> "Tuple[ExampleType, ...]":
        """
        Create a tuple type from component types.

        Args:
            *types: The component types, in order.

        Returns:
            A plain tuple containing the given component types (empty when
            called without arguments).
        """
        return tuple(types)


class ConceptType(Enum):
    """Kinds of mathematical concept an entity can represent."""

    # General n-ary relation
    RELATION = "relation"
    # Maps inputs to outputs
    FUNCTION = "function"
    # Special case: returns boolean
    PREDICATE = "predicate"
    # Represents a fixed mathematical object
    CONSTANT = "constant"


class ExampleStructure:
    """
    Describes the shape of a concept's examples: the kind of concept, the
    type of each tuple component, and (for functions and predicates) how many
    leading components are inputs.
    """

    def __init__(
        self,
        concept_type: ConceptType,
        component_types: Tuple[ExampleType, ...],
        input_arity: Optional[int] = None,
    ):
        """
        Store the structure and check it for internal consistency.

        Args:
            concept_type: Kind of concept the examples belong to.
            component_types: Expected type of each example component.
            input_arity: Number of input components; required for functions
                and predicates.

        Raises:
            ValueError: If a function or predicate omits ``input_arity``, if a
                function has no component beyond its inputs, or if a
                predicate's component count differs from its arity.
        """
        self.concept_type = concept_type
        self.component_types = component_types
        self.input_arity = input_arity

        arity_required = concept_type in (ConceptType.FUNCTION, ConceptType.PREDICATE)
        if arity_required and input_arity is None:
            raise ValueError(f"{concept_type.value} must specify input_arity")

        if concept_type == ConceptType.FUNCTION:
            # Functions need every input type plus at least one output type.
            if input_arity >= len(component_types):
                raise ValueError(
                    f"Functions with arity {input_arity} must have at least {input_arity + 1} component types "
                    f"(got {len(component_types)})"
                )
            return

        if concept_type == ConceptType.PREDICATE:
            # Predicates list input types only.
            if input_arity != len(component_types):
                raise ValueError(
                    f"Predicates with arity {input_arity} must have {input_arity} component types "
                    f"(got {len(component_types)})"
                )

    def validate_example(self, value: Any) -> bool:
        """
        Check a candidate example against this structure.

        A non-tuple value is accepted only when ``input_arity`` is 1, in which
        case it is treated as a 1-tuple. The candidate must then have exactly
        one component per entry of ``component_types`` and every component
        must pass the per-type check.
        """
        candidate = value
        if not isinstance(candidate, tuple):
            if self.input_arity != 1:
                return False
            candidate = (candidate,)

        if len(candidate) != len(self.component_types):
            return False

        for component, expected_type in zip(candidate, self.component_types):
            if not self._validate_single_type(component, expected_type):
                return False
        return True

    def _validate_single_type(self, value: Any, example_type: ExampleType) -> bool:
        """Check one component against its expected type; unlisted types are rejected."""
        if example_type == ExampleType.NUMERIC:
            return isinstance(value, (int, float))
        if example_type == ExampleType.SET:
            return isinstance(value, (set, frozenset))
        if example_type == ExampleType.GROUP:
            # Duck-typed: anything exposing a ``carrier`` attribute counts.
            return hasattr(value, "carrier")
        if example_type == ExampleType.FUNCTION:
            return callable(value)
        if example_type == ExampleType.CONSTANT:
            return isinstance(value, tuple) and len(value) == 1
        if example_type == ExampleType.GROUPELEMENT:
            # No structural check is applied to group elements.
            return True
        return False

    def __str__(self) -> str:
        """Return a readable one-line description of the structure."""
        if self.concept_type:
            kind_text = self.concept_type.value
        else:
            kind_text = "None"

        if isinstance(self.component_types, tuple):
            types_text = ", ".join(member.value for member in self.component_types)
        elif self.component_types:
            types_text = self.component_types.value
        else:
            types_text = "None"

        if self.input_arity is None:
            arity_text = "None"
        else:
            arity_text = str(self.input_arity)

        return (
            f"ExampleStructure("
            f"concept_type={kind_text}, "
            f"component_types=({types_text}), "
            f"input_arity={arity_text})"
        )

    def __repr__(self) -> str:
        """Mirror ``__str__`` so error messages show the readable form."""
        return str(self)


@total_ordering
class Example:
    """
    Represents an example or nonexample of a mathematical concept.

    Instances are hashable and ordered (via ``total_ordering``) so they can be
    stored in sets and sorted by size.
    """

    def __init__(
        self,
        value: Tuple[Any, ...],
        example_structure: "ExampleStructure",
        is_example: bool = True,
    ):
        """
        Args:
            value: The example's components.
            example_structure: Structure the value must conform to.
            is_example: True for an example, False for a nonexample.

        Raises:
            TypeError: If ``example_structure.validate_example(value)`` is falsy.
        """
        self.value = value
        self.example_structure = example_structure
        self.is_example = is_example

        # Validate value type based on example_structure
        if not example_structure.validate_example(value):
            raise TypeError(
                f"Value {value} is not valid for example structure {example_structure}"
            )

    def size(self) -> "Union[int, float, Tuple[int, ...]]":
        """
        Returns a measure of the size of the example.
        For numeric values: absolute value
        For sets: cardinality
        For tuples: tuple of sizes
        For groups: size of carrier set
        """
        component_types = self.example_structure.component_types
        if isinstance(component_types, tuple):
            # A bare (non-tuple) value is treated as a 1-tuple so it can be
            # paired with its component type instead of being iterated.
            values = self.value if isinstance(self.value, tuple) else (self.value,)
            return tuple(
                self._single_size(val, typ) for val, typ in zip(values, component_types)
            )
        return self._single_size(self.value, component_types)

    def _single_size(
        self, value: Any, example_type: "ExampleType"
    ) -> "Union[int, float]":
        """Calculate size for a single value of given type"""
        if example_type == ExampleType.NUMERIC:
            return abs(value)
        elif example_type == ExampleType.SET:
            return len(value)
        elif example_type == ExampleType.GROUP:
            return len(value.carrier)
        return float("inf")  # Other types (e.g. functions) have no well-defined size

    @staticmethod
    def _type_sort_key(component_types: Any) -> Any:
        """Turn component types into something orderable: their ``.value`` strings."""
        if isinstance(component_types, tuple):
            return tuple(Example._type_sort_key(t) for t in component_types)
        return component_types.value

    def __eq__(self, other: "Example") -> bool:
        """Examples are equal when value, structure and polarity all match."""
        if not isinstance(other, Example):
            return NotImplemented
        return (
            self.value == other.value
            and self.example_structure == other.example_structure
            and self.is_example == other.is_example
        )

    def __lt__(self, other: "Example") -> bool:
        """Order by component types first, then by size, then by value."""
        if not isinstance(other, Example):
            return NotImplemented
        self_types = self.example_structure.component_types
        other_types = other.example_structure.component_types
        if self_types != other_types:
            # component_types is normally a tuple, which has no ``.value``;
            # compare the member values instead.
            return self._type_sort_key(self_types) < self._type_sort_key(other_types)

        # Compare based on size first
        self_size = self.size()
        other_size = other.size()

        if isinstance(self_size, tuple) and isinstance(other_size, tuple):
            # For tuples, compare element-wise
            if len(self_size) != len(other_size):
                return len(self_size) < len(other_size)
            # NOTE(review): equal size tuples return False here without
            # consulting the values below; kept as-is to preserve ordering.
            return self_size < other_size

        if isinstance(self_size, (int, float)) and isinstance(other_size, (int, float)):
            if self_size != other_size:
                return self_size < other_size

        # If sizes are equal, compare values
        if isinstance(self.value, tuple) and isinstance(other.value, tuple):
            return self.value < other.value
        return str(self.value) < str(other.value)

    def __hash__(self) -> int:
        """Make Example hashable for storage in sets."""

        def _freeze(item: Any) -> Any:
            # Sets become frozensets so equal sets hash equally regardless of
            # iteration order; lists and dicts fall back to their string form.
            if isinstance(item, (set, frozenset)):
                return frozenset(item)
            if isinstance(item, (list, dict)):
                return str(item)
            return item

        if isinstance(self.value, tuple):
            hashable_value = tuple(_freeze(x) for x in self.value)
        else:
            hashable_value = _freeze(self.value)

        return hash(
            (hashable_value, self.example_structure.concept_type, self.is_example)
        )


class ExampleCollection:
    """
    Holds the examples and nonexamples known for one concept.

    Examples and nonexamples live in two separate sets. The collection offers
    adding (gated by capability flags), removal, and retrieval, including the
    n smallest entries according to ``Example`` ordering.
    """

    def __init__(self, example_structure: ExampleStructure):
        """Start with empty example and nonexample sets for the given structure."""
        self.example_structure = example_structure
        self._examples = set()  # examples accepted so far
        self._nonexamples = set()  # nonexamples accepted so far
        self._warned_duplicates = set()  # duplicates already seen once

    def add(
        self,
        example: Example,
        verify_fn: Optional[Callable] = None,
        override: bool = False,
        can_add_examples_flag: bool = True,
        can_add_nonexamples_flag: bool = True,
    ):
        """
        Add an example or nonexample to the collection.

        Duplicates are silently ignored. Computational verification through
        ``verify_fn`` is currently disabled (see the preserved legacy block at
        the end of this method), so ``verify_fn`` is accepted but not called.

        Args:
            example: The example to add
            verify_fn: Verification function; currently unused
            override: If True, add without consulting the capability flags.
                Should be used only for examples synthesized by a proof mechanism.
            can_add_examples_flag: The owning entity's can_add_examples capability.
            can_add_nonexamples_flag: The owning entity's can_add_nonexamples capability.

        Raises:
            TypeError: If the example's component types differ from the collection's.
        """
        if (
            example.example_structure.component_types
            != self.example_structure.component_types
        ):
            raise TypeError(
                f"Example structure {example.example_structure} does not match "
                f"collection structure {self.example_structure}"
            )

        is_positive = example.is_example
        bucket = self._examples if is_positive else self._nonexamples

        # Duplicates: record that we have seen this one, then do nothing.
        if example in bucket:
            if example not in self._warned_duplicates:
                # logger.warning(f"Duplicate example/nonexample ignored: {example.value}")
                self._warned_duplicates.add(example)
            return

        # With override the capability flags are not consulted at all.
        if not override:
            permitted = can_add_examples_flag if is_positive else can_add_nonexamples_flag
            if not permitted:
                if is_positive:
                    logger.warning(
                        f"Attempted to add example {example.value} but can_add_examples_flag is False. Example not added."
                    )
                else:
                    logger.warning(
                        f"Attempted to add nonexample {example.value} but can_add_nonexamples_flag is False. Nonexample not added."
                    )
                return

        # Either override was set or the relevant capability flag allowed it.
        bucket.add(example)

        # --- Original Verification Logic (Currently Bypassed) ---
        # NOTE: Verification logic is currently bypassed (commented out) as per user request.
        # TODO(_; 5/6): REINTRODUCE AFTER ANY RULE CHANGES.
        # This assumes verification failures themselves are not the source of hangs,
        # but rather the verification *process* timing out or leaving rogue threads.
        # Re-enable if correctness issues arise due to lack of verification.
        # if verify_fn is not None and not override: # 'not override' is implicitly handled by logic above
            # Get value(s) to pass to verify_fn
            # Assuming verify_fn takes the same structure as the concept implementation
            # example_value = example.value

            # Define helper to get the actual verification result based on concept type
            # def _get_verification_result():
            #     # Calls verify_fn based on type and returns the *actual* result
            #     input_arity = self.example_structure.input_arity
            #     inputs = example.value[:input_arity]
            #
            #     if self.example_structure.concept_type == ConceptType.FUNCTION:
            #         actual_outputs = verify_fn(*inputs)
            #         # Convert single outputs to tuples for consistent handling
            #         if not isinstance(actual_outputs, tuple):
            #              return (actual_outputs,) # Return tuple
            #         return actual_outputs
            #     elif self.example_structure.concept_type == ConceptType.CONSTANT:
            #         return verify_fn() # Return the constant value
            #     elif self.example_structure.concept_type == ConceptType.PREDICATE:
            #          result = verify_fn(*inputs)
            #          if not isinstance(result, bool):
            #              # Raise error here to be caught by the outer exception handler
            #              raise TypeError(f"Predicate {verify_fn.__name__} did not return bool for {inputs}, got {type(result)}")
            #          return result # Return boolean result
            #     elif self.example_structure.concept_type == ConceptType.RELATION:
            #          # Assuming verify_fn handles n-ary input and returns bool
            #          result = verify_fn(*example.value)
            #          if not isinstance(result, bool):
            #               raise TypeError(f"Relation {verify_fn.__name__} did not return bool for {example.value}, got {type(result)}")
            #          return result
            #     else:
            #         raise NotImplementedError(f"Verification logic not implemented for concept type: {self.example_structure.concept_type}")

            # --- Verification with ThreadPoolExecutor Timeout ---
            # actual_result = None
            # verification_ok = True # Flag to track if verification completed without timeout/error
            # try:
            #     with ThreadPoolExecutor(max_workers=1) as executor:
            #         future = executor.submit(_get_verification_result) # Submit the helper
            #         try:
            #             # Wait for the result with timeout
            #             actual_result = future.result(timeout=_example_verification_timeout)
            #         except FuturesTimeoutError:
            #             logger.warning(
            #                 f"Example verification timed out after {_example_verification_timeout}s for value {example_value}"
            #             )
            #             verification_ok = False
            #         except Exception as e:
            #             # Catches exceptions from _get_verification_result (TypeError, NotImplementedError, etc.)
            #             logger.error(
            #                 f"Error during example verification execution for value {example_value}: {e}"
            #             )
            #             verification_ok = False
            # except Exception as pool_error:
            #     # Handle errors related to the ThreadPoolExecutor itself
            #     logger.error(f"ThreadPoolExecutor error during verification: {pool_error}")
            #     verification_ok = False
            # --- End Verification with Timeout ---

            # if not verification_ok:
            #     return # Exit if verification timed out or had an error

            # --- Compare actual_result with expected based on capability flags ---
            # No longer need the __self__ hack, use passed flags directly

            # Get concept kind for error messages
            # concept_kind = self.example_structure.concept_type.value.capitalize()
            # input_arity = self.example_structure.input_arity
            # is_predicate_or_relation = self.example_structure.concept_type in [ConceptType.PREDICATE, ConceptType.RELATION]

            # if example.is_example: # Trying to add an EXAMPLE
            #     # Mismatch check:
            #     mismatch = False
            #     if is_predicate_or_relation:
            #         # For predicates/relations, mismatch if compute returns False for an example
            #         mismatch = not actual_result
            #     else:
            #         # For functions/constants, compare computed result (always tuple for functions from helper)
            #         # with the expected part of the example value tuple.
            #         expected_value_tuple = example.value[input_arity:]
            #         # Special case for Constant: actual_result is the raw value, not a tuple from helper
            #         if self.example_structure.concept_type == ConceptType.CONSTANT:
            #             actual_value_for_compare = actual_result
            #             expected_value_for_compare = expected_value_tuple[0] # Constant example value is (val,)
            #         else: # Function
            #             actual_value_for_compare = actual_result # Use the tuple returned by helper
            #             expected_value_for_compare = expected_value_tuple
            #
            #         mismatch = (actual_value_for_compare != expected_value_for_compare)
            #
            #     if mismatch and can_add_nonexamples_flag:
            #         # Mismatch found, and entity claims some reliability for non-examples. Raise error.
            #         inputs = example.value[:input_arity]
            #         args_str = str(inputs)
            #         # Get expected value string for error message
            #         expected_value_str = str(example.value[input_arity:])
            #         if is_predicate_or_relation:
            #              raise ValueError(f"{concept_kind} example verification failed: {concept_kind[0]}{args_str} computed False but expected True")
            #         elif self.example_structure.concept_type == ConceptType.FUNCTION:
            #              raise ValueError(f"{concept_kind} example verification failed: f{args_str} computed {actual_result} but expected {expected_value_str}")
            #         elif self.example_structure.concept_type == ConceptType.CONSTANT:
            #              # Use actual_result directly as it's not tupleized by helper for constants
            #              raise ValueError(f"{concept_kind} example verification failed: computed {actual_result}, expected {expected_value_str[1:-1]}") # Strip parens from tuple string
            #     else:
            #         # No reliable mismatch detected, add the example.
            #         target_set.add(example)
            #
            # else: # Trying to add a NON-EXAMPLE
            #     # Contradiction check:
            #     contradiction = False
            #     if is_predicate_or_relation:
            #         # Contradiction if compute returns True for a non-example
            #         contradiction = actual_result
            #     else:
            #         # For functions/constants, check if computed result matches the non-example's value part
            #         non_example_value_tuple = example.value[input_arity:]
            #         # Special case for Constant:
            #         if self.example_structure.concept_type == ConceptType.CONSTANT:
            #             actual_value_for_compare = actual_result
            #             expected_value_for_compare = non_example_value_tuple[0]
            #         else: # Function
            #             actual_value_for_compare = actual_result
            #             expected_value_for_compare = non_example_value_tuple
            #
            #         contradiction = (actual_value_for_compare == expected_value_for_compare)
            #
            #     if contradiction and can_add_examples_flag:
            #         # Compute result matched the non-example value (or was True for predicate),
            #         # and entity CAN reliably identify examples. This is a contradiction. Raise error.
            #         inputs = example.value[:input_arity]
            #         args_str = str(inputs)
            #         # Get non-example value string for error message
            #         non_example_value_str = str(example.value[input_arity:])
            #         if is_predicate_or_relation:
            #              raise ValueError(f"{concept_kind} nonexample verification failed: {concept_kind[0]}{args_str} computed True but expected False")
            #         elif self.example_structure.concept_type == ConceptType.FUNCTION:
            #              raise ValueError(f"{concept_kind} nonexample verification failed: f{args_str} computed {actual_result} which matched the non-example value {non_example_value_str}")
            #         elif self.example_structure.concept_type == ConceptType.CONSTANT:
            #              raise ValueError(f"{concept_kind} nonexample verification failed: computed {actual_result} matched the non-example value {non_example_value_str[1:-1]}")
            #     else:
            #         # No reliable contradiction detected, add the non-example.
            #         target_set.add(example)

        # else:
        #     # No verification needed (no verify_fn or override=True)
        #     # OR verification was bypassed above. Add directly to the target set.
        #     target_set.add(example) # This line is redundant if the logic above correctly adds to target_set
        # --- End Original Verification Logic ---

    def remove(self, example: Example) -> bool:
        """
        Remove an example or nonexample from the collection.

        The set searched is chosen by ``example.is_example``.

        Args:
            example: The example to remove

        Returns:
            True if the example was found and removed, False otherwise
        """
        bucket = self._examples if example.is_example else self._nonexamples
        if example not in bucket:
            return False
        bucket.remove(example)
        return True

    def get_smallest_examples(self, n: int) -> "List[Example]":
        """Return the first n examples in sorted (``Example``) order."""
        ordered = sorted(self._examples)
        return ordered[:n]

    def get_smallest_nonexamples(self, n: int) -> "List[Example]":
        """Return the first n nonexamples in sorted (``Example``) order."""
        ordered = sorted(self._nonexamples)
        return ordered[:n]

    def get_examples(self) -> "Set[Example]":
        """Return the underlying set of examples (not a copy)."""
        return self._examples

    def get_nonexamples(self) -> "Set[Example]":
        """Return the underlying set of nonexamples (not a copy)."""
        return self._nonexamples


# =============================================================================
# Mathematical Concepts
# =============================================================================


class Entity:
    """
    Base class for mathematical entities (concepts, conjectures, theorems).
    Each entity has:
    1. Symbolic definition using the expression framework
    2. Computational implementation for concrete evaluation
    3. Translation rules for different target languages
    4. Example management system
    """

    def __init__(
        self,
        name: str,
        description: str,
        symbolic_definition: callable,
        computational_implementation: callable,
        example_structure: "Optional[ExampleStructure]" = None,
        lean4_translation: "Optional[callable]" = None,
        prolog_translation: "Optional[callable]" = None,
        z3_translation: "Optional[callable]" = None,
        can_add_examples: bool = True,  # NOTE: These names are provisional and may change
        can_add_nonexamples: bool = True,  # NOTE: These names are provisional and may change
    ):
        """
        Initialize a mathematical entity with its definitions and translations.

        Args:
            name: Name of the entity (also used by ``__hash__``)
            description: Human-readable description
            symbolic_definition: Callable returning the symbolic expression
            computational_implementation: Callable used by ``compute``; may be
                None, in which case ``has_computational_implementation()`` is False
            example_structure: Structure handed to the ``ExampleCollection``
            lean4_translation: Stored Lean 4 translation callable (optional)
            prolog_translation: Stored Prolog translation callable (optional)
            z3_translation: Callable used by ``to_z3`` (optional)
            can_add_examples: Whether the computational implementation can
                reliably verify examples
            can_add_nonexamples: Whether the computational implementation can
                reliably verify nonexamples
        """
        self.name = name
        self.description = description
        self._symbolic = symbolic_definition
        self._compute = computational_implementation
        self._has_computational_implementation = (
            computational_implementation is not None
        )
        self._lean4 = lean4_translation
        self._prolog = prolog_translation
        self._z3 = z3_translation
        self._has_z3_translation = z3_translation is not None
        self.examples = ExampleCollection(example_structure)

        # Flags indicating whether computational implementation can reliably
        # verify examples/nonexamples
        self.can_add_examples = can_add_examples
        self.can_add_nonexamples = can_add_nonexamples
        # How many times map_iterate has been applied; tracked because too deep
        # applications lead to excessive recursion and timeouts.
        self.map_iterate_depth: int = 0

    def _add_verified(self, value: Tuple[Any, ...], is_example: bool, override: bool):
        """Wrap ``value`` in an Example and hand it to the example collection."""
        example = Example(value, self.examples.example_structure, is_example)
        self.examples.add(
            example,
            verify_fn=self._compute if self.has_computational_implementation() else None,
            override=override,
            # Pass capability flags
            can_add_examples_flag=self.can_add_examples,
            can_add_nonexamples_flag=self.can_add_nonexamples,
        )

    def add_example(self, value: Tuple[Any, ...], override: bool = False):
        """
        Add an example of the concept

        Args:
            value: The example value to add
            override: If True, bypass computational verification and add directly
        """
        self._add_verified(value, True, override)

    def add_nonexample(self, value: Tuple[Any, ...], override: bool = False):
        """
        Add a nonexample of the concept

        Args:
            value: The nonexample value to add
            override: If True, bypass computational verification and add directly
        """
        self._add_verified(value, False, override)

    def get_smallest_examples(self, n: int) -> "List[Example]":
        """Get the n smallest examples"""
        return self.examples.get_smallest_examples(n)

    def get_smallest_nonexamples(self, n: int) -> "List[Example]":
        """Get the n smallest nonexamples"""
        return self.examples.get_smallest_nonexamples(n)

    def get_examples(self) -> "Set[Example]":
        """Get all examples"""
        return self.examples.get_examples()

    def get_nonexamples(self) -> "Set[Example]":
        """Get all nonexamples"""
        return self.examples.get_nonexamples()

    def is_predicate(self) -> bool:
        """Check if the concept is a predicate"""
        return self.examples.example_structure.concept_type == ConceptType.PREDICATE

    def is_function(self) -> bool:
        """Check if the concept is a function"""
        return self.examples.example_structure.concept_type == ConceptType.FUNCTION

    def is_constant(self) -> bool:
        """Check if the concept is a constant"""
        return self.examples.example_structure.concept_type == ConceptType.CONSTANT

    def is_relation(self) -> bool:
        """Check if the concept is a relation"""
        return self.examples.example_structure.concept_type == ConceptType.RELATION

    def get_input_arity(self) -> int:
        """Get the arity of the input"""
        return self.examples.example_structure.input_arity

    def get_component_types(self) -> "Tuple[ConceptType, ...]":
        """Get the component types"""
        return self.examples.example_structure.component_types

    def verify_example(self, value: "Union[Any, Tuple[Any, ...]]") -> bool:
        """
        Verify if a given value is actually an example of the concept
        by using the computational implementation.

        A non-tuple value is treated as a 1-tuple. For function concepts the
        last component is the expected output and the preceding components are
        the inputs. For every other concept type the result of the computation
        is returned as-is. Any exception raised along the way yields False.
        """
        try:
            if not isinstance(value, tuple):
                value = (value,)

            # For functions, verify output matches computation
            if self.examples.example_structure.concept_type == ConceptType.FUNCTION:
                inputs = value[:-1]  # All but last value
                expected_outputs = value[-1]  # Last value
                actual_outputs = self.compute(*inputs)
                return actual_outputs == expected_outputs

            # For predicates, just run computation
            return self.compute(*value)

        except Exception:
            # Deliberate best-effort: a failing computation counts as "not verified".
            return False

    def remove_invalid_examples(self) -> "Tuple[List[Example], List[Example]]":
        """
        Verify all examples and nonexamples in an entity using its computational implementation.
        Remove any that fail verification.

        Examples are only checked when ``can_add_examples`` is set, nonexamples
        only when ``can_add_nonexamples`` is set.

        Returns:
            Tuple of (removed_examples, removed_nonexamples) - lists of examples that were removed
        """
        if not self.has_computational_implementation():
            return ([], [])  # Can't verify without a computational implementation

        # Track removed examples
        removed_examples = []
        removed_nonexamples = []

        # Verify examples
        if self.can_add_examples:
            # Iterate over a copy: the collection is modified inside the loop.
            for example in list(self.examples.get_examples()):
                try:
                    if not self.verify_example(example.value):
                        if self.examples.remove(example):
                            removed_examples.append(example)
                except Exception:
                    # If verification raises an exception, remove the example
                    if self.examples.remove(example):
                        removed_examples.append(example)

        # Verify nonexamples
        if self.can_add_nonexamples:
            # Iterate over a copy: the collection is modified inside the loop.
            for nonexample in list(self.examples.get_nonexamples()):
                try:
                    if self.verify_example(nonexample.value):
                        if self.examples.remove(nonexample):
                            removed_nonexamples.append(nonexample)
                except Exception:  # keep nonexample if verification fails
                    pass

        return (removed_examples, removed_nonexamples)

    def symbolic(self, *args) -> "Expression":
        """Returns the raw symbolic mathematical definition"""
        return self._symbolic(*args)

    def compute(self, *args):
        """Returns the computational result"""
        return self._compute(*args)

    def has_computational_implementation(self) -> bool:
        """Returns True if this entity has a computational implementation, False otherwise."""
        return self._has_computational_implementation

    def has_z3_translation(self) -> bool:
        """Returns True if this entity has a Z3 translation defined, False otherwise."""
        return self._has_z3_translation

    def to_lean4(
        self,
        *args,
    ) -> str:
        """Returns Lean 4 translation."""
        raise NotImplementedError("to_lean4 is not yet implemented")

    def to_prolog(
        self,
        *args,
    ) -> str:
        """Returns Prolog translation."""
        raise NotImplementedError("to_prolog is not yet implemented")

    def to_z3(
        self,
        *args,
    ) -> str:
        """
        Returns Z3 translation.

        Calls the stored translation callable with ``args``. If no translation
        was supplied the stored value is None and the call raises TypeError;
        check ``has_z3_translation()`` first.
        """
        return self._z3(*args)

    def __call__(self, *args) -> "Expression":
        """Returns symbolic definition unless one of the arguments is an Entity."""
        # Check if any of the arguments is an Entity
        if any(isinstance(arg, Entity) for arg in args):
            # If we have nested concepts, evaluate them first
            # NOTE(review): each nested entity is called with the full ``args``
            # tuple, which still contains that entity itself, so this branch
            # looks like it recurses without bound unless a subclass overrides
            # __call__ -- confirm the intended arguments for the nested call.
            evaluated_args = [
                arg(*args) if isinstance(arg, Entity) else arg for arg in args
            ]
            return self.symbolic(*evaluated_args)
        # Otherwise just return the symbolic definition
        return self.symbolic(*args)

    def set_z3_translation(
        self,
        translation: callable,
    ):
        """Set or update the Z3 translation after entity creation.

        Args:
            translation: Callable used by ``to_z3``; passing None clears the
                translation so ``has_z3_translation()`` reports False

        Returns:
            This entity, to allow chaining
        """
        self._z3 = translation
        # Derive the flag the same way __init__ does, so None is not reported as present.
        self._has_z3_translation = translation is not None
        return self

    def set_computational_implementation(
        self,
        implementation: callable,
        can_add_examples: bool = True,
        can_add_nonexamples: bool = True,
    ):
        """Set or update the computational implementation after entity creation.

        Args:
            implementation: A callable that implements the computation for this entity;
                passing None clears it so ``has_computational_implementation()`` reports False
            can_add_examples: Whether this entity can reliably verify examples
            can_add_nonexamples: Whether this entity can reliably verify nonexamples

        Returns:
            This entity, to allow chaining
        """
        self._compute = implementation
        # Derive the flag the same way __init__ does, so None is not reported as present.
        self._has_computational_implementation = implementation is not None
        self.can_add_examples = can_add_examples
        self.can_add_nonexamples = can_add_nonexamples
        return self

    def __eq__(self, other: "Entity") -> bool:
        """
        Check if two entities are equal by comparing their symbolic definitions.
        Two entities are considered equal if:
        1. They are the same object, or
        2. They are of the same type (both Concepts, both Conjectures, or both Theorems),
           and, for concepts, have the same concept type and input arity, and
        3. Their symbolic definitions produce equal expressions on a small set
           of integer probe inputs, with at least one probe evaluating
           successfully on both sides.

        This is a sampled check, not a proof of equivalence.
        """
        # Equality must be reflexive even when the symbolic definition cannot
        # be evaluated on the integer probe inputs used below.
        if self is other:
            return True

        if not isinstance(other, Entity):
            return NotImplemented

        # Must be same subclass type
        if type(self) is not type(other):
            return False

        # For concepts, check concept type (function vs predicate)
        if isinstance(self, Concept):
            if not hasattr(self.examples, "example_structure") or not hasattr(
                other.examples, "example_structure"
            ):
                return False

            # Check concept types match (function vs predicate)
            if (
                self.examples.example_structure.concept_type
                != other.examples.example_structure.concept_type
            ):
                return False

            # Check arities match if they exist
            if hasattr(self.examples.example_structure, "input_arity"):
                if (
                    self.examples.example_structure.input_arity
                    != other.examples.example_structure.input_arity
                ):
                    return False

        # Compare symbolic definitions
        if not (hasattr(self, "_symbolic") and hasattr(other, "_symbolic")):
            return False

        try:
            # Get function signatures
            if isinstance(self, Concept):
                self_sig = self.get_input_arity()
                other_sig = other.get_input_arity()
            else:
                # Fallback to co_argcount for non-Concept entities
                self_sig = self._symbolic.__code__.co_argcount
                other_sig = other._symbolic.__code__.co_argcount

            if self_sig != other_sig:
                return False

            # Dummy arguments for testing symbolic equality; varied inputs
            # help catch differences.
            test_cases = [
                tuple(1 for _ in range(self_sig)),  # All 1s
                tuple(i + 1 for i in range(self_sig)),  # Sequential numbers
            ]

            successful_tests = 0
            for test_args in test_cases:
                try:
                    self_expr = self.symbolic(*test_args)
                    other_expr = other.symbolic(*test_args)
                except Exception:
                    # A probe that cannot be evaluated is skipped, not a mismatch.
                    continue

                if self_expr != other_expr:
                    return False
                # ``==`` is checked separately so that a type whose ``==`` and
                # ``!=`` disagree is not counted as a successful match.
                if self_expr == other_expr:
                    successful_tests += 1

            # Only consider equal if at least one probe evaluated and matched
            return successful_tests > 0

        except Exception as e:
            print(f"Unexpected error in equality check: {e}")
            import traceback
            traceback.print_exc()
            return False

    def __hash__(self) -> int:
        """
        Make Entity hashable based on name and type.

        NOTE: ``__eq__`` compares symbolic definitions and ignores ``name``, so
        two entities that compare equal but carry different names will hash
        differently. Hash-based containers therefore only deduplicate
        equal entities that also share a name.
        """
        return hash((self.name, type(self)))


class Concept(Entity):
    """
    Entity subclass for a mathematical object, property, function, or relation.

    Typical instances:
    - Properties: is_prime, is_even
    - Functions: addition, multiplication
    - Relations: less_than, divides
    - Constructions: specific groups, sets

    Unlike the base class, the example structure is a required argument here.
    """

    def __init__(
        self,
        name: str,
        description: str,
        symbolic_definition: callable,
        computational_implementation: callable,
        example_structure: ExampleStructure,
        lean4_translation: "Optional[callable]" = None,
        prolog_translation: "Optional[callable]" = None,
        z3_translation: "Optional[callable]" = None,
        can_add_examples: bool = True,
        can_add_nonexamples: bool = True,
    ):
        """Forward every argument unchanged to ``Entity.__init__``."""
        super().__init__(
            name=name,
            description=description,
            symbolic_definition=symbolic_definition,
            computational_implementation=computational_implementation,
            example_structure=example_structure,
            lean4_translation=lean4_translation,
            prolog_translation=prolog_translation,
            z3_translation=z3_translation,
            can_add_examples=can_add_examples,
            can_add_nonexamples=can_add_nonexamples,
        )


class Conjecture(Entity):
    """
    A mathematical claim that may or may not be provable.

    A conjecture carries:
    1. A symbolic definition (required)
    2. A computational implementation (optional - may be partial or None)
    3. Translations to target languages

    The example-related methods ``verify_example``, ``add_example`` and
    ``add_nonexample`` are overridden to raise NotImplementedError.
    """

    def __init__(
        self,
        name: str,
        description: str,
        symbolic_definition: callable,
        example_structure: "Optional[ExampleStructure]" = None,
        lean4_translation: "Optional[callable]" = None,
        prolog_translation: "Optional[callable]" = None,
        z3_translation: "Optional[callable]" = None,
        computational_implementation: callable = None,
        can_add_examples: bool = True,
        can_add_nonexamples: bool = True,
    ):
        """
        Build the conjecture, substituting a stand-in computation when none is given.

        When ``computational_implementation`` is None, a callable that accepts
        any positional arguments and returns None is passed to the base class
        instead.
        """
        implementation = (
            (lambda *args: None)
            if computational_implementation is None
            else computational_implementation
        )
        super().__init__(
            name=name,
            description=description,
            symbolic_definition=symbolic_definition,
            computational_implementation=implementation,
            example_structure=example_structure,
            lean4_translation=lean4_translation,
            prolog_translation=prolog_translation,
            z3_translation=z3_translation,
            can_add_examples=can_add_examples,
            can_add_nonexamples=can_add_nonexamples,
        )

    def verify_example(self, value: "Union[Any, Tuple[Any, ...]]") -> bool:
        """Always raises NotImplementedError: conjectures have no examples to verify."""
        raise NotImplementedError("Conjectures don't have examples to verify")

    def add_example(self, value: "Union[Any, Tuple[Any, ...]]", override: bool = False):
        """Always raises NotImplementedError: conjectures do not hold examples."""
        raise NotImplementedError("Conjectures don't have examples")

    def add_nonexample(
        self, value: "Union[Any, Tuple[Any, ...]]", override: bool = False
    ):
        """Always raises NotImplementedError: conjectures do not hold non-examples."""
        raise NotImplementedError("Conjectures don't have non-examples")


class Theorem(Entity):
    """
    A mathematical statement paired with the proof that establishes it.

    Beyond everything an Entity stores, a theorem keeps its ``proof`` object.
    """

    def __init__(
        self,
        name: str,
        description: str,
        symbolic_definition: callable,
        proof: Proof,
        example_structure: "Optional[ExampleStructure]" = None,
        lean4_translation: "Optional[callable]" = None,
        prolog_translation: "Optional[callable]" = None,
        z3_translation: "Optional[callable]" = None,
        computational_implementation: callable = None,
        can_add_examples: bool = True,
        can_add_nonexamples: bool = True,
    ):
        """
        Initialise the base entity, then attach the proof.

        ``computational_implementation`` is forwarded as given (None by
        default); every other argument except ``proof`` is likewise passed
        straight through to ``Entity.__init__``.
        """
        # Positional order follows Entity.__init__'s parameter list.
        super().__init__(
            name,
            description,
            symbolic_definition,
            computational_implementation,
            example_structure,
            lean4_translation,
            prolog_translation,
            z3_translation,
            can_add_examples,
            can_add_nonexamples,
        )
        self.proof = proof
