import re
from typing import List, Tuple, Any

# -------------------------------
# Tokenizer
# -------------------------------
# Lexer rules as (token kind, regex) pairs. Each pair becomes one named group
# in the combined pattern below; alternatives are tried in list order.
TOKEN_SPEC = [
    ("WS", r"\s+"),
    ("FORALL", r"∀"),
    ("IMPLIES", r"→"),
    ("AND", r"∧"),
    ("OR", r"∨"),
    ("XOR", r"⊕"),
    ("NOT", r"¬"),
    ("LPAREN", r"\("),
    ("RPAREN", r"\)"),
    ("COMMA", r","),
    ("IDENT", r"[A-Za-z_][A-Za-z0-9_]*"),
]
# One alternation of named groups: after a match, ``lastgroup`` is the kind of
# the rule that matched.
TOKEN_RE = re.compile("|".join("(?P<%s>%s)" % rule for rule in TOKEN_SPEC))


def tokenize(s: str):
    """Lazily split *s* into ``(kind, value)`` token pairs.

    Whitespace is recognized but not emitted. Every character of the input
    must belong to some token.

    Args:
        s: The formula text to scan.

    Yields:
        ``(kind, value)`` tuples, where ``kind`` is a rule name from
        ``TOKEN_SPEC`` and ``value`` is the matched text.

    Raises:
        ValueError: If the input contains a character no rule matches. Since
            this is a generator, the error surfaces while iterating, not at
            call time.
    """
    pos = 0
    for m in TOKEN_RE.finditer(s):
        # finditer silently skips text it cannot match, so a gap between the
        # previous match end and this match start marks an unknown character.
        if m.start() != pos:
            raise ValueError(f"Unexpected character {s[pos]!r} at position {pos}")
        pos = m.end()
        kind = m.lastgroup
        if kind == "WS":
            continue
        yield (kind, m.group())
    # Unmatched text after the final token (or an entirely unmatched input).
    if pos != len(s):
        raise ValueError(f"Unexpected character {s[pos]!r} at position {pos}")


# -------------------------------
# AST nodes
# -------------------------------
class Pred:
    """Atomic predicate application: a name plus its list of argument names."""

    def __init__(self, name: str, args: List[str]):
        # name: predicate identifier; args: argument identifiers in source order
        # (may be empty).
        self.name, self.args = name, args


class Not:
    """Logical negation of a single sub-expression."""

    def __init__(self, expr):
        # expr: the AST node being negated.
        self.expr = expr


class BinOp:
    """Binary connective node; ``op`` is one of "AND", "OR" or "XOR"."""

    def __init__(self, op: str, left, right):
        # left/right: operand AST nodes in source order.
        self.op, self.left, self.right = op, left, right


class Implies:
    """Implication node: ``left`` is the antecedent, ``right`` the consequent."""

    def __init__(self, left, right):
        self.left, self.right = left, right


class ForAll:
    """Universal quantifier node binding ``var`` over the body ``expr``."""

    def __init__(self, var: str, expr):
        self.var, self.expr = var, expr


# -------------------------------
# Parser (precedence: NOT > AND > OR/XOR > IMPLIES)
# -------------------------------
class Parser:
    """Recursive-descent parser turning ``(kind, value)`` tokens into an AST.

    Grammar (loosest binding first)::

        sentence := FORALL IDENT "(" expr ")" | expr
        expr     := orxor [ IMPLIES expr ]        # right-associative
        orxor    := and ( (OR | XOR) and )*       # left-associative
        and      := unary ( AND unary )*          # left-associative
        unary    := NOT unary | "(" expr ")" | atom
        atom     := IDENT [ "(" [ IDENT ( "," IDENT )* ] ")" ]

    Every syntax error is reported as ``ValueError``.
    """

    def __init__(self, tokens):
        # Materialize the iterable so it can be indexed and re-peeked.
        self.tokens = list(tokens)
        self.pos = 0

    def peek(self):
        """Return the current token without consuming it.

        Returns ``(None, None)`` once all tokens have been consumed.
        """
        return self.tokens[self.pos] if self.pos < len(self.tokens) else (None, None)

    def eat(self, kind=None):
        """Consume and return the current token.

        Args:
            kind: If given, the token kind that must be present.

        Raises:
            ValueError: If the current token is not of ``kind``, or if there
                is no token left to consume.
        """
        tok = self.peek()
        if kind and tok[0] != kind:
            raise ValueError(f"Expected {kind}, got {tok}")
        if tok[0] is None:
            # Never advance past the end of the token list.
            raise ValueError("Unexpected end of input")
        self.pos += 1
        return tok

    def parse(self):
        """Parse one complete sentence and return its AST root.

        Raises:
            ValueError: On any syntax error, including tokens left over after
                a complete sentence (these would otherwise be dropped
                silently).
        """
        # Optional leading universal quantifier: ∀x ( expr )
        if self.peek()[0] == "FORALL":
            self.eat("FORALL")
            var = self.eat("IDENT")[1]
            self.eat("LPAREN")
            expr = self.parse_expr()
            self.eat("RPAREN")
            node = ForAll(var, expr)
        else:
            node = self.parse_expr()
        trailing = self.peek()
        if trailing[0] is not None:
            raise ValueError(f"Unexpected trailing token {trailing}")
        return node

    def parse_expr(self):
        """Parse an implication chain; IMPLIES binds loosest and nests to the right."""
        left = self.parse_orxor()
        if self.peek()[0] == "IMPLIES":
            self.eat("IMPLIES")
            right = self.parse_expr()
            return Implies(left, right)
        return left

    def parse_orxor(self):
        """Parse a left-associative run of OR / XOR, which share one precedence level."""
        left = self.parse_and()
        while self.peek()[0] in ("OR", "XOR"):
            op = self.eat()[0]
            right = self.parse_and()
            left = BinOp(op, left, right)
        return left

    def parse_and(self):
        """Parse a left-associative run of AND operands."""
        left = self.parse_unary()
        while self.peek()[0] == "AND":
            self.eat("AND")
            right = self.parse_unary()
            left = BinOp("AND", left, right)
        return left

    def parse_unary(self):
        """Parse a negation, a parenthesized expression, or an atom."""
        tok = self.peek()[0]
        if tok == "NOT":
            self.eat("NOT")
            return Not(self.parse_unary())
        elif tok == "LPAREN":
            self.eat("LPAREN")
            expr = self.parse_expr()
            self.eat("RPAREN")
            return expr
        else:
            return self.parse_atom()

    def parse_atom(self):
        """Parse ``name`` or ``name(arg, ...)`` into a ``Pred``.

        The argument list is optional and may be empty (``name()``).
        """
        name = self.eat("IDENT")[1]
        args = []
        if self.peek()[0] == "LPAREN":
            self.eat("LPAREN")
            if self.peek()[0] != "RPAREN":
                args.append(self.eat("IDENT")[1])
                while self.peek()[0] == "COMMA":
                    self.eat("COMMA")
                    args.append(self.eat("IDENT")[1])
            self.eat("RPAREN")
        return Pred(name, args)


# -------------------------------
# Helpers
# -------------------------------
def base_words(s: str) -> str:
    """Humanize an identifier: underscores become spaces, outer whitespace is trimmed."""
    spaced = " ".join(s.split("_"))
    return spaced.strip()


def pred_subject(p: Pred) -> str:
    """Return the predicate's first argument, or the placeholder "it" if it has none."""
    if not p.args:
        return "it"
    return p.args[0]


def find_first_subject(node) -> str:
    """Choose the subject that leads the flat tuple for *node*.

    Search order:
      - predicate: its first argument, if it has one;
      - negation: the subject of its operand;
      - AND/OR/XOR: left operand first, then right;
      - implication: right-hand side first, then left;
      - quantifier: the body first, then the bound variable.

    Returns:
        The first explicit subject found, or the placeholder "it" when the
        whole tree contains none.
    """

    def explicit(n):
        # Returns None (not "it") when nothing is found. "it" is truthy, so
        # returning it here would stop every ``or`` fallback below from ever
        # consulting the other operand or the bound variable.
        if isinstance(n, Pred):
            return n.args[0] if n.args else None
        if isinstance(n, Not):
            return explicit(n.expr)
        if isinstance(n, BinOp):
            return explicit(n.left) or explicit(n.right)
        if isinstance(n, Implies):
            return explicit(n.right) or explicit(n.left)
        if isinstance(n, ForAll):
            return explicit(n.expr) or n.var
        return None

    return explicit(node) or "it"


def props_tokens(node, expected_subj: str) -> List[str]:
    """Flatten *node* into humanized property words interleaved with operator words.

    Predicate naming rules (underscores in the remainder become spaces):
      - has_no_X(y), not_X(y)   -> 'not', 'X'
      - has_X(y), is_X(y)       -> 'X'
      - anything else           -> the full name
      - ¬has_no_X(y), ¬not_X(y) -> 'X' (the double negative cancels)
      - ¬ over anything else    -> 'not' followed by the operand's tokens

    Connectives emit 'and' / 'or' / 'xor' between their operands; an
    implication emits 'is' between antecedent and consequent. A quantifier
    contributes only its body's tokens. Unrecognized nodes yield ``[]``.

    A property whose subject differs from *expected_subj* is written as
    'subject:prop' so the flat list stays unambiguous.
    """
    negative_prefixes = ("has_no_", "not_")
    neutral_prefixes = ("has_", "is_")

    def qualify(prop: str, subj: str) -> str:
        if subj == expected_subj:
            return prop
        return f"{subj}:{prop}"

    def strip_prefix(name: str, prefixes):
        # Humanized remainder after the first matching prefix, or None when no
        # prefix applies. The remainder may legitimately be "".
        for prefix in prefixes:
            if name.startswith(prefix):
                return base_words(name[len(prefix) :])
        return None

    if isinstance(node, Pred):
        owner = pred_subject(node)
        # Negative prefixes go first so that "has_no_" wins over "has_".
        negated_core = strip_prefix(node.name, negative_prefixes)
        if negated_core is not None:
            return ["not", qualify(negated_core, owner)]
        plain_core = strip_prefix(node.name, neutral_prefixes)
        if plain_core is None:
            plain_core = base_words(node.name)
        return [qualify(plain_core, owner)]

    if isinstance(node, Not):
        operand = node.expr
        if isinstance(operand, Pred):
            cancelled = strip_prefix(operand.name, negative_prefixes)
            if cancelled is not None:
                # Negating a negatively named predicate gives the bare property.
                return [qualify(cancelled, pred_subject(operand))]
        return ["not", *props_tokens(operand, expected_subj)]

    if isinstance(node, BinOp):
        connective = {"AND": "and", "OR": "or", "XOR": "xor"}[node.op]
        lhs = props_tokens(node.left, expected_subj)
        rhs = props_tokens(node.right, expected_subj)
        return [*lhs, connective, *rhs]

    if isinstance(node, Implies):
        antecedent = props_tokens(node.left, expected_subj)
        consequent = props_tokens(node.right, expected_subj)
        return [*antecedent, "is", *consequent]

    if isinstance(node, ForAll):
        return props_tokens(node.expr, expected_subj or node.var)

    return []


def to_flat_ops(ast) -> List[Tuple[str, ...]]:
    """Render *ast* as flat tuples of the form ``(subject, "is", token, ...)``.

    Operator words stay in-line and property names are humanized. The result
    is a list with duplicates removed in first-seen order.
    """
    lead = find_first_subject(ast)
    statement = (lead, "is", *props_tokens(ast, lead))
    # dict.fromkeys keeps the first occurrence of each tuple, in order.
    return list(dict.fromkeys([statement]))


# ---- "Facts" (operator-less) mode helpers ----
def words_for_pred(pred_name: str, neg: bool) -> Tuple[bool, str]:
    """Return ``(use_not_prefix, property_string)`` for operator-less facts mode.

    Args:
        pred_name: Raw predicate name, e.g. ``has_no_spots``.
        neg: True when the predicate sits under an odd number of negations.

    Returns:
        A pair whose first item says whether a separate "not" word is needed
        and whose second item is the humanized property:
          - has_no_X / not_X : the name itself is negative, so the flag is
            the inverse of *neg* (a surrounding negation cancels it);
          - has_X / is_X     : prefix stripped, flag is *neg*;
          - anything else    : full name, flag is *neg*.
    """
    # Negative prefixes are checked first so "has_no_" wins over "has_".
    # "not_" is handled here as well, to agree with the operator-preserving
    # flattening, which treats both prefixes as negative.
    for prefix in ("has_no_", "not_"):
        if pred_name.startswith(prefix):
            return (not neg, base_words(pred_name[len(prefix) :]))
    for prefix in ("has_", "is_"):
        if pred_name.startswith(prefix):
            return (neg, base_words(pred_name[len(prefix) :]))
    return (neg, base_words(pred_name))


def pred_to_fact_tuples(p: Pred, neg=False) -> Tuple[str, ...]:
    """Build the single fact tuple for predicate *p*.

    Args:
        p: The predicate to render.
        neg: Whether the predicate appears negated.

    Returns:
        One flat tuple: ``(subject, "is", prop)``, or
        ``(subject, "is", "not", prop)`` when a "not" word is required.
        Despite the plural name, this is a single tuple, not a tuple of
        tuples.
    """
    subj = pred_subject(p)
    use_not, prop = words_for_pred(p.name, neg)
    return (subj, "is", "not", prop) if use_not else (subj, "is", prop)


def to_facts(ast) -> List[Tuple[str, ...]]:
    """Collect one humanized fact per predicate in *ast*, dropping all operators.

    Each enclosing negation flips the polarity passed down to the predicates
    beneath it. Connectives, implications and quantifiers are walked through
    (left operand before right). Duplicates are removed in first-seen order.
    """

    def emit(node, neg):
        if isinstance(node, Pred):
            yield pred_to_fact_tuples(node, neg)
        elif isinstance(node, Not):
            yield from emit(node.expr, not neg)
        elif isinstance(node, (BinOp, Implies)):
            yield from emit(node.left, neg)
            yield from emit(node.right, neg)
        elif isinstance(node, ForAll):
            yield from emit(node.expr, neg)

    # dict.fromkeys keeps the first occurrence of each fact, in order.
    return list(dict.fromkeys(emit(ast, False)))


# -------------------------------
# Public API
# -------------------------------
def transform(sentences: List[str], mode: str = "flatten") -> List[Any]:
    """Transform FOL sentences into humanized tuples or phrases.

    Modes:
      - 'flatten': flat, operator-preserving tuples
      - 'facts'  : operator-less simple fact tuples
      - 'phrases': the individual strings of the 'flatten' tuples with the
                   operator words (is/and/or/xor/implies/not) removed

    Args:
        sentences: Formula strings, each parsed independently.
        mode: One of the modes above.

    Returns:
        All results across all sentences, with duplicates removed in
        first-seen order.

    Raises:
        ValueError: If *mode* is unknown. This is checked up front, so it is
            reported even when *sentences* is empty. A ``ValueError`` raised
            by the parser for a malformed sentence propagates unchanged.
    """
    if mode not in ("flatten", "facts", "phrases"):
        raise ValueError("Unknown mode")

    operator_words = {"is", "and", "or", "xor", "implies", "not"}
    outputs: List[Any] = []
    for s in sentences:
        ast = Parser(tokenize(s)).parse()
        if mode == "flatten":
            outputs.extend(to_flat_ops(ast))
        elif mode == "facts":
            outputs.extend(to_facts(ast))
        else:  # "phrases"
            outputs.extend(
                tok
                for tup in to_flat_ops(ast)
                for tok in tup
                if tok not in operator_words
            )
    # Global, order-preserving dedup across all sentences.
    return list(dict.fromkeys(outputs))


def fol_to_flat(fol: str) -> List[Tuple[str, ...]]:
    """Parse one FOL string and return its flat, operator-preserving tuples.

    The result is a new list with duplicates removed in first-seen order.
    """
    tree = Parser(tokenize(fol)).parse()
    # dict.fromkeys keeps the first occurrence of each tuple, in order.
    return list(dict.fromkeys(to_flat_ops(tree)))


# -------------------------------
# Demo
# -------------------------------
if __name__ == "__main__":
    # Sample formulas covering negation, xor, conjunction, implication,
    # disjunction and a cancelled double negative.
    samples = [
        "¬is_competitive(Legend)",
        "has_distinctive_marking(Legend) ⊕ has_no_distinctive_marking(Legend)",
        "(has_strong_muscles(Legend) ∧ has_powerful_gait(Legend)) → is_champion(Legend)",
        "is_champion(Legend) ∨ is_fast(Legend)",
        "enjoy_life(Jace) → (inspire_others(Jace) ⊕ live_fulfilling_life(Jace))",
        "¬has_no_spots(Legend)",
    ]
    flattened = transform(samples, mode="flatten")
    print("FLAT WITH OPERATORS:")
    for row in flattened:
        print(row)

    # Phrases mode on a single implication.
    single_formula = (
        "enjoy_life(Jace) → (inspire_others(Jace) ⊕ live_fulfilling_life(Jace))"
    )
    print("\nFLAT WITHOUT OPERATORS:")
    phrases = transform([single_formula], mode="phrases")
    print(phrases)
