"""
MolEnc IO Schema for inference.

Unified schema that supports both simple SMILES-based inference
and 3D conformer-based (Allegro) inference.
"""

from dataclasses import dataclass
from typing import Optional

import numpy as np
from rdkit import Chem


@dataclass(slots=True)
class MolEncIOSchema:
    """
    Unified IO schema for MolEnc inference.

    Supports dual-mode inference producing both embeddings:
    - smiles_embed: SMILES-based embedding (fast, no 3D)
    - e3nn_embed: 3D conformer-based embedding (slower, geometric)
    """

    # Input
    smiles: str = None

    # Artifact info
    artifact_s3: str = None

    # Canonicalized and processed molecule
    canon_smiles: str = None
    rdkit_mol: Optional[Chem.Mol] = None

    # Allegro-specific features (for 3D conformer mode)
    atoms: Optional[np.ndarray] = None  # Atomic numbers
    coords: Optional[np.ndarray] = None  # 3D coordinates
    bonds: Optional[np.ndarray] = None  # Bond connectivity
    atom_toks: Optional[np.ndarray] = None  # Atom tokens for graph tokenizer
    triu: Optional[np.ndarray] = None  # Upper triangular bond matrix

    # Output embeddings
    smiles_embed: Optional[np.ndarray] = None  # Simple mode: SMILES-based embedding
    e3nn_embed: Optional[np.ndarray] = (
        None  # Allegro mode: 3D conformer-based embedding
    )

    # Simple mode output (optional mask)
    embed_mask: Optional[np.ndarray] = None

    # Error handling
    error: bool = False
    error_msg: Optional[str] = None
