"""
ESM2 IO Schema for protein sequence inference.

Dataclass for managing ESM2 protein language model inference input/output.
"""

from dataclasses import dataclass
from typing import Optional

import numpy as np


@dataclass(slots=True)
class ESM2IOSchema:
    """
    IO schema for ESM2 protein sequence embedding inference.

    Handles input protein sequences and output per-residue embeddings
    from ESM2 protein language models.
    """

    # Input
    sequence: str = None

    # Model info
    model_name: str = None
    rep_layer: int = None

    # Output embedding
    embed: Optional[np.ndarray] = (
        None  # Per-residue embeddings [seq_len + 2, embed_dim]
    )
    embed_mask: Optional[np.ndarray] = None  # Mask for valid positions

    # Error handling
    error: bool = False
    error_msg: Optional[str] = None
