"""
Tokenizer utilities for AION astronomical foundation models.

AION uses modality-specific tokenizers to convert continuous astronomical data 
(images, spectra, etc.) into discrete tokens suitable for transformer processing.
Each of the 39 supported modalities has its own specialized tokenizer.
"""

import torch
from torch import package

def load_tokenizer(path: str, device: str = "cpu") -> torch.nn.Module:
    """
    Load a pre-trained AION tokenizer from a ``torch.package`` archive.

    The archive at ``path`` is opened with ``torch.package.PackageImporter``
    and the object pickled as resource ``network.pkl`` inside the ``network``
    package is returned. Its tensors are placed according to ``device``.

    Per the project description, AION tokenizers are modality-specific
    (e.g., image tokenizers for DES/HSC data, spectrum tokenizers for DESI
    data) and turn continuous observations into discrete tokens using FSQ
    (Finite Scalar Quantization). This loader does not inspect or validate
    the type of the object it unpickles.

    Args:
        path (str): Path to the packaged tokenizer file
            (e.g., 'hsc_tokenizer.pt').
        device (str): Passed unchanged as ``map_location`` when unpickling,
            e.g. 'cpu' or 'cuda'. Defaults to 'cpu'.

    Returns:
        torch.nn.Module: The object stored in the archive, expected to be the
            tokenizer network.

    Warning:
        ``torch.package`` loading is pickle-based and can execute code
        shipped inside the archive. Only load tokenizer files from sources
        you trust.

    Example:
        >>> # HSC image tokenizer, placed on the GPU
        >>> hsc_tokenizer = load_tokenizer('tokenizers/hsc_tokenizer.pt', device='cuda')
        >>> hsc_tokens = hsc_tokenizer.encode(hsc_images)
        >>>
        >>> # DESI spectrum tokenizer, on the default device ('cpu')
        >>> desi_tokenizer = load_tokenizer('tokenizers/desi_tokenizer.pt')
        >>> desi_tokens = desi_tokenizer.encode(spectra)
    """
    # NOTE(security): this unpickles content from `path`; see the warning in
    # the docstring before pointing it at untrusted files.
    return package.PackageImporter(path).load_pickle(
        "network", "network.pkl", map_location=device
    )