class BaseVLInference:
    """Common base for inference runners.

    The class itself only records its configuration; the two hook methods,
    ``load_model_and_processor`` and ``run_batch_inference``, raise
    ``NotImplementedError`` and are expected to be overridden by subclasses.

    NOTE(review): "VL" presumably stands for vision-language -- confirm
    against the concrete subclasses.
    """

    def __init__(self, model_type, model_path, max_new_tokens=1, device="cuda"):
        """Record the configuration on the instance without interpreting it.

        Args:
            model_type: Stored as ``self.model_type``. Presumably selects the
                model family -- verify against subclasses.
            model_path: Stored as ``self.model_path``. Presumably where the
                model is loaded from -- verify against subclasses.
            max_new_tokens: Stored as ``self.max_new_tokens``; defaults to 1.
            device: Stored as ``self.device``; defaults to ``"cuda"``.
        """
        # Tuple assignment still binds left to right, so the attributes are
        # created in the order: model_type, model_path, max_new_tokens, device.
        self.model_type, self.model_path = model_type, model_path
        self.max_new_tokens, self.device = max_new_tokens, device

    def load_model_and_processor(self):
        """Hook for subclasses; the base implementation always raises.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        message = "Subclasses must implement `load_model_and_processor`"
        raise NotImplementedError(message)

    def run_batch_inference(self, batch):
        """Hook for subclasses; the base implementation always raises.

        Args:
            batch: Unused by the base class; its expected structure is
                defined by the overriding subclass.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        message = "Subclasses must implement `run_batch_inference`"
        raise NotImplementedError(message)
