# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import (
    TYPE_CHECKING,
    Any,
    AsyncGenerator,
    Dict,
    List,
    Literal,
    Optional,
    Sequence,
    Union,
)


if TYPE_CHECKING:
    # Imported for annotations only; every use below is a quoted forward
    # reference, so none of these packages is needed at runtime.
    from transformers import PreTrainedModel, PreTrainedTokenizer
    from vllm import AsyncLLMEngine

    from ..data import Template
    from ..data.mm_plugin import ImageInput, VideoInput
    from ..hparams import (
        DataArguments,
        FinetuningArguments,
        GeneratingArguments,
        ModelArguments,
    )


@dataclass
class Response:
    r"""
    A single result returned by ``BaseEngine.chat()``.
    """

    # Text of the response.
    response_text: str
    # Length of the response (presumably counted in tokens -- confirm against the engines).
    response_length: int
    # Length of the prompt (presumably counted in tokens -- confirm against the engines).
    prompt_length: int
    # Why the response ended; restricted to the two literals "stop" and "length".
    finish_reason: Literal["stop", "length"]


class BaseEngine(ABC):
    r"""
    Base class for inference engine of chat models.

    Subclasses must implement __init__() and the async methods
    chat(), stream_chat() and get_scores().
    """

    # Backend model object: a transformers model or a vLLM async engine.
    model: Union["PreTrainedModel", "AsyncLLMEngine"]
    # Tokenizer associated with the model.
    tokenizer: "PreTrainedTokenizer"
    # NOTE(review): presumably False for reward models that only serve get_scores() -- confirm.
    can_generate: bool
    # Chat template object from the data package.
    template: "Template"
    # Generation settings held as a plain dictionary.
    generating_args: Dict[str, Any]

    @abstractmethod
    def __init__(
        self,
        model_args: "ModelArguments",
        data_args: "DataArguments",
        finetuning_args: "FinetuningArguments",
        generating_args: "GeneratingArguments",
    ) -> None:
        r"""
        Initializes an inference engine.
        """
        ...

    @abstractmethod
    async def chat(
        self,
        messages: Sequence[Dict[str, str]],
        system: Optional[str] = None,
        tools: Optional[str] = None,
        images: Optional[Sequence["ImageInput"]] = None,
        videos: Optional[Sequence["VideoInput"]] = None,
        **input_kwargs,
    ) -> List["Response"]:
        r"""
        Gets a list of responses of the chat model.
        """
        ...

    @abstractmethod
    async def stream_chat(
        self,
        messages: Sequence[Dict[str, str]],
        system: Optional[str] = None,
        tools: Optional[str] = None,
        images: Optional[Sequence["ImageInput"]] = None,
        videos: Optional[Sequence["VideoInput"]] = None,
        **input_kwargs,
    ) -> AsyncGenerator[str, None]:
        r"""
        Gets the response token-by-token of the chat model.
        """
        # NOTE(review): this placeholder body contains no ``yield``, so the abstract
        # method itself is a plain coroutine; the return annotation suggests that
        # implementations are meant to be async generators -- confirm in subclasses.
        ...

    @abstractmethod
    async def get_scores(
        self,
        batch_input: List[str],
        **input_kwargs,
    ) -> List[float]:
        r"""
        Gets a list of scores of the reward model.
        """
        ...