# Copyright 2024 Musab Gultekin and the LlamaFactory team.
#
# This code is based on the Musab Gultekin's functionary library.
# https://github.com/MeetKai/functionary/blob/main/functionary/train/packing/monkey_patch_packing.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# MIT License
#
# Copyright (c) 2023 Musab Gultekin
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from typing import TYPE_CHECKING, Tuple

import torch
import torch.nn.functional as F
from transformers.utils.versions import require_version

from ...extras import logging
from ...extras.constants import SUPPORTED_CLASS_FOR_BLOCK_DIAG_ATTN
from ...extras.packages import is_transformers_version_greater_than


if TYPE_CHECKING:
    from transformers import PretrainedConfig

    from ...hparams import ModelArguments


logger = logging.get_logger(__name__)


def get_seqlens_in_batch(attention_mask: "torch.Tensor") -> "torch.Tensor":
    r"""
    Gets the sequence lengths in the current batch.

    Each row of the mask labels its tokens with a 1-based index of the packed
    sequence they belong to; 0 marks padding (see the example below).

    e.g.
    ```python
    # input
    [
        [1, 1, 2, 2, 2, 0],
        [1, 2, 2, 3, 3, 3],
    ]
    # output
    [2, 3, 1, 2, 3]
    ```
    """
    bsz = attention_mask.size(0)
    dtype, device = attention_mask.dtype, attention_mask.device
    max_num = torch.max(attention_mask).item()
    counts: "torch.Tensor" = torch.zeros((bsz, max_num), dtype=dtype, device=device)
    for i in range(max_num):
        # Column i holds, per row, the number of tokens labelled (i + 1).
        counts[:, i] = torch.sum(attention_mask == (i + 1), dim=-1)

    # Flatten row-major and drop zero entries (rows with fewer packed sequences).
    counts = counts.flatten()
    seqlens = counts[counts.nonzero().squeeze(dim=-1)]
    return seqlens


def get_unpad_data(
    attention_mask: "torch.Tensor",
) -> Tuple["torch.Tensor", "torch.Tensor", int]:
    r"""
    Prepares the indices and seqlens for flash attn varlen function.

    Returns:
        indices: indices of non-masked tokens from the flattened sequence.
        cu_seqlens: the cumulative sequence lengths in the current batch, always starts from 0.
        max_seqlen_in_batch: the largest seqlen in the current batch.

    e.g.
    ```python
    # input
    [
        [1, 1, 2, 2, 2, 0],
        [1, 2, 2, 3, 3, 3],
    ]
    # output
    [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11]
    [0, 2, 5, 6, 8, 11]
    3
    ```
    """
    seqlens_in_batch = get_seqlens_in_batch(attention_mask)
    indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
    max_seqlen_in_batch = seqlens_in_batch.max().item()
    # Left-pad with a single 0 so the cumulative lengths start from 0.
    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
    return indices, cu_seqlens, max_seqlen_in_batch


def _patch_for_block_diag_attn(model_type: str) -> None:
    r"""
    Replaces the `_get_unpad_data` helper of transformers with `get_unpad_data`.

    When `is_transformers_version_greater_than("4.43.0")` holds, the helper in
    `transformers.modeling_flash_attention_utils` is replaced; otherwise the
    helper in the modeling module matching `model_type` is replaced. In the
    latter case a `model_type` outside the listed ones leaves transformers
    untouched.
    """
    require_version(
        "transformers>=4.41.2,<=4.46.1",
        "To fix: pip install transformers>=4.41.2,<=4.46.1",
    )
    if is_transformers_version_greater_than("4.43.0"):
        import transformers.modeling_flash_attention_utils

        transformers.modeling_flash_attention_utils._get_unpad_data = get_unpad_data
        return

    import transformers.models

    if model_type == "cohere":
        transformers.models.cohere.modeling_cohere._get_unpad_data = get_unpad_data
    elif model_type == "falcon":
        transformers.models.falcon.modeling_falcon._get_unpad_data = get_unpad_data
    elif model_type == "gemma":
        transformers.models.gemma.modeling_gemma._get_unpad_data = get_unpad_data
    elif model_type == "gemma2":
        transformers.models.gemma2.modeling_gemma2._get_unpad_data = get_unpad_data
    elif model_type == "llama":
        transformers.models.llama.modeling_llama._get_unpad_data = get_unpad_data
    elif model_type == "mistral":
        transformers.models.mistral.modeling_mistral._get_unpad_data = get_unpad_data
    elif model_type == "phi":
        transformers.models.phi.modeling_phi._get_unpad_data = get_unpad_data
    elif model_type == "phi3":
        transformers.models.phi3.modeling_phi3._get_unpad_data = get_unpad_data
    elif model_type == "qwen2":
        transformers.models.qwen2.modeling_qwen2._get_unpad_data = get_unpad_data
    elif model_type == "starcoder2":
        transformers.models.starcoder2.modeling_starcoder2._get_unpad_data = get_unpad_data


def configure_packing(
    config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool
) -> None:
    r"""
    Enables block diagonal attention for packed sequences when requested.

    Does nothing unless `is_trainable` is true and `model_args.block_diag_attn`
    is set.

    Raises:
        ValueError: if the model type read from `config` is not listed in
            `SUPPORTED_CLASS_FOR_BLOCK_DIAG_ATTN`.
    """
    if not is_trainable or not model_args.block_diag_attn:
        return

    model_type = getattr(config, "model_type", None)
    if model_type in SUPPORTED_CLASS_FOR_BLOCK_DIAG_ATTN:
        _patch_for_block_diag_attn(model_type)
        logger.info_rank0(
            "Using block diagonal attention for sequence packing without cross-attention."
        )
    else:
        raise ValueError("Current model does not support block diagonal attention.")