# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by the HuggingFace's transformers library.
# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer_seq2seq.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
from types import MethodType
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

import numpy as np
import torch
from transformers import Seq2SeqTrainer
from typing_extensions import override

from ...extras import logging
from ...extras.constants import IGNORE_INDEX
from ...extras.packages import is_transformers_version_equal_to_4_46
from ..callbacks import PissaConvertCallback, SaveProcessorCallback
from ..trainer_utils import create_custom_optimizer, create_custom_scheduler


if TYPE_CHECKING:
    from torch.utils.data import Dataset
    from transformers import ProcessorMixin
    from transformers.trainer import PredictionOutput

    from ...hparams import FinetuningArguments


logger = logging.get_logger(__name__)


class CustomSeq2SeqTrainer(Seq2SeqTrainer):
    r"""
    Inherits Seq2SeqTrainer to compute generative metrics such as BLEU and ROUGE.
    """

    def __init__(
        self,
        finetuning_args: "FinetuningArguments",
        processor: Optional["ProcessorMixin"],
        **kwargs,
    ) -> None:
        r"""
        Builds the trainer and registers the optional callbacks.

        Args:
            finetuning_args: project fine-tuning options; `pissa_convert` and
                `use_badam` are read here, and the object is kept on the
                instance for the optimizer factory.
            processor: when not None, a `SaveProcessorCallback` wrapping it is
                registered.
            **kwargs: forwarded unchanged to `Seq2SeqTrainer.__init__`.
        """
        super().__init__(**kwargs)
        self.finetuning_args = finetuning_args

        if processor is not None:
            self.add_callback(SaveProcessorCallback(processor))

        if finetuning_args.pissa_convert:
            self.add_callback(PissaConvertCallback)

        if finetuning_args.use_badam:
            # Imported lazily so that `badam` is only required when it is enabled.
            from badam import BAdamCallback, clip_grad_norm_old_version  # type: ignore

            # Rebind the accelerator's gradient clipping to the BAdam-provided variant.
            self.accelerator.clip_grad_norm_ = MethodType(
                clip_grad_norm_old_version, self.accelerator
            )
            self.add_callback(BAdamCallback)

    @override
    def create_optimizer(self) -> "torch.optim.Optimizer":
        r"""
        Installs the custom optimizer (if no optimizer is set yet) and then
        delegates to the parent implementation.
        """
        if self.optimizer is None:
            # NOTE(review): presumably the factory may return None, in which case
            # the parent builds its default optimizer -- confirm in trainer_utils.
            self.optimizer = create_custom_optimizer(
                self.model, self.args, self.finetuning_args
            )
        return super().create_optimizer()

    @override
    def create_scheduler(
        self,
        num_training_steps: int,
        optimizer: Optional["torch.optim.Optimizer"] = None,
    ) -> "torch.optim.lr_scheduler.LRScheduler":
        r"""
        Runs the custom scheduler hook, then delegates to the parent implementation.
        """
        # The hook's return value is intentionally unused; it is called for its effects.
        create_custom_scheduler(self.args, num_training_steps, optimizer)
        return super().create_scheduler(num_training_steps, optimizer)

    @override
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        r"""
        Fixes the loss value for transformers 4.46.0.
        https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/trainer.py#L3605

        The parent loss is divided by `gradient_accumulation_steps` when running
        on transformers 4.46 and `model_accepts_loss_kwargs` is unset or falsy.
        With `return_outputs=True` only the first element (the loss) is rescaled.
        """
        loss = super().compute_loss(model, inputs, return_outputs, **kwargs)
        if is_transformers_version_equal_to_4_46() and not getattr(
            self, "model_accepts_loss_kwargs", False
        ):
            # other model should not scale the loss
            if return_outputs:
                return (loss[0] / self.args.gradient_accumulation_steps, *loss[1:])
            else:
                return loss / self.args.gradient_accumulation_steps

        return loss

    @override
    def prediction_step(
        self,
        model: "torch.nn.Module",
        inputs: Dict[str, Union["torch.Tensor", Any]],
        prediction_loss_only: bool,
        ignore_keys: Optional[List[str]] = None,
    ) -> Tuple[Optional[float], Optional["torch.Tensor"], Optional["torch.Tensor"]]:
        r"""
        Removes the prompt part in the generated tokens.

        Subclass and override to inject custom behavior.

        Returns:
            `(loss, generated_tokens, labels)` where `labels` is a detached copy
            of the labels taken before any length alignment (or None when the
            batch carries no labels).
        """
        labels = inputs["labels"] if "labels" in inputs else None
        if self.args.predict_with_generate:
            assert (
                self.tokenizer.padding_side == "left"
            ), "This method only accepts left-padded tensor."
            prompt_len = inputs["input_ids"].size(-1)
            # Only align label length when labels exist; prediction on unlabeled
            # batches must not fail on a missing "labels" entry.
            if labels is not None:
                labels = labels.detach().clone()  # backup labels
                label_len = inputs["labels"].size(-1)
                if prompt_len > label_len:
                    inputs["labels"] = self._pad_tensors_to_target_len(
                        inputs["labels"], inputs["input_ids"]
                    )
                elif label_len > prompt_len:
                    # truncate the labels instead of padding the inputs (llama2 fp16 compatibility)
                    inputs["labels"] = inputs["labels"][:, :prompt_len]

        # ignore the returned labels (may be truncated)
        loss, generated_tokens, _ = super().prediction_step(
            model,
            inputs,
            prediction_loss_only=prediction_loss_only,
            ignore_keys=ignore_keys,
        )
        if generated_tokens is not None and self.args.predict_with_generate:
            # Blank out the leading prompt positions so only the continuation remains.
            generated_tokens[:, :prompt_len] = self.tokenizer.pad_token_id
            generated_tokens = generated_tokens.contiguous()

        return loss, generated_tokens, labels

    def _pad_tensors_to_target_len(
        self, src_tensor: "torch.Tensor", tgt_tensor: "torch.Tensor"
    ) -> "torch.Tensor":
        r"""
        Pads the tensor to the same length as the target tensor.

        The result has the shape of `tgt_tensor`, is filled with the pad token
        id, and carries `src_tensor` in its trailing positions (left-padding).
        """
        assert self.tokenizer.pad_token_id is not None, "Pad token is required."
        padded_tensor = self.tokenizer.pad_token_id * torch.ones_like(tgt_tensor)
        padded_tensor[:, -src_tensor.shape[-1] :] = src_tensor  # adopt left-padding
        return padded_tensor.contiguous()  # in contiguous memory

    def save_predictions(
        self, dataset: "Dataset", predict_relts: "PredictionOutput"
    ) -> None:
        r"""
        Saves model predictions to `output_dir`.

        A custom behavior that not contained in Seq2SeqTrainer.

        Writes one JSON object per line (`prompt`, `predict`, `label`) to
        `generated_predictions.jsonl`; does nothing unless
        `is_world_process_zero()` is true.
        """
        if not self.is_world_process_zero():
            return

        output_prediction_file = os.path.join(
            self.args.output_dir, "generated_predictions.jsonl"
        )
        logger.info_rank0(f"Saving prediction results to {output_prediction_file}")

        pad_token_id = self.tokenizer.pad_token_id
        # Replace IGNORE_INDEX entries with the pad token so they can be decoded.
        labels = np.where(
            predict_relts.label_ids != IGNORE_INDEX,
            predict_relts.label_ids,
            pad_token_id,
        )
        preds = np.where(
            predict_relts.predictions != IGNORE_INDEX,
            predict_relts.predictions,
            pad_token_id,
        )

        for i, pred in enumerate(preds):
            non_pad_positions = np.nonzero(pred != pad_token_id)[0]
            if len(non_pad_positions):  # move pad token to last
                first = non_pad_positions[0]
                preds[i] = np.concatenate((pred[first:], pred[:first]), axis=-1)

        decoded_inputs = self.tokenizer.batch_decode(
            dataset["input_ids"], skip_special_tokens=True
        )
        decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True)
        decoded_labels = self.tokenizer.batch_decode(labels, skip_special_tokens=True)

        with open(output_prediction_file, "w", encoding="utf-8") as f:
            for text, pred, label in zip(decoded_inputs, decoded_preds, decoded_labels):
                f.write(
                    json.dumps(
                        {"prompt": text, "predict": pred, "label": label},
                        ensure_ascii=False,
                    )
                    + "\n"
                )