# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by the HuggingFace's transformers library.
# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
from types import MethodType
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union

import torch
from transformers import Trainer
from typing_extensions import override

from ...extras import logging
from ...extras.packages import is_transformers_version_equal_to_4_46
from ..callbacks import (
    FixValueHeadModelCallback,
    PissaConvertCallback,
    SaveProcessorCallback,
)
from ..trainer_utils import create_custom_optimizer, create_custom_scheduler


if TYPE_CHECKING:
    from transformers import PreTrainedModel, ProcessorMixin
    from transformers.trainer import PredictionOutput

    from ...hparams import FinetuningArguments


logger = logging.get_logger(__name__)


class PairwiseTrainer(Trainer):
    r"""
    Inherits Trainer to compute pairwise loss.
    """

    def __init__(
        self,
        finetuning_args: "FinetuningArguments",
        processor: Optional["ProcessorMixin"],
        **kwargs,
    ) -> None:
        r"""
        Builds the trainer and registers the callbacks selected by `finetuning_args`.

        Args:
            finetuning_args: fine-tuning options; `pissa_convert` and `use_badam` are read here,
                and the object is stored for later optimizer creation.
            processor: optional processor; when given, a `SaveProcessorCallback` is registered for it.
            **kwargs: forwarded unchanged to `Trainer.__init__`.
        """
        super().__init__(**kwargs)
        self.finetuning_args = finetuning_args
        self.can_return_loss = True  # override property to return eval_loss
        self.add_callback(FixValueHeadModelCallback)

        if processor is not None:
            self.add_callback(SaveProcessorCallback(processor))

        if finetuning_args.pissa_convert:
            self.add_callback(PissaConvertCallback)

        if finetuning_args.use_badam:
            # Imported lazily so that `badam` is only required when it is actually enabled.
            from badam import BAdamCallback, clip_grad_norm_old_version  # type: ignore

            # Rebind the accelerator's gradient clipping method to the variant shipped with badam.
            self.accelerator.clip_grad_norm_ = MethodType(
                clip_grad_norm_old_version, self.accelerator
            )
            self.add_callback(BAdamCallback)

    @override
    def create_optimizer(self) -> "torch.optim.Optimizer":
        r"""
        Creates the optimizer, trying the project's custom optimizer factory first.

        `create_custom_optimizer` is only consulted when no optimizer has been set yet; the parent
        implementation is then called and its result is returned.
        """
        if self.optimizer is None:
            self.optimizer = create_custom_optimizer(
                self.model, self.args, self.finetuning_args
            )

        return super().create_optimizer()

    @override
    def create_scheduler(
        self,
        num_training_steps: int,
        optimizer: Optional["torch.optim.Optimizer"] = None,
    ) -> "torch.optim.lr_scheduler.LRScheduler":
        r"""
        Invokes the project's custom scheduler hook, then defers to the parent implementation.

        Args:
            num_training_steps: total number of training steps, passed to both calls.
            optimizer: optimizer to schedule, passed to both calls.
        """
        create_custom_scheduler(self.args, num_training_steps, optimizer)
        return super().create_scheduler(num_training_steps, optimizer)

    @override
    def compute_loss(
        self,
        model: "PreTrainedModel",
        inputs: Dict[str, "torch.Tensor"],
        return_outputs: bool = False,
        **kwargs,
    ) -> Union["torch.Tensor", Tuple["torch.Tensor", List["torch.Tensor"]]]:
        r"""
        Computes pairwise loss. The first n examples are chosen and the last n examples are rejected.

        Subclass and override to inject custom behavior.

        Note that the first element will be removed from the output tuple.
        See: https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer.py#L3842

        Args:
            model: model to evaluate; its call must return a 3-tuple whose last item holds the values.
            inputs: batch containing at least `input_ids` and `attention_mask`.
            return_outputs: when True, also return `(loss, chosen_scores, rejected_scores)`.
            **kwargs: may carry `num_items_in_batch` (see the loss rescaling below).

        Returns:
            The scalar loss, or a tuple of the loss and the outputs described above.
        """
        _, _, values = model(
            **inputs, output_hidden_states=True, return_dict=True, use_cache=False
        )

        # The batch is the concatenation [chosen; rejected], so half of it is the number of pairs.
        batch_size = inputs["input_ids"].size(0) // 2
        chosen_masks, rejected_masks = torch.split(
            inputs["attention_mask"], batch_size, dim=0
        )
        chosen_rewards, rejected_rewards = torch.split(values, batch_size, dim=0)

        # Pick the value at index (number of attended tokens - 1) of every sequence.
        # NOTE(review): this is the last real token only if padding is on the right - confirm.
        chosen_last = chosen_masks.sum(dim=-1, keepdim=True) - 1
        rejected_last = rejected_masks.sum(dim=-1, keepdim=True) - 1
        chosen_scores = chosen_rewards.gather(dim=-1, index=chosen_last)
        rejected_scores = rejected_rewards.gather(dim=-1, index=rejected_last)

        # Drop only the gathered dimension so that a single pair still yields a 1-D tensor
        # instead of a 0-d one, which cannot be iterated over when predictions are saved.
        chosen_scores = chosen_scores.squeeze(dim=-1)
        rejected_scores = rejected_scores.squeeze(dim=-1)

        loss = -torch.nn.functional.logsigmoid(
            chosen_scores.float() - rejected_scores.float()
        ).mean()

        if is_transformers_version_equal_to_4_46() and kwargs.pop(
            "num_items_in_batch", False
        ):
            # fixes the loss value for transformers 4.46.0
            loss /= self.args.gradient_accumulation_steps

        if return_outputs:
            return loss, (loss, chosen_scores, rejected_scores)

        return loss

    def save_predictions(self, predict_relts: "PredictionOutput") -> None:
        r"""
        Saves model predictions to `output_dir`.

        A custom behavior that not contained in Seq2SeqTrainer.

        Only the world-zero process writes. The output file `generated_predictions.jsonl` holds
        one JSON object per pair with the `chosen` and `rejected` scores rounded to two decimals.

        Args:
            predict_relts: prediction output whose `predictions` unpack into
                `(chosen_scores, rejected_scores)`.
        """
        if not self.is_world_process_zero():
            return

        output_prediction_file = os.path.join(
            self.args.output_dir, "generated_predictions.jsonl"
        )
        logger.info_rank0(f"Saving prediction results to {output_prediction_file}")
        chosen_scores, rejected_scores = predict_relts.predictions

        res: List[str] = [
            json.dumps(
                {
                    "chosen": round(float(c_score), 2),
                    "rejected": round(float(r_score), 2),
                }
            )
            for c_score, r_score in zip(chosen_scores, rejected_scores)
        ]

        with open(output_prediction_file, "w", encoding="utf-8") as writer:
            writer.write("\n".join(res))