# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by the HuggingFace's TRL library.
# https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/dpo.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING, List, Optional

from ...data import (
    PairwiseDataCollatorWithPadding,
    get_dataset,
    get_template_and_fix_tokenizer,
)
from ...extras.constants import IGNORE_INDEX
from ...extras.misc import calculate_tps
from ...extras.ploting import plot_loss
from ...hparams import ModelArguments
from ...model import load_model, load_tokenizer
from ..trainer_utils import (
    create_modelcard_and_push,
    create_ref_model,
    fix_path_if_sagemaker,
)
from .trainer import CustomDPOTrainer


if TYPE_CHECKING:
    from transformers import Seq2SeqTrainingArguments, TrainerCallback

    from ...hparams import DataArguments, FinetuningArguments


def run_dpo(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
    callbacks: Optional[List["TrainerCallback"]] = None,
) -> None:
    """Run the DPO workflow: build the trainer, then train and/or evaluate.

    The function loads the tokenizer, template, dataset and model, builds a
    pairwise data collator and (optionally) a reference model, and hands
    everything to ``CustomDPOTrainer``. Training and evaluation run only when
    ``training_args.do_train`` / ``training_args.do_eval`` are set. The call to
    ``create_modelcard_and_push`` at the end is unconditional.

    Args:
        model_args: Passed to the tokenizer, dataset, model and reference-model
            loaders.
        data_args: Passed to the template and dataset loaders; its
            ``ignore_pad_token_for_loss`` flag selects the label padding id.
        training_args: Trainer arguments. ``remove_unused_columns`` is
            overwritten with ``False`` on this object.
        finetuning_args: Controls the reference model (``use_ref_model``,
            ``ref_model``), loss plotting (``plot_loss``) and the optional
            ``effective_tokens_per_sec`` metric.
        callbacks: Optional trainer callbacks forwarded to the trainer.
    """
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    template = get_template_and_fix_tokenizer(tokenizer, data_args)
    # NOTE(review): stage="rm" is what the dataset loader is given for DPO;
    # presumably it selects the pairwise preprocessing - confirm in get_dataset.
    dataset_module = get_dataset(
        template, model_args, data_args, training_args, stage="rm", **tokenizer_module
    )
    model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train)

    # Labels are padded with IGNORE_INDEX when padding must not contribute to
    # the loss; otherwise the tokenizer's own pad token id is used.
    if data_args.ignore_pad_token_for_loss:
        label_pad_token_id = IGNORE_INDEX
    else:
        label_pad_token_id = tokenizer.pad_token_id

    data_collator = PairwiseDataCollatorWithPadding(
        template=template,
        pad_to_multiple_of=8,
        label_pad_token_id=label_pad_token_id,
        **tokenizer_module,
    )

    # Create reference model
    if not finetuning_args.use_ref_model:
        ref_model = None
    elif finetuning_args.ref_model is None and not training_args.do_train:
        # Evaluation-only run without an explicit reference: use the model itself.
        ref_model = model
    else:
        ref_model = create_ref_model(model_args, finetuning_args)

    # Update arguments
    # important for multimodal and pairwise dataset
    training_args.remove_unused_columns = False

    # Initialize our Trainer
    trainer = CustomDPOTrainer(
        model=model,
        ref_model=ref_model,
        args=training_args,
        finetuning_args=finetuning_args,
        data_collator=data_collator,
        callbacks=callbacks,
        **dataset_module,
        **tokenizer_module,
    )

    # Training
    if training_args.do_train:
        train_result = trainer.train(
            resume_from_checkpoint=training_args.resume_from_checkpoint
        )
        # NOTE(review): helper from trainer_utils; presumably rewrites output
        # paths when running on SageMaker - confirm against its definition.
        trainer = fix_path_if_sagemaker(trainer)
        trainer.save_model()
        if finetuning_args.include_effective_tokens_per_second:
            train_result.metrics["effective_tokens_per_sec"] = calculate_tps(
                dataset_module["train_dataset"], train_result.metrics, stage="rm"
            )

        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()
        # Only the main process draws the loss curves.
        if trainer.is_world_process_zero() and finetuning_args.plot_loss:
            plot_loss(
                training_args.output_dir,
                keys=["loss", "eval_loss", "rewards/accuracies"],
            )

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate(metric_key_prefix="eval")
        if model is ref_model:
            # unable to compute rewards if reference model is the model itself
            reward_keys = [key for key in metrics if "rewards" in key]
            for key in reward_keys:
                metrics.pop(key)

        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Create model card
    create_modelcard_and_push(
        trainer, model_args, data_args, training_args, finetuning_args
    )