# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by the HuggingFace's transformers library.
# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys
import tempfile
from typing import Any, Dict, Optional, Tuple

import torch
import transformers
import yaml
from transformers import HfArgumentParser, Seq2SeqTrainingArguments
from transformers.integrations import is_deepspeed_zero3_enabled
from transformers.trainer_utils import get_last_checkpoint
from transformers.training_args import ParallelMode
from transformers.utils import is_torch_bf16_gpu_available, is_torch_npu_available
from transformers.utils.versions import require_version

from ..extras import logging
from ..extras.constants import CHECKPOINT_NAMES
from ..extras.misc import check_dependencies, get_current_device
from .data_args import DataArguments
from .evaluation_args import EvaluationArguments
from .finetuning_args import FinetuningArguments
from .generating_args import GeneratingArguments
from .model_args import ModelArguments


logger = logging.get_logger(__name__)


check_dependencies()


# Dataclass groups handed to `HfArgumentParser`, and the matching tuple types
# returned by the parsing helpers below (order must stay in sync).
_TRAIN_ARGS = [
    ModelArguments,
    DataArguments,
    Seq2SeqTrainingArguments,
    FinetuningArguments,
    GeneratingArguments,
]
_TRAIN_CLS = Tuple[
    ModelArguments,
    DataArguments,
    Seq2SeqTrainingArguments,
    FinetuningArguments,
    GeneratingArguments,
]
_INFER_ARGS = [ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments]
_INFER_CLS = Tuple[
    ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments
]
_EVAL_ARGS = [ModelArguments, DataArguments, EvaluationArguments, FinetuningArguments]
_EVAL_CLS = Tuple[
    ModelArguments, DataArguments, EvaluationArguments, FinetuningArguments
]

# Key in a YAML config that names another YAML file to merge in.
_INCLUDE_KEY = "include_hp"


def _load_yaml_mapping(path: str, label: str = "YAML file") -> Dict[str, Any]:
    r"""
    Reads a YAML file and returns its top-level mapping.

    Args:
        path: Path of the YAML file to read.
        label: Description of the file used in the parse-error message.

    Raises:
        yaml.YAMLError: If the file cannot be parsed as YAML.
        ValueError: If the top-level YAML value is not a dictionary.
    """
    try:
        with open(path, "r") as f:
            content = yaml.safe_load(f)
    except yaml.YAMLError as e:
        raise yaml.YAMLError(f"Error parsing {label} {path}: {str(e)}") from e

    if not isinstance(content, dict):
        raise ValueError(f"YAML content in {path} must be a dictionary")

    return content


def load_including_yaml(yaml_path: str, parser: HfArgumentParser) -> Tuple[Any, ...]:
    r"""
    Parses a YAML config that pulls in a second YAML file through `include_hp`.

    The `include_hp` entry is removed and the remaining keys are merged with the
    included file. On conflicting keys the value from the INCLUDED file wins.

    Args:
        yaml_path: Path of the YAML file that contains the `include_hp` key.
        parser: Parser used to turn the merged mapping into dataclasses.

    Returns:
        The dataclass instances produced by `parser.parse_dict`.

    Raises:
        KeyError: If `yaml_path` has no `include_hp` key.
        ValueError: If either file is not a mapping or `include_hp` is not a string.
        FileNotFoundError: If the included file does not exist.
        yaml.YAMLError: If either file cannot be parsed.
    """
    yaml_content = _load_yaml_mapping(yaml_path)
    if _INCLUDE_KEY not in yaml_content:
        raise KeyError(f"`{_INCLUDE_KEY}` is not specified in {yaml_path}")

    include_path = yaml_content[_INCLUDE_KEY]
    if not isinstance(include_path, str):
        raise ValueError(f"`{_INCLUDE_KEY}` in {yaml_path} must be a file path string")

    # Resolve a relative include against the directory of the including YAML.
    # Fall back to the working directory only when the file exists there and not
    # next to the YAML, so configs written for the old behavior keep working.
    if not os.path.isabs(include_path):
        base_dir = os.path.dirname(os.path.abspath(yaml_path))
        relative_to_yaml = os.path.join(base_dir, include_path)
        if os.path.isfile(relative_to_yaml) or not os.path.isfile(include_path):
            include_path = relative_to_yaml

    try:
        include_content = _load_yaml_mapping(include_path, "included YAML file")
    except FileNotFoundError as e:
        raise FileNotFoundError(f"Included YAML file not found: {include_path}") from e

    merged_content = {
        key: value for key, value in yaml_content.items() if key != _INCLUDE_KEY
    }
    merged_content.update(include_content)

    # `parse_yaml_file` is `parse_dict` applied to the loaded YAML, so the merged
    # mapping is parsed directly instead of round-tripping through a temp file.
    return parser.parse_dict(merged_content)


def _parse_args(
    parser: "HfArgumentParser", args: Optional[Dict[str, Any]] = None
) -> Tuple[Any, ...]:
    r"""
    Parses arguments from a dict, a single YAML/JSON file path, or the command line.

    Args:
        parser: Parser configured with the target dataclasses.
        args: Optional mapping of argument values; when given, `sys.argv` is ignored.

    Raises:
        yaml.YAMLError: If the YAML config cannot be parsed.
        ValueError: If the command line has unknown arguments or a malformed
            `lr_scheduler_kwargs` value.
    """
    if args is not None:
        return parser.parse_dict(args)

    if len(sys.argv) == 2 and sys.argv[1].endswith((".yaml", ".yml")):
        yaml_path = os.path.abspath(sys.argv[1])
        try:
            with open(yaml_path, "r") as f:
                yaml_content = yaml.safe_load(f)
        except yaml.YAMLError as e:
            raise yaml.YAMLError(
                f"Error parsing YAML file {yaml_path}: {str(e)}"
            ) from e

        # An empty file loads as None; only a mapping can carry `include_hp`.
        if isinstance(yaml_content, dict) and _INCLUDE_KEY in yaml_content:
            return load_including_yaml(yaml_path, parser)

        return parser.parse_yaml_file(yaml_path)

    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        return parser.parse_json_file(os.path.abspath(sys.argv[1]))

    (*parsed_args, unknown_args) = parser.parse_args_into_dataclasses(
        return_remaining_strings=True
    )

    # Fix incorrectly parsed kwargs: a `key=value` string given on the command
    # line is converted into a one-entry dict with a float value.
    for parsed in parsed_args:
        if not isinstance(parsed, Seq2SeqTrainingArguments):
            continue

        lr_kwargs = parsed.lr_scheduler_kwargs
        if isinstance(lr_kwargs, str) and lr_kwargs:
            key, separator, value = lr_kwargs.partition("=")
            if not separator:
                raise ValueError(
                    f"`lr_scheduler_kwargs` must look like `key=value`, got: {lr_kwargs}"
                )

            parsed.lr_scheduler_kwargs = {key: float(value)}

    if unknown_args:
        print(parser.format_help())
        print(f"Got unknown args, potentially deprecated arguments: {unknown_args}")
        raise ValueError(
            f"Some specified arguments are not used by the HfArgumentParser: {unknown_args}"
        )

    return tuple(parsed_args)


def _set_transformers_logging() -> None:
    r"""
    Switches the transformers logger to INFO with its default handler and explicit format.
    """
    transformers.utils.logging.set_verbosity_info()
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()


def _verify_model_args(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    finetuning_args: "FinetuningArguments",
) -> None:
    r"""
    Validates adapter/quantization combinations shared by train, infer and eval.

    Also forces `use_fast_tokenizer` to False (with a warning) for the `yi` template.

    Raises:
        ValueError: If adapters or quantization are combined with unsupported options.
    """
    if (
        model_args.adapter_name_or_path is not None
        and finetuning_args.finetuning_type != "lora"
    ):
        raise ValueError("Adapter is only valid for the LoRA method.")

    if model_args.quantization_bit is not None:
        if finetuning_args.finetuning_type != "lora":
            raise ValueError("Quantization is only compatible with the LoRA method.")

        if finetuning_args.pissa_init:
            raise ValueError(
                "Please use scripts/pissa_init.py to initialize PiSSA for a quantized model."
            )

        if model_args.resize_vocab:
            raise ValueError("Cannot resize embedding layers of a quantized model.")

        if (
            model_args.adapter_name_or_path is not None
            and finetuning_args.create_new_adapter
        ):
            raise ValueError("Cannot create new adapter upon a quantized model.")

        if (
            model_args.adapter_name_or_path is not None
            and len(model_args.adapter_name_or_path) != 1
        ):
            raise ValueError(
                "Quantized model only accepts a single adapter. Merge them first."
            )

    if data_args.template == "yi" and model_args.use_fast_tokenizer:
        logger.warning_rank0(
            "We should use slow tokenizer for the Yi models. Change `use_fast_tokenizer` to False."
        )
        model_args.use_fast_tokenizer = False


def _check_extra_dependencies(
    model_args: "ModelArguments",
    finetuning_args: "FinetuningArguments",
    training_args: Optional["Seq2SeqTrainingArguments"] = None,
) -> None:
    r"""
    Checks that optional packages required by the enabled features are installed.

    Args:
        model_args: Model options selecting unsloth, liger kernel, MoD or vLLM.
        finetuning_args: Finetuning options selecting GaLore, BAdam, Adam-mini or plotting.
        training_args: Optional training options; when `predict_with_generate` is
            set, the text-metric packages are required as well.
    """
    if model_args.use_unsloth:
        require_version(
            "unsloth", "Please install unsloth: https://github.com/unslothai/unsloth"
        )

    if model_args.enable_liger_kernel:
        require_version("liger-kernel", "To fix: pip install liger-kernel")

    if model_args.mixture_of_depths is not None:
        require_version(
            "mixture-of-depth>=1.1.6", "To fix: pip install mixture-of-depth>=1.1.6"
        )

    if model_args.infer_backend == "vllm":
        require_version("vllm>=0.4.3,<0.6.5", "To fix: pip install vllm>=0.4.3,<0.6.5")

    if finetuning_args.use_galore:
        require_version("galore_torch", "To fix: pip install galore_torch")

    if finetuning_args.use_badam:
        require_version("badam>=1.2.1", "To fix: pip install badam>=1.2.1")

    if finetuning_args.use_adam_mini:
        require_version("adam-mini", "To fix: pip install adam-mini")

    if finetuning_args.plot_loss:
        require_version("matplotlib", "To fix: pip install matplotlib")

    if training_args is not None and training_args.predict_with_generate:
        require_version("jieba", "To fix: pip install jieba")
        require_version("nltk", "To fix: pip install nltk")
        require_version("rouge_chinese", "To fix: pip install rouge-chinese")


def _parse_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
    r"""
    Parses the training dataclasses listed in `_TRAIN_ARGS`.
    """
    parser = HfArgumentParser(_TRAIN_ARGS)
    return _parse_args(parser, args)


def _parse_infer_args(args: Optional[Dict[str, Any]] = None) -> _INFER_CLS:
    r"""
    Parses the inference dataclasses listed in `_INFER_ARGS`.
    """
    parser = HfArgumentParser(_INFER_ARGS)
    return _parse_args(parser, args)


def _parse_eval_args(args: Optional[Dict[str, Any]] = None) -> _EVAL_CLS:
    r"""
    Parses the evaluation dataclasses listed in `_EVAL_ARGS`.
    """
    parser = HfArgumentParser(_EVAL_ARGS)
    return _parse_args(parser, args)


def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
    r"""
    Parses, validates and post-processes the arguments for a training run.

    Besides validation this mutates the parsed objects: it may set
    `resume_from_checkpoint`, `ddp_find_unused_parameters`, `packing`,
    `compute_dtype`, `device_map`, `model_max_length` and `block_diag_attn`,
    and it seeds the RNGs through `transformers.set_seed`.

    Args:
        args: Optional mapping of argument values; otherwise `sys.argv` is used.

    Returns:
        `(model_args, data_args, training_args, finetuning_args, generating_args)`.

    Raises:
        ValueError: If any combination of arguments is unsupported.
    """
    model_args, data_args, training_args, finetuning_args, generating_args = (
        _parse_train_args(args)
    )

    # Setup logging
    if training_args.should_log:
        _set_transformers_logging()

    # Check arguments
    if finetuning_args.stage != "pt" and data_args.template is None:
        raise ValueError("Please specify which `template` to use.")

    if finetuning_args.stage != "sft":
        if training_args.predict_with_generate:
            raise ValueError(
                "`predict_with_generate` cannot be set as True except SFT."
            )

        if data_args.neat_packing:
            raise ValueError("`neat_packing` cannot be set as True except SFT.")

        if data_args.train_on_prompt or data_args.mask_history:
            raise ValueError(
                "`train_on_prompt` or `mask_history` cannot be set as True except SFT."
            )

    if (
        finetuning_args.stage == "sft"
        and training_args.do_predict
        and not training_args.predict_with_generate
    ):
        raise ValueError(
            "Please enable `predict_with_generate` to save model predictions."
        )

    # The declared global batch size must match what the launch configuration yields.
    effective_batch_size = (
        training_args.world_size
        * training_args.per_device_train_batch_size
        * training_args.gradient_accumulation_steps
    )
    if model_args.global_batch_size != effective_batch_size:
        raise ValueError(
            f"training_args.world_size {training_args.world_size} * "
            f"training_args.per_device_train_batch_size {training_args.per_device_train_batch_size} * "
            f"training_args.gradient_accumulation_steps {training_args.gradient_accumulation_steps} "
            f"needs to equal model_args.global_batch_size {model_args.global_batch_size}"
        )

    if finetuning_args.stage in ["rm", "ppo"] and training_args.load_best_model_at_end:
        raise ValueError("RM and PPO stages do not support `load_best_model_at_end`.")

    if finetuning_args.stage == "ppo":
        if not training_args.do_train:
            raise ValueError(
                "PPO training does not support evaluation, use the SFT stage to evaluate models."
            )

        if model_args.shift_attn:
            raise ValueError("PPO training is incompatible with S^2-Attn.")

        if finetuning_args.reward_model_type == "lora" and model_args.use_unsloth:
            raise ValueError("Unsloth does not support lora reward model.")

        if training_args.report_to and training_args.report_to[0] not in [
            "wandb",
            "tensorboard",
        ]:
            raise ValueError("PPO only accepts wandb or tensorboard logger.")

    if training_args.parallel_mode == ParallelMode.NOT_DISTRIBUTED:
        raise ValueError(
            "Please launch distributed training with `llamafactory-cli` or `torchrun`."
        )

    if (
        training_args.deepspeed
        and training_args.parallel_mode != ParallelMode.DISTRIBUTED
    ):
        raise ValueError("Please use `FORCE_TORCHRUN=1` to launch DeepSpeed training.")

    if training_args.max_steps == -1 and data_args.streaming:
        raise ValueError("Please specify `max_steps` in streaming mode.")

    if training_args.do_train and data_args.dataset is None:
        raise ValueError("Please specify dataset for training.")

    if (training_args.do_eval or training_args.do_predict) and (
        data_args.eval_dataset is None and data_args.val_size < 1e-6
    ):
        raise ValueError("Please specify dataset for evaluation.")

    if training_args.predict_with_generate:
        if is_deepspeed_zero3_enabled():
            raise ValueError(
                "`predict_with_generate` is incompatible with DeepSpeed ZeRO-3."
            )

        if data_args.eval_dataset is None:
            raise ValueError(
                "Cannot use `predict_with_generate` if `eval_dataset` is None."
            )

        if finetuning_args.compute_accuracy:
            raise ValueError(
                "Cannot use `predict_with_generate` and `compute_accuracy` together."
            )

    if training_args.do_train and model_args.quantization_device_map == "auto":
        raise ValueError("Cannot use device map for quantized models in training.")

    if finetuning_args.pissa_init and is_deepspeed_zero3_enabled():
        raise ValueError(
            "Please use scripts/pissa_init.py to initialize PiSSA in DeepSpeed ZeRO-3."
        )

    if finetuning_args.pure_bf16:
        if not (
            is_torch_bf16_gpu_available()
            or (is_torch_npu_available() and torch.npu.is_bf16_supported())
        ):
            raise ValueError("This device does not support `pure_bf16`.")

        if is_deepspeed_zero3_enabled():
            raise ValueError("`pure_bf16` is incompatible with DeepSpeed ZeRO-3.")

    if (
        finetuning_args.use_galore
        and finetuning_args.galore_layerwise
        and training_args.parallel_mode == ParallelMode.DISTRIBUTED
    ):
        raise ValueError("Distributed training does not support layer-wise GaLore.")

    if (
        finetuning_args.use_badam
        and training_args.parallel_mode == ParallelMode.DISTRIBUTED
    ):
        if finetuning_args.badam_mode == "ratio":
            raise ValueError(
                "Ratio-based BAdam does not yet support distributed training, use layer-wise BAdam."
            )
        elif not is_deepspeed_zero3_enabled():
            raise ValueError(
                "Layer-wise BAdam only supports DeepSpeed ZeRO-3 training."
            )

    if finetuning_args.use_galore and training_args.deepspeed is not None:
        raise ValueError("GaLore is incompatible with DeepSpeed yet.")

    if model_args.infer_backend == "vllm":
        raise ValueError("vLLM backend is only available for API, CLI and Web.")

    if model_args.use_unsloth and is_deepspeed_zero3_enabled():
        raise ValueError("Unsloth is incompatible with DeepSpeed ZeRO-3.")

    if data_args.neat_packing and not data_args.packing:
        logger.warning_rank0(
            "`neat_packing` requires `packing` is True. Change `packing` to True."
        )
        data_args.packing = True

    _verify_model_args(model_args, data_args, finetuning_args)
    _check_extra_dependencies(model_args, finetuning_args, training_args)

    # Non-fatal recommendations
    if (
        training_args.do_train
        and finetuning_args.finetuning_type == "lora"
        and model_args.quantization_bit is None
        and model_args.resize_vocab
        and finetuning_args.additional_target is None
    ):
        logger.warning_rank0(
            "Remember to add embedding layers to `additional_target` to make the added tokens trainable."
        )

    if (
        training_args.do_train
        and model_args.quantization_bit is not None
        and (not model_args.upcast_layernorm)
    ):
        logger.warning_rank0(
            "We recommend enable `upcast_layernorm` in quantized training."
        )

    if training_args.do_train and (not training_args.fp16) and (not training_args.bf16):
        logger.warning_rank0("We recommend enable mixed precision training.")

    if (
        training_args.do_train
        and finetuning_args.use_galore
        and not finetuning_args.pure_bf16
    ):
        logger.warning_rank0(
            "Using GaLore with mixed precision training may significantly increases GPU memory usage."
        )

    if (not training_args.do_train) and model_args.quantization_bit is not None:
        logger.warning_rank0("Evaluating model in 4/8-bit mode may cause lower scores.")

    if (
        (not training_args.do_train)
        and finetuning_args.stage == "dpo"
        and finetuning_args.ref_model is None
    ):
        logger.warning_rank0("Specify `ref_model` for computing rewards at evaluation.")

    # Post-process training arguments
    if (
        training_args.parallel_mode == ParallelMode.DISTRIBUTED
        and training_args.ddp_find_unused_parameters is None
        and finetuning_args.finetuning_type == "lora"
    ):
        logger.warning_rank0(
            "`ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training."
        )
        training_args.ddp_find_unused_parameters = False

    if finetuning_args.stage in ["rm", "ppo"] and finetuning_args.finetuning_type in [
        "full",
        "freeze",
    ]:
        can_resume_from_checkpoint = False
        if training_args.resume_from_checkpoint is not None:
            logger.warning_rank0("Cannot resume from checkpoint in current stage.")
            training_args.resume_from_checkpoint = None
    else:
        can_resume_from_checkpoint = True

    # Auto-detect the latest checkpoint in an existing output directory.
    if (
        training_args.resume_from_checkpoint is None
        and training_args.do_train
        and os.path.isdir(training_args.output_dir)
        and not training_args.overwrite_output_dir
        and can_resume_from_checkpoint
    ):
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and any(
            os.path.isfile(os.path.join(training_args.output_dir, name))
            for name in CHECKPOINT_NAMES
        ):
            raise ValueError(
                "Output directory already exists and is not empty. Please set `overwrite_output_dir`."
            )

        if last_checkpoint is not None:
            training_args.resume_from_checkpoint = last_checkpoint
            logger.info_rank0(
                f"Resuming training from {training_args.resume_from_checkpoint}."
            )
            logger.info_rank0(
                "Change `output_dir` or use `overwrite_output_dir` to avoid."
            )

    if (
        finetuning_args.stage in ["rm", "ppo"]
        and finetuning_args.finetuning_type == "lora"
        and training_args.resume_from_checkpoint is not None
    ):
        logger.warning_rank0(
            "Add {} to `adapter_name_or_path` to resume training from checkpoint.".format(
                training_args.resume_from_checkpoint
            )
        )

    # Post-process model arguments
    if training_args.bf16 or finetuning_args.pure_bf16:
        model_args.compute_dtype = torch.bfloat16
    elif training_args.fp16:
        model_args.compute_dtype = torch.float16

    model_args.device_map = {"": get_current_device()}
    model_args.model_max_length = data_args.cutoff_len
    model_args.block_diag_attn = data_args.neat_packing
    # When `packing` is unset, it defaults to True only for the pre-training stage.
    data_args.packing = (
        data_args.packing
        if data_args.packing is not None
        else finetuning_args.stage == "pt"
    )

    # Log on each process the small summary
    logger.info(
        "Process rank: {}, device: {}, n_gpu: {}, distributed training: {}, compute dtype: {}".format(
            training_args.local_rank,
            training_args.device,
            training_args.n_gpu,
            training_args.parallel_mode == ParallelMode.DISTRIBUTED,
            str(model_args.compute_dtype),
        )
    )

    transformers.set_seed(training_args.seed)

    return model_args, data_args, training_args, finetuning_args, generating_args


def get_infer_args(args: Optional[Dict[str, Any]] = None) -> _INFER_CLS:
    r"""
    Parses and validates the arguments for inference or export.

    Sets `model_args.device_map` to CPU (and `model_max_length` to `cutoff_len`)
    when exporting on CPU, otherwise to `"auto"`.

    Returns:
        `(model_args, data_args, finetuning_args, generating_args)`.

    Raises:
        ValueError: If no template is given or the vLLM backend is combined with
            unsupported options.
    """
    model_args, data_args, finetuning_args, generating_args = _parse_infer_args(args)

    _set_transformers_logging()

    if data_args.template is None:
        raise ValueError("Please specify which `template` to use.")

    if model_args.infer_backend == "vllm":
        if finetuning_args.stage != "sft":
            raise ValueError("vLLM engine only supports auto-regressive models.")

        if model_args.quantization_bit is not None:
            raise ValueError(
                "vLLM engine does not support bnb quantization (GPTQ and AWQ are supported)."
            )

        if model_args.rope_scaling is not None:
            raise ValueError("vLLM engine does not support RoPE scaling.")

        if (
            model_args.adapter_name_or_path is not None
            and len(model_args.adapter_name_or_path) != 1
        ):
            raise ValueError("vLLM only accepts a single adapter. Merge them first.")

    _verify_model_args(model_args, data_args, finetuning_args)
    _check_extra_dependencies(model_args, finetuning_args)

    if model_args.export_dir is not None and model_args.export_device == "cpu":
        model_args.device_map = {"": torch.device("cpu")}
        model_args.model_max_length = data_args.cutoff_len
    else:
        model_args.device_map = "auto"

    return model_args, data_args, finetuning_args, generating_args


def get_eval_args(args: Optional[Dict[str, Any]] = None) -> _EVAL_CLS:
    r"""
    Parses and validates the arguments for benchmark evaluation.

    Sets `model_args.device_map` to `"auto"` and seeds the RNGs with `eval_args.seed`.

    Returns:
        `(model_args, data_args, eval_args, finetuning_args)`.

    Raises:
        ValueError: If no template is given or the vLLM backend is requested.
    """
    model_args, data_args, eval_args, finetuning_args = _parse_eval_args(args)

    _set_transformers_logging()

    if data_args.template is None:
        raise ValueError("Please specify which `template` to use.")

    if model_args.infer_backend == "vllm":
        raise ValueError("vLLM backend is only available for API, CLI and Web.")

    _verify_model_args(model_args, data_args, finetuning_args)
    _check_extra_dependencies(model_args, finetuning_args)

    model_args.device_map = "auto"

    transformers.set_seed(eval_args.seed)

    return model_args, data_args, eval_args, finetuning_args