# Azure ML command component: identity and human-readable summary.
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
type: command

# `name` is the registered identifier; `display_name` is the UI label.
name: dp-transformers-fine-tune-lm
display_name: DP fine-tune LM

description: >-
  Train a Huggingface language model using differential privacy and LoRA.

# Component inputs. Each entry is referenced from `command` as
# `${{inputs.<name>}}`.
inputs:
  # Passed to `torch.distributed.run --nproc_per_node` in the command.
  nproc_per_node:
    type: integer
    description: >-
      Number of processes per node.
      Usually this should be the number of GPUs per node
  # Model source: both entries are optional; supply one of them, not both.
  model_name:
    type: string
    optional: true
    description: >-
      Name of the HF model to use.
      Mutually exclusive with the option 'model_path'
  model_path:
    type: uri_folder
    optional: true
    description: >-
      Path to the model to use in HF format.
      Mutually exclusive with the option 'model_name'
  # Data inputs: jsonl train/eval files, the text column and the token limit.
  train_data_path:
    type: uri_file
    description: 'Path to the training data in jsonl format'
  text_column:
    type: string
    description: 'Name of the column that contains the text body to train on.'
  val_data_path:
    type: uri_file
    description: 'Path to the evaluation data in jsonl format.'
  sequence_len:
    type: integer
    description: 'Maximum token sequence length.'
  # Batch sizing for training (no defaults: caller must choose).
  per_device_train_batch_size:
    type: integer
    description: 'Batch size per device for training.'
  gradient_accumulation_steps:
    type: integer
    description: 'Number of gradient accumulation steps.'
  # Evaluation cadence and logging verbosity (all have defaults).
  evaluation_strategy:
    type: string
    default: steps
    description: 'Evaluation strategy.'
  eval_steps:
    type: integer
    default: 128
    description: 'Run evaluation every n steps.'
  log_level:
    type: string
    default: info
    description: 'Log level.'
  per_device_eval_batch_size:
    type: integer
    default: 64
    description: 'Batch size per device for evaluation.'
  # Seed has no default, so it must be supplied explicitly.
  seed:
    type: integer
    description: 'Random seed.'
  # Differential-privacy settings: privacy budget and per-sample clipping norm.
  target_epsilon:
    type: number
    description: >-
      Target epsilon which is achieved at the end of the training run.
  target_delta:
    type: number
    description: >-
      Target delta which is achieved at the end of the training run.
  per_sample_max_grad_norm:
    type: number
    description: 'Max norm of per-sample gradients.'
  # General training settings forwarded as same-named flags in the command.
  weight_decay:
    type: number
    default: 0.01
    description: 'Weight decay.'
  remove_unused_columns:
    type: boolean
    default: false
    description: 'Remove unused columns before passing them to the model.'
  # No default: the number of epochs must be chosen by the caller.
  num_train_epochs:
    type: integer
    description: 'Number of training epochs.'
  logging_steps:
    type: integer
    default: 5
    description: 'Log every x steps.'
  # Batch-level gradient norm bound, forwarded as `--max_grad_norm`.
  # The default is written as a float literal so that it matches the declared
  # `number` type and the other float defaults in this file (0.01, 0.0).
  max_grad_norm:
    type: number
    description: "Max batch gradient norm."
    default: 0.0
  # Learning-rate schedule and rate; the rate itself has no default.
  lr_scheduler_type:
    type: string
    default: constant
    description: 'Learning rate scheduler type.'
  learning_rate:
    type: number
    description: 'Learning rate.'
  # Data-loading and label settings.
  dataloader_num_workers:
    type: integer
    default: 2
    description: 'Number of workers for data loader.'
  label_names:
    type: string
    default: labels
    description: 'Label names.'
  # LoRA hyperparameters; the command always passes `--enable_lora`.
  lora_dim:
    type: integer
    default: 4
    description: >-
      LoRA dimension. (For more details see `r` in
      https://huggingface.co/docs/peft/main/en/package_reference/tuners#peft.LoraConfig)
  lora_alpha:
    type: integer
    default: 32
    description: >-
      LoRA alpha. (For more details see `lora_alpha` in
      https://huggingface.co/docs/peft/main/en/package_reference/tuners#peft.LoraConfig)
  lora_dropout:
    type: number
    default: 0.0
    description: >-
      LoRA dropout. (For more details see `lora_dropout` in
      https://huggingface.co/docs/peft/main/en/package_reference/tuners#peft.LoraConfig)
# Component outputs. `output_dir` is handed to the script as `--output_dir`.
outputs:
  output_dir:
    description: "Output directory"
    type: uri_folder

# Directory holding the component's code, relative to this file.
code: ./
# Extra paths bundled together with `code`.
# NOTE(review): presumably these provide the package that `pip install -e .`
# in the command installs — confirm against the repository layout.
additional_includes:
  - '../../setup.py'
  - '../../src'
  - '../../README.md'


# Shell command executed by the component: installs the bundled package in
# editable mode, then launches fine-tune.py through torch.distributed.run.
#
# Templating: `${{inputs.*}}` / `${{outputs.*}}` are substituted by Azure ML
# before the shell sees the command; a `$[[ ... ]]` group wraps an optional
# input and is dropped when that input is not supplied.
#
# Folding: in a `>-` scalar, line breaks next to more-indented lines are kept,
# so every continuation line below stays on its own line and the trailing `\`
# acts as a shell line continuation. Do not re-indent these lines to the level
# of the first line, or the backslashes end up in the middle of one long line.
#
# Flags hard-coded here rather than exposed as inputs:
# --eval_accumulation_steps 1, --prediction_loss_only, --disable_tqdm True,
# --enable_lora.
#
# NOTE(review): both optional model inputs are forwarded through the same
# `--model_name` flag. If fine-tune.py also defines `--model_path`, the second
# group should probably use it — confirm against fine-tune.py.
command: >-
  python -m pip install -e . && python -m torch.distributed.run --nproc_per_node ${{inputs.nproc_per_node}} fine-tune.py \
    --output_dir ${{outputs.output_dir}} \
    $[[ --model_name ${{inputs.model_name}} ]] \
    $[[ --model_name ${{inputs.model_path}} ]] \
    --train_data_path ${{inputs.train_data_path}} \
    --val_data_path ${{inputs.val_data_path}} \
    --text_column ${{inputs.text_column}} \
    --sequence_len ${{inputs.sequence_len}} \
    --per_device_train_batch_size ${{inputs.per_device_train_batch_size}} \
    --gradient_accumulation_steps ${{inputs.gradient_accumulation_steps}} \
    --evaluation_strategy ${{inputs.evaluation_strategy}} \
    --eval_steps ${{inputs.eval_steps}} \
    --log_level ${{inputs.log_level}} \
    --per_device_eval_batch_size ${{inputs.per_device_eval_batch_size}} \
    --eval_accumulation_steps 1 \
    --seed ${{inputs.seed}} \
    --target_epsilon ${{inputs.target_epsilon}} \
    --target_delta ${{inputs.target_delta}} \
    --per_sample_max_grad_norm ${{inputs.per_sample_max_grad_norm}} \
    --prediction_loss_only \
    --weight_decay ${{inputs.weight_decay}} \
    --remove_unused_columns ${{inputs.remove_unused_columns}} \
    --num_train_epochs ${{inputs.num_train_epochs}} \
    --logging_steps ${{inputs.logging_steps}} \
    --max_grad_norm ${{inputs.max_grad_norm}} \
    --lr_scheduler_type ${{inputs.lr_scheduler_type}} \
    --learning_rate ${{inputs.learning_rate}} \
    --disable_tqdm True \
    --dataloader_num_workers ${{inputs.dataloader_num_workers}} \
    --label_names ${{inputs.label_names}} \
    --lora_dim ${{inputs.lora_dim}} \
    --lora_alpha ${{inputs.lora_alpha}} \
    --lora_dropout ${{inputs.lora_dropout}} \
    --enable_lora

# Runtime environment: a base container image plus a conda specification
# file that sits next to this component definition.
environment:
  conda_file: ./environment.yml
  image: 'mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04'
