$schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json

# Azure ML command job: differentially private fine-tuning of gpt2 with LoRA
# enabled (target epsilon 8), launched through torch.distributed.run.

# Code directory for the job. The command below installs it with
# `pip install -e .` and addresses the training script relative to it.
code: ../../../../

# Literal block scalar: line breaks are kept exactly as written, so every
# trailing backslash acts as a shell line continuation. Do not put YAML
# comments inside the block; they would become part of the command text.
#
# Reviewer notes on individual flags:
#   --nproc_per_node 8   starts 8 worker processes on the node; presumably one
#                        per GPU -- confirm the compute target exposes 8 GPUs.
#   --max_grad_norm 0    is set alongside --per_sample_max_grad_norm 1.0;
#                        presumably this turns off the trainer's own global
#                        clipping so only per-sample clipping applies --
#                        TODO confirm against the training script.
command: |-
  python -m pip install -e . && python -m torch.distributed.run --nproc_per_node 8 examples/nlg-reddit/sample-level-dp/fine-tune-dp.py \
    --output_dir outputs \
    --model_name gpt2 \
    --sequence_len 128 \
    --per_device_train_batch_size 128 \
    --gradient_accumulation_steps 1 \
    --evaluation_strategy steps \
    --eval_steps 128 \
    --log_level info \
    --per_device_eval_batch_size 64 \
    --eval_accumulation_steps 1 \
    --seed 42 \
    --target_epsilon 8 \
    --target_delta 2e-6 \
    --per_sample_max_grad_norm 1.0 \
    --prediction_loss_only \
    --weight_decay 0.01 \
    --remove_unused_columns False \
    --num_train_epochs 3 \
    --logging_steps 5 \
    --max_grad_norm 0 \
    --lr_scheduler_type constant \
    --learning_rate 1e-4 \
    --disable_tqdm True \
    --dataloader_num_workers 2 \
    --lora_dim 4 \
    --lora_alpha 32 \
    --lora_dropout 0.0 \
    --enable_lora \
    --label_names labels

# Base container image plus a conda specification layered on top of it.
# NOTE(review): the image reference carries no tag, so the resolved image can
# change between runs -- consider pinning a tag for reproducibility.
environment:
  image: mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04
  conda_file: ../environment.yml

# Compute target the job is submitted to.
compute: azureml:ND40rsv2

# Run metadata. display_name repeats the epsilon value passed via
# --target_epsilon in the command; keep the two in sync when changing either.
display_name: parameter_efficient_fine_tuning-epsilon_8
experiment_name: dp-transformers-nlg-reddit-sample-level-dp
description: Train a model on the Reddit dataset using differential privacy