# Azure ML pipeline job; the $schema URL lets tooling validate this file
# against the pipelineJob schema.
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
type: pipeline
# display_name labels this run; experiment_name is the experiment the run is
# filed under.
display_name: sst2-utility-real
experiment_name: compute-utility-real

settings:
  # Compute target for steps that do not declare their own compute.
  # `azureml:` references a compute resource by name.
  # NOTE(review): presumably an ND40rs_v2 (8-GPU) cluster, matching
  # nproc_per_node below — confirm in the workspace.
  default_compute: azureml:ND40rsv2

# Pipeline-level inputs; steps reference them as ${{parent.inputs.<name>}}.
inputs:
  # Training split: folder-typed data reference in `azureml:<name>:<version>`
  # form (asset SST2-train, version 5).
  train_data_path:
    type: uri_folder
    path: "azureml:SST2-train:5"
  # Evaluation split: asset SST2-test, version 5.
  eval_data_path:
    type: uri_folder
    path: "azureml:SST2-test:5"

jobs:
  # Single step: runs the utility_computation component on the pipeline-level
  # train/eval inputs. No `compute` is set here, so the pipeline's
  # settings.default_compute applies.
  compute_utility_real:
    type: command
    # Path is relative to this YAML file.
    component: ../components/utility_computation/component_spec.yml
    inputs:
      # Bind the component's data inputs to the pipeline inputs.
      utility_train_data_path: ${{parent.inputs.train_data_path}}
      utility_eval_data_path: ${{parent.inputs.eval_data_path}}
      model_name: "roberta-base"
      is_synthetic: false
      # Column names for labels and text in the train and eval data.
      train_label_name: "label"
      train_text_name: "sentence"
      eval_label_name: "label"
      eval_text_name: "sentence"
      sequence_len: 128
      per_device_train_batch_size: 8
      # NOTE(review): presumably one process per GPU — confirm the compute
      # target exposes 8 GPUs.
      nproc_per_node: 8
      gradient_accumulation_steps: 1
      num_train_epochs: 1
      # Evaluate every `eval_steps` steps.
      evaluation_strategy: "steps"
      eval_steps: 500
      # Must stay quoted: a bare `no` is a boolean false under YAML 1.1.
      save_strategy: "no"
      log_level: info
      seed: 239023
      weight_decay: 0.01
      logging_steps: 10
      # Written with an explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML)
      # only resolve exponent notation as a float when a `.` is present, so a
      # bare `1e-5` would be loaded as the string '1e-5'.
      learning_rate: 1.0e-5
    outputs:
      output_dir:
        # Mount the output location read-write.
        mode: "rw_mount"
