# Composition list. The `_self_` entry suggests a Hydra-style defaults list
# (NOTE(review): confirm against the loader). Entries are listed in order:
# the `workspace` selection first, then this file's own values.
defaults:
  - workspace: M365Research-PPML-EUS
  - _self_

# Dataset selection: named and versioned train/eval datasets, plus the text
# column and a word-count setting.
data_config:
  train_data_name: "SST2-train"
  # Versions are quoted so they stay strings instead of being typed as integers.
  train_data_version: "5"
  eval_data_name: "SST2-test"
  eval_data_version: "5"
  # NOTE(review): presumably the minimum number of words a sample must have
  # to be kept; confirm with the consumer.
  min_words: 5
  # Same value as shared_training_parameters.text_column in this file.
  text_column: "sentence"

# Training settings: base model reference, prompt template, column names,
# optimisation hyperparameters, and LoRA / precision options.
shared_training_parameters:
  model_path: "azureml:mistralai-Mistral-7B-v0_1:4"
  # Template containing a `{label}` placeholder; the trailing space inside the
  # quotes is part of the value.
  templated_prompt: "A sentence with a {label} sentiment: "
  # Same value as data_config.text_column in this file.
  text_column: "sentence"
  label_column: "label"
  # Same value as shared_inference_parameters.sequence_len in this file.
  sequence_len: 256
  # Written with an explicit decimal point: YAML 1.1 resolvers (e.g. plain
  # PyYAML) read `2e-5` as the string "2e-5", whereas `2.0e-5` is a float
  # under every common loader.
  learning_rate: 2.0e-5
  num_train_epochs: 1.0
  per_device_train_batch_size: 2
  gradient_accumulation_steps: 8
  enable_lora: true
  lora_dim: 4
  # This is a single string that contains a Python-style list literal, not a
  # YAML sequence. NOTE(review): presumably the consumer parses the string
  # itself; confirm before converting it to a real list.
  target_modules: "['embed_tokens', 'q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'lm_head']"
  gradient_checkpointing: true
  torch_dtype: "bf16"
  quantization_4bit: false

# Inference settings: batch size and sequence length.
shared_inference_parameters:
  per_device_batch_size: 16
  # Same value as shared_training_parameters.sequence_len in this file.
  sequence_len: 256

# Settings for the "game": model and repetition counts, seed, job concurrency,
# and challenge points per model.
# NOTE(review): the exact meaning of each count is defined by the consumer;
# confirm there before changing values.
game_config:
  num_models: 1
  num_repetitions: 20
  # Fixed seed value. NOTE(review): presumably makes runs reproducible; confirm.
  seed: 1920
  num_concurrent_jobs_per_node: 1
  num_challenge_points_per_model: 1000

# Shadow-model settings.
shadow_model_config:
  # NOTE(review): presumably the fraction of candidate samples included in
  # each shadow model's training data; confirm with the consumer.
  in_fraction: 0.5
  # Separate from game_config.num_models, which is set to 1 in this file.
  num_models: 4

# Signal settings: the method, how values are aggregated, and method-specific
# extra arguments.
mi_signal_config:
  method: CrossEntropy
  aggregation: mean
  # Additional arguments nested under this signal configuration.
  extra_args:
    # NOTE(review): presumably a temperature used by the CrossEntropy method;
    # confirm with the consumer.
    temp: 2.0

# Explicit empty mapping (not null): no rmia_config options are set in this file.
rmia_config: {}

# Canary settings: where canaries come from, how many there are, and the
# token-replacement options.
# Each key appears exactly once. Duplicate mapping keys are invalid YAML:
# strict loaders reject them and lax loaders silently keep only the last
# occurrence.
canary_config:
  method: external_data
  n_canaries: 1000
  canary_data_name: "AmazonPolarity5k-train"
  # Quoted so the version stays a string instead of being typed as an integer.
  canary_data_version: "2"
  canary_text_column: "sentence"
  # NOTE(review): this key was previously defined twice in this mapping
  # (5, then 100). 100 is kept because it is the value a last-wins parser
  # used; confirm that 100 is the intended threshold.
  canary_min_words: 100
  num_tokens_to_replace: 0
  replacement_method: "random"
  mlm_name: "roberta-base"