# Defaults list: selects a `workspace` option and then `_self_`.
# NOTE(review): this looks like a Hydra defaults list, where entry order
# decides which source wins on conflicting keys — confirm before reordering.
defaults:
  - workspace: M365Research-PPML-EUS
  - _self_

# Names and versions of the training and evaluation datasets, plus the
# text-selection settings applied to them.
data_config:
  train_data_name: "SST2-train"
  # Versions are quoted, so they load as strings ("5"), not integers.
  train_data_version: "5"
  eval_data_name: "SST2-test"
  eval_data_version: "5"
  # Presumably a minimum word count used to filter examples — TODO confirm.
  min_words: 5
  # Same column name as `text_column` in the training and inference sections.
  text_column: "sentence"

# Fine-tuning settings. The section name suggests they are shared by every
# training job launched from this config — TODO confirm against the consumer.
shared_training_parameters:
  # "azureml:<name>:<version>"-shaped reference to the base model.
  model_path: "azureml:mistralai-Mistral-7B-v0_1:4"
  # `{label}` is a placeholder; the identical template string also appears in
  # shared_inference_parameters and canary_config.
  templated_prompt: "A sentence with a {label} sentiment: "
  text_column: "sentence"
  label_column: "label"
  sequence_len: 256
  # Written with an explicit decimal point: YAML 1.1 loaders such as PyYAML
  # resolve a bare `2e-5` as a string, not a float. The value is unchanged.
  learning_rate: 2.0e-5
  num_train_epochs: 1.0
  per_device_train_batch_size: 2
  gradient_accumulation_steps: 8
  enable_lora: true
  # Presumably the LoRA rank — TODO confirm.
  lora_dim: 4
  # A quoted string holding a Python-style list literal, NOT a YAML sequence.
  # Presumably parsed by the consumer; keep the outer quotes as they are.
  target_modules: "['embed_tokens', 'q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'lm_head']"
  gradient_checkpointing: true
  torch_dtype: "bf16"
  quantization_4bit: false

# Inference settings. `sequence_len`, `text_column`, `label_column` and
# `templated_prompt` carry the same values as in shared_training_parameters.
shared_inference_parameters:
  per_device_batch_size: 16
  sequence_len: 256
  text_column: "sentence"
  label_column: "label"
  # `{label}` is a placeholder inside the prompt template.
  templated_prompt: "A sentence with a {label} sentiment: "

# Top-level experiment ("game") settings: how many models, repetitions and
# challenge points to use, and how jobs are scheduled.
# NOTE(review): exact semantics of each field live in the consumer — confirm there.
game_config:
  num_models: 1
  num_repetitions: 12
  # Same seed value as canary_config.seed.
  seed: 1920
  num_concurrent_jobs_per_node: 1
  num_challenge_points_per_model: 1000

# Shadow-model settings.
shadow_model_config:
  # Presumably the fraction of candidate points included in each shadow
  # model's training set — TODO confirm.
  in_fraction: 0.5
  num_models: 4

# Membership-inference signal settings: the scoring method, the aggregation
# applied to it, and extra keyword-style arguments.
mi_signal_config:
  method: CrossEntropy
  aggregation: expsum
  extra_args:
    # Presumably a temperature used by the aggregation — TODO confirm.
    temp: 2.0

# Explicit empty mapping (loads as an empty dict, not null); no overrides are set here.
rmia_config: {}

# Canary settings: the selection method, how many canaries to use, and the
# generation/filtering parameters listed below.
canary_config:
  canary_method: "hold_out_original_data"
  n_canaries: 1000
  # Differs from the other configs: 30 was chosen manually as the maximum
  # in-distribution length for SST-2.
  canary_length: 30
  # The values below are the quoted string "none", not YAML null.
  external_artifact: "none"
  external_artifact_version: "none"
  canary_text_column: "none"
  # NOTE(review): the key is spelled "comptability" (sic). It presumably has to
  # match the name the consumer reads, so do not rename it in this file alone.
  label_comptability_method: "none"
  # Same seed value as game_config.seed.
  seed: 1920
  # Same template string as in the training and inference sections.
  templated_prompt: "A sentence with a {label} sentiment: "
  # Presumably perplexity bounds for accepted canaries — TODO confirm.
  min_ppl: 5.0
  max_ppl: 15.0
  # Presumably the sampling-temperature range — TODO confirm.
  min_temperature: 0.8
  max_temperature: 1.2
  prefix_length: 0
  num_tokens_to_replace: 0
  replacement_method: "random"
  # Presumably the masked-language-model checkpoint name — TODO confirm.
  mlm_name: "roberta-base"