---
# Seed value for the run.
# NOTE(review): presumably fed to the RNGs by the training script — confirm.
seed: 42

# Folder holding the dialect text data, relative to the working directory.
# NOTE(review): the "4-1-1" component looks like a train/val/test split
# ratio — confirm against the data preparation script.
dialect_file_folder: './multimodal-dialectal-bias/data/text/train_val_test/4-1-1/'

# Disabled tokenizer / text-encoder settings, kept for reference:
# tokenizer: openai/clip-vit-large-patch14
# text_encoder: openai/clip-vit-large-patch14

# Hugging Face model id of the Stable Diffusion checkpoint.
stable_diffusion_model: runwayml/stable-diffusion-v1-5

# SECURITY: a Hugging Face access token used to be hard-coded on this line.
# Treat that token as leaked: revoke it in the Hugging Face account settings
# and do not commit a replacement. Supply credentials out-of-band instead,
# e.g. through the HF_TOKEN environment variable or `huggingface-cli login`.
# NOTE(review): confirm that the code reading this key accepts null and falls
# back to the ambient credentials; otherwise inject the token at runtime.
hf_token: null

# Optimizer used for training: the nested key names an optimizer from
# torch.optim and its mapping lists that optimizer's parameters.
optimizer:
  AdamW:
    # An alternative value of 1e-6 was noted next to this setting.
    lr: 0.0001
    betas:
      - 0.9
      - 0.999
    eps: 1.0e-8
    weight_decay: 0.0

# Optional learning-rate scheduler taken from torch.optim; the nested key
# names the scheduler and its mapping lists the scheduler's parameters.
lr_scheduler:
  # Disabled alternative, kept for reference:
  # MultiStepLR:
  #   milestones: [1000]
  #   gamma: 0.1
  #
  # NOTE(review): T_max (30) is lower than training.epochs (40). If the
  # scheduler is stepped once per epoch, a cosine schedule starts climbing
  # again after T_max steps — confirm this is intended.
  CosineAnnealingLR:
    T_max: 30

# Training parameters.
training:
  epochs: 40
  loss_weight: 1.0
  num_steps: 100

  # Batch sizes for the clean and polysemy inputs.
  clean_batch_size: 32
  polysemy_batch_size: 32

  # Parallelism settings.
  num_threads: 16
  dataloader_num_workers: 8

  # Output location for saved artifacts.
  # NOTE(review): presumably resolved relative to the working directory.
  save_path: 'models'

  # Name of the loss to use.
  # NOTE(review): presumably looked up by name in the training code.
  loss_fkt: 'SimilarityLoss'

  # Weights of the individual terms.
  # NOTE(review): a weight of 0.0 presumably switches the term off — confirm
  # in the training code.
  weight_unlearn: 1.0
  weight_sae_reg: 0.0
  weight_dialect_reg: 0.0
  weight_kl_reg: 1.0
  weight_polysemy: 1.0

  # Noise switches (both currently off) and the noise standard deviation.
  add_noise_to_encoder: false
  add_noise_to_embedding: false
  noise_std: 0.001

# Parameters for the evaluation metrics.
evaluation:
  # Disabled setting, kept for reference:
  # caption_file: metrics/captions_10000_o.txt
  batch_size: 256
  log_samples: false

# RTPT details. RTPT renames the process to show the remaining time and the
# user who started the process.
rtpt:
  experiment_name: 'Dialect Unlearning'
  name_initials: 'SA'

# Options for WandB logging.
wandb:
  # Set to true to activate the logging.
  enable_logging: true
  # Arguments for the wandb.init call. See
  # https://docs.wandb.ai/ref/python/init for a complete overview.
  args:
    project: 'Dialect Unlearning'
    name: 'dialect_unlearning'
    save_code: true

# Caption dataset settings.
caption_dataset:
  caption_file_path: './data/mscoco/annotations/captions_val2017.json'
  image_folder_path: './data/generated/images/mscoco_orig'
  batch_size: 128
  control_size: 1024
  control_size_eval: 256
  # Allowed values: image, text
  mode: 'text'

# Hugging Face model id of the CLIP model.
clip_model: 'openai/clip-vit-large-patch14'