---
# Dataset selection.
DataConfig:
  dataset: 'VisDA2017'

# Network architecture and pretraining options.
ModelConfig:
  backbone: 'resnet101'
  # NOTE(review): 0 presumably disables grouped normalization - confirm
  # against the model-building code.
  channels_per_group: 0
  # NOTE(review): pretrain_beta is presumably a coefficient of the selected
  # pretrain strategy - confirm against the pretraining code.
  pretrain_strategy: 'shot'
  pretrain_beta: 0.1
  bottleneck_dim: 256

# Optimization schedule and optimizer hyper-parameters.
TrainingConfig:
  batch_size: 64
  # Total number of samples used in each epoch.
  epoch_samples: 56000
  total_epochs: 60
  # Optimizer
  optimizer: 'SGD'
  momentum: 0.9
  # Regularization
  weight_decay: 0.0005
  # The learning rate decays from the begin value to the end value following
  # a cosine annealing schedule.
  learning_rate_begin: 0.0005
  learning_rate_end: 0.00006
  # EMA is used to retain the accuracy in the source domain.
  # Written as canonical lowercase `false` so that every YAML loader/schema
  # resolves it to a boolean rather than the string "False".
  ema: false

# Domain-adaptation task: source/target domain pair and the method to run.
DAConfig:
  source_domain: 'Synthetic'
  target_domain: 'Real'
  method: 'DaC'

DataAugConfig:
  # Supported values for method: 'identity', 'edgemix'.
  method: 'identity'

# Hyper-parameters for the DaC method. Requires number of GPUs >= 2.
DaCConfig:
  temperature: 0.05     # temperature for scaling the contrastive loss
  K: 300                # initial number of each class
  k: 5                  # number of nearest neighbors
  momentum: 0.2         # the banks are updated with EMA
  threshold: 0          # threshold for class filtering in obtain_label()
  confidence_gate: 0.97 # confidence gate for the split between source-like and target-specific
  # The following are the coefficients for the 4 loss components.
  cls_par: 0.39         # for cross-entropy loss
  im_par: 0.1           # for information-maximization loss
  # The two losses above constitute L_{self} in the paper.
  con_par: 1.0          # for L_{con}
  mmd_par: 0.3          # for L_{EMMD}
