# Settings for the hyperparameter-search tuners, keyed by tuner name.
# NOTE(review): BOHB / HB presumably refer to the BOHB and HyperBand
# schedulers -- confirm against the code that reads this section.
Tuners:
  BOHB:
    reduction_factor: 3
    # NOTE(review): presumably the name of the per-trial progress attribute
    # the scheduler measures time with -- confirm.
    time_attr: "training_iteration"
  HB:
    reduction_factor: 3


# Optimizer hyperparameter settings, keyed by optimizer name and then by a
# variant label ("1param", "2param", "3param", "4param").
# Each entry is either a fixed number or a quoted
# "Uniform(low, high, log=...)" expression. To YAML the latter is just a
# string, so its interpretation is left to the consumer. In every variant
# below, the number in the label equals the count of entries given as such
# expressions; the remaining entries are constants.
# NOTE(review): with log=True the bounds look like exponents (e.g. -8 and 1
# presumably meaning 1e-8 and 1e1) -- confirm against the parser.
# NOTE(review): new_beta1 / new_beta2 presumably encode 1 - beta1 and
# 1 - beta2 -- confirm against the optimizer construction code.
Optimizers:
  SGD:
    1param:
      lr: "Uniform(-8, 1, log=True)"
      momentum: 0.0
    2param:
      lr: "Uniform(-8, 1, log=True)"
      momentum: "Uniform(0, 1, log=False)"

  Adam:
    1param:
      lr: "Uniform(-8, 1, log=True)"
      new_beta1: 0.1  # author's annotation: 0.1
      # NOTE(review): 0.05 differs from the author's annotation (0.001) and
      # from the 0.001 used by RAdam, Yogi, Lookahead and LAMB -- confirm
      # this is intentional.
      new_beta2: 0.05  # author's annotation: 0.001
      eps: 1.0e-8
    4param:
      lr: "Uniform(-8, 1, log=True)"
      new_beta1: "Uniform(-4, 0, log=True)"
      new_beta2: "Uniform(-6, 0, log=True)"
      eps: "Uniform(-8, 1, log=True)"

  RAdam:
    1param:
      lr: "Uniform(-8, 1, log=True)"
      new_beta1: 0.1
      new_beta2: 0.001
      eps: 1.0e-8
    4param:
      lr: "Uniform(-8, 1, log=True)"
      new_beta1: "Uniform(-4, 0, log=True)"
      new_beta2: "Uniform(-6, 0, log=True)"
      eps: "Uniform(-8, 1, log=True)"

  Yogi:
    1param:
      lr: "Uniform(-8, 1, log=True)"
      new_beta1: 0.1
      new_beta2: 0.001
      # Fixed eps here is 1.0e-3, not the 1.0e-8 used by Adam and RAdam.
      eps: 1.0e-3
    4param:
      lr: "Uniform(-8, 1, log=True)"
      new_beta1: "Uniform(-4, 0, log=True)"
      new_beta2: "Uniform(-6, 0, log=True)"
      eps: "Uniform(-8, 1, log=True)"

  Lookahead:
    # k and alpha are constants in both variants; only the other four
    # entries are ever given as search expressions.
    1param:
      lr: "Uniform(-8, 1, log=True)"
      new_beta1: 0.1
      new_beta2: 0.001
      eps: 1.0e-8
      k: 5
      alpha: 0.5
    4param:
      lr: "Uniform(-8, 1, log=True)"
      new_beta1: "Uniform(-4, 0, log=True)"
      new_beta2: "Uniform(-6, 0, log=True)"
      eps: "Uniform(-8, 1, log=True)"
      k: 5
      alpha: 0.5

  LAMB:
    1param:
      lr: "Uniform(-8, 1, log=True)"
      new_beta1: 0.1
      new_beta2: 0.001
      # Fixed eps here is 1.0e-6, not the 1.0e-8 used by Adam and RAdam.
      eps: 1.0e-6
    4param:
      lr: "Uniform(-8, 1, log=True)"
      new_beta1: "Uniform(-4, 0, log=True)"
      new_beta2: "Uniform(-6, 0, log=True)"
      eps: "Uniform(-8, 1, log=True)"

  LARS:
    # Uses momentum (like SGD) rather than new_beta1 / new_beta2.
    1param:
      lr: "Uniform(-8, 1, log=True)"
      momentum: 0.9
      eps: 1.0e-8
    3param:
      lr: "Uniform(-8, 1, log=True)"
      momentum: "Uniform(0, 1, log=False)"
      eps: "Uniform(-8, 1, log=True)"



# Learning-rate schedule variants. Each variant sets `warmup` and
# `decay_rate`, either to a constant or to a quoted "Uniform(...)" search
# expression (a plain string as far as YAML is concerned).
LR_scheduler:
  nowarmup_nodecay:
    warmup: 0
    decay_rate: 1
  nowarmup_decay:
    warmup: 0
    decay_rate: "Uniform(-4, 0, log=True)"
  warmup_decay:
    warmup: "Uniform(-1, 0, log=True)"  # optimal: 0.5
    decay_rate: "Uniform(-4, 0, log=True)"  # optimal: 0.01
    # Disabled alternative search space, kept for reference:
    # warmup: "Uniform(1.0e-6, 1.0, log=False)"
    # decay_rate: "Uniform(-4, 0, log=True)"


# Per-task run settings. `default` holds the baseline values; every other
# entry lists task-specific values.
Tasks:
  default:
    # Default values, which will be overwritten by specific tasks.
    ncpu: 4
    ngpu: 0
    metric: "mean_accuracy"
    mode: "max"
    max_t: 100
    num_config: 62
    batch_size: 1
    # NOTE(review): plain `none` is not a YAML null (only `null`, `~` or an
    # empty value are); it loads as the string "none" -- confirm the
    # consumer expects that.
    data_root: none
    save_root: "./checkpoint"
    checkpoint_freq: 50

  # CIFAR10 task values. num_class has no counterpart in `default`; keys not
  # listed here (e.g. metric, mode) are only set in `default`.
  CIFAR10:
    ngpu: 1
    max_t: 200
    num_config: 172
    batch_size: 128
    data_root: "~/data"
    num_class: 10

  # CIFAR100 task values; identical to CIFAR10 except for num_class.
  CIFAR100:
    ngpu: 1
    max_t: 200
    num_config: 172
    batch_size: 128
    data_root: "~/data"
    num_class: 100

  # VAE task values. Sets metric/mode to "mean_loss"/"min" instead of the
  # "mean_accuracy"/"max" given in `default`.
  VAE:
    # Fractional value.
    # NOTE(review): presumably lets two trials share one GPU -- confirm.
    ngpu: 0.5
    max_t: 50
    num_config: 62
    batch_size: 144
    data_root: "~/data"
    checkpoint_freq: 10
    mode: "min"
    metric: "mean_loss"

  # GAN task values. Unlike the other tasks in this file it uses
  # gen_batch_size / dis_batch_size and dataset / data_path rather than
  # batch_size / data_root, and it carries model/evaluation settings that
  # have no counterpart in `default`.
  GAN:
    ngpu: 1
    max_t: 200
    num_config: 172
    gen_batch_size: 128
    dis_batch_size: 64
    num_workers: 8
    latent_dim: 128
    img_size: 32
    channels: 3
    n_critic: 5
    # Canonical lowercase booleans: they load as the same bool values as
    # True / False but do not depend on the parser's YAML version or schema.
    d_spectral_norm: true
    g_spectral_norm: false
    dataset: "cifar10"
    data_path: "~/data"
    init_type: "xavier_uniform"
    gf_dim: 256
    df_dim: 128
    inception_dim: 2048
    eval_batch_size: 50
    num_eval_imgs: 10000
    bottom_width: 4
    # Sets metric/mode to "fid"/"min" instead of the "mean_accuracy"/"max"
    # given in `default`.
    mode: "min"
    metric: "fid"


  # Glue-RTE task values; metric and mode are not set here.
  Glue-RTE:
    ngpu: 1
    max_t: 10
    num_config: 200
    batch_size: 4
    data_root: "./RTE-bin"

  # Glue-MRPC task values; identical to Glue-RTE except for data_root.
  Glue-MRPC:
    ngpu: 1
    max_t: 10
    num_config: 200
    batch_size: 4
    data_root: "./MRPC-bin"

  # GCN task values. Sets metric to "f1"; mode and max_t repeat the values
  # already given in `default` ("max", 100).
  GCN:
    ngpu: 1
    max_t: 100
    num_config: 200
    batch_size: 100
    data_root: "./cluster_gcn/data"
    metric: "f1"
    mode: "max"

  # RL task values. Only ngpu, metric and max_t are set here; ngpu repeats
  # the value given in `default` (0). num_config, batch_size and data_root
  # are not set for this task.
  RL:
    ngpu: 0
    metric: "rewards"
    max_t: 60
