# Quantizer settings.
# NOTE(review): `init_args` is presumably forwarded to the quantizer's
# constructor — confirm against the code that loads this file.
quantizer:
  init_args:
    # Presumably the number of entries in each codebook — TODO confirm.
    codebook_size: 320
    channels: 128 # per codebook
# Model settings for the wav2vec2 network.
# NOTE(review): `init_args` is presumably forwarded to the model's
# constructor — confirm against the code that loads this file.
wav2vec2: # Encoder and Decoder
  init_args:
    # NOTE(review): presumably selects a two-codebook quantizer setup;
    # verify how this interacts with `quantizer.init_args.channels`.
    dual_codebook: true
    embed_dim: 768
    dropout_input: 0.1
    # Kernel size and group count, presumably for a convolutional
    # positional embedding — TODO confirm.
    pos_conv_kernel: 128
    pos_conv_groups: 16
    num_layers: 12
    # 768 / 8 = 96 dims per head, assuming embed_dim is split evenly
    # across attention heads — TODO confirm.
    num_heads: 8
    attention_dropout: 0.1
    # Presumably the hidden width of the feed-forward sublayer and the
    # dropout applied inside it — TODO confirm.
    ff_interm_features: 4096
    ff_interm_dropout: 0.1
    dropout: 0.1
    # Presumably the per-layer skip probability during training
    # (LayerDrop) — TODO confirm.
    layer_drop: 0.05
# Contrastive loss settings.
# NOTE(review): `init_args` is presumably forwarded to the loss
# constructor — confirm against the code that loads this file.
contrast_loss:
  init_args:
    # Presumably the number of negative samples drawn per target —
    # TODO confirm.
    num_distractors: 100
    # Presumably the divisor applied to similarity logits — TODO confirm.
    temperature: 0.1
# Optimizer hyperparameters.
# The scheduler section of this file notes that its initial learning rate
# refers to `lr` here, so changing `lr` also changes the schedule's start.
optimizer:
  lr: 5.0e-4
  # Two-element sequence, written in block style; same values as the
  # flow form `[0.9, 0.99]`.
  betas:
    - 0.9
    - 0.99
  weight_decay: 0.0
# Learning-rate scheduler settings.
scheduler:
  # initial_lr is intentionally not set here; it refers to the optimizer's lr.
  # Iteration counts, presumably for a warm-up phase followed by a decay
  # phase — TODO confirm against the scheduler implementation.
  warmup_iters: 10000
  decay_iters: 115000
  # NOTE(review): meaning is not evident from this file — possibly the
  # factor relating initial_lr to the minimum lr; confirm before changing.
  min_lr_constant: 100
# Run-level training settings.
batch_size: 64
num_epochs: 20
# NOTE(review): unit is not stated in this file — presumably the maximum
# input length in audio samples; confirm against the data loader.
max_length: 250000