# @package __global__

# Hydra defaults list. Entry order is significant for composition, so keep
# the entries in this order.
defaults:
  # Base configuration that this experiment builds on.
  - compression/default_hierarchical2
  # Model option, selected through an absolute config-group path.
  - /model: encodec/speechtokenizer_large_hier6
  # Replaces a dataset option chosen earlier in the composition
  # (presumably by the base config above) with the speech debug set.
  - override /dset: audio/speech_debug
  # Listed last so that, per Hydra's defaults-list rules, the values written
  # in this file are merged after the entries above.
  - _self_

# Solver selection and model initialisation switches.
solver: compression_hier6
# NOTE(review): presumably initialises the autoencoder from the checkpoint
# named by `base_encodec_path`, which is null in this file. Confirm that the
# solver handles a null path, or supply the path on the command line.
init_autoencoder_from_base: true

semantic_model:
  dim: 768  # kept at 768 so that the pretrained weights can be loaded

# Loss balancing: the weight given to each named loss term.
# mel and sisnr are weighted 0.0 in this experiment.
losses:
  adv: 4.0
  feat: 4.0
  l1: 0.1
  mel: 0.0
  msspec: 2.0
  sisnr: 0.0

# Weights for the loss terms kept outside `losses`.
# Both l1_hidden families (tf and rec) use the same values, stepping down
# 8.0 -> 6.0 -> 4.0 -> 2.0 from index 0 to index 3.
losses_for_others:
  penalty: 1.0
  l1_hidden_tf_0: 8.0
  l1_hidden_tf_1: 6.0
  l1_hidden_tf_2: 4.0
  l1_hidden_tf_3: 2.0
  l1_hidden_rec_0: 8.0
  l1_hidden_rec_1: 6.0
  l1_hidden_rec_2: 4.0
  l1_hidden_rec_3: 2.0

# Audio format: single-channel audio at a sample rate of 24000.
channels: 1
sample_rate: 24000
# No base EnCodec checkpoint path is set in this file.
# NOTE(review): `init_autoencoder_from_base` is true in this file, so this is
# presumably meant to be provided as an override at launch time — confirm.
base_encodec_path: null

# Data loading settings. The top-level batch_size and num_workers are followed
# by per-split sections; evaluate and generate set their own batch_size of 64.
dataset:
  batch_size: 1536
  num_workers: 16
  train:
    # NOTE(review): this value was annotated "batch_size * updates_per_epoch",
    # but 1536 * 2000 = 3,072,000, not 100. The small value looks like a debug
    # setting (the dset selected above is audio/speech_debug) — confirm the
    # intended value before using this config for a full training run.
    num_samples: 100
  valid:
    num_samples: 100
  evaluate:
    batch_size: 64
    num_samples: 100
  generate:
    batch_size: 64
    num_samples: 5
    # NOTE(review): presumably the length, in seconds, of each generated
    # segment — confirm against the dataset code.
    segment_duration: 10

# Per-stage schedule: all three stages use the same `every` value of 10, and
# evaluate and generate additionally set num_workers to 8.
# NOTE(review): `every` is presumably counted in epochs — confirm in the solver.
valid:
  every: 10
evaluate:
  every: 10
  num_workers: 8
generate:
  every: 10
  num_workers: 8

# Checkpoint retention settings.
# NOTE(review): save_every / keep_last are presumably counted in epochs and
# checkpoints respectively — confirm against the checkpointing code.
checkpoint:
  save_last: true
  save_every: 10
  keep_last: 10
  keep_every_states: null

# Optimisation settings: 200 epochs of 2000 updates each.
optim:
  epochs: 200
  updates_per_epoch: 2000
  # Written with an explicit decimal point in the mantissa: YAML 1.1 resolvers
  # (for example plain PyYAML) read a bare `3e-4` as the string "3e-4" rather
  # than the float 0.0003. The value itself is unchanged.
  lr: 3.0e-4
  max_norm: 10.0
  optimizer: adam
  # Options for the `adam` optimizer named above.
  adam:
    betas: [0.5, 0.9]
    weight_decay: 0.0

# Learning-rate schedule: exponential decay with the factor given below.
# NOTE(review): a factor this close to 1 suggests it is applied once per
# update rather than once per epoch — confirm in the scheduler code.
schedule:
  lr_scheduler: exponential
  exponential:
    lr_decay: 0.9999875

# Experiment tracking: Weights & Biases logging is switched on, and runs are
# filed under the project and run name given below.
logging:
  log_wandb: true
wandb:
  project: podcast_VQ_VAE
  name: initial_exp
