# @package __global__

# Hydra defaults list: the entries below are composed in the order given.
defaults:
  - compression/default
  - /model: encodec/speechtokenizer_nq8_s500
  - override /dset: audio/speech_debug
  # `_self_` is listed last, so the keys defined in this file are merged
  # after (and take precedence over) the configs composed above.
  - _self_

# Solver selection and basic audio format for this experiment.
solver: speechtokenizer
channels: 1  # single audio channel
# Presumably in Hz; note that semantic_model.sample_rate below is 16000.
sample_rate: 24000

# Settings for the semantic model. The `semantic_distill` entry under
# `losses_other` suggests it is the distillation target — TODO confirm.
semantic_model:
  # Looks like a Hugging Face Hub model id; verify against the loader.
  path: facebook/hubert-base-ls960
  # Presumably the width of the model's output features.
  dim: 768
  # Presumably feature frames per second.
  frame_rate: 50
  # Not the same as the top-level sample_rate (24000); audio presumably has
  # to be resampled before reaching this model — confirm.
  sample_rate: 16000
  # Presumably the index of the layer whose outputs are used.
  target_layer: 8

# Loss weights, one entry per loss term. Floats are written with an explicit
# fractional digit (4.0 rather than 4.) so the type is obvious at a glance.
losses:
  adv: 4.0
  feat: 4.0
  l1: 0.1
  mel: 0.0
  msspec: 2.0
  sisnr: 0.0
# Weights for the remaining terms, kept in a mapping separate from `losses`.
losses_other:
  penalty: 1.0
  semantic_distill: 10.0

# Adversaries to enable, in the order listed.
adversarial:
  adversaries:
    - msstftd
    - msd
    - mpd

# Loss hyperparameters.
# The empty mapping sets no `semantic_distill` options in this file.
semantic_distill: {}

# Data loading settings: top-level values followed by per-split sections
# (train / valid / evaluate / generate).
dataset:
  batch_size: 1536
  num_workers: 16
  train:
    # NOTE(review): this was annotated "batch_size * updates_per_epoch", but
    # 100 does not equal 1536 * 2000 (see optim.updates_per_epoch). It looks
    # like a debug-sized value — confirm before launching a full run.
    num_samples: 100
  valid:
    num_samples: 100
  # The evaluate and generate splits set their own, smaller batch_size.
  evaluate:
    batch_size: 64
    num_samples: 100
  generate:
    batch_size: 64
    num_samples: 5
    segment_duration: 10  # presumably seconds — confirm

# Evaluation stage settings.
evaluate:
  every: 20  # presumably counted in epochs (optim.epochs is 200) — confirm
  num_workers: 8

# Generation stage settings; uses the same `every` value as evaluation.
generate:
  every: 20
  num_workers: 8

# Optimizer and training-length settings.
optim:
  epochs: 200
  updates_per_epoch: 2000
  # Written with an explicit mantissa dot: a bare `9e-4` is not recognised as
  # a float by YAML 1.1 resolvers (plain PyYAML loads it as the string
  # "9e-4"), so this form stays numeric regardless of the loader.
  lr: 9.0e-4
  max_norm: 10.0
  optimizer: adam
  # Options for the optimizer selected above.
  adam:
    betas: [0.5, 0.9]
    weight_decay: 0.0

# Learning-rate schedule: selects the `exponential` scheduler and sets its
# options in the sub-mapping of the same name.
schedule:
  lr_scheduler: exponential
  exponential:
    # NOTE(review): if this factor is applied once per optimizer step, then
    # over 200 * 2000 updates it compounds to roughly 0.0067x the initial
    # learning rate — confirm the stepping granularity.
    lr_decay: 0.9999875

# Experiment tracking: sets the `log_wandb` flag; the `wandb` section supplies
# the project and run name (presumably for Weights & Biases).
logging:
  log_wandb: true
wandb:
  project: podcast_Encodec
  name: initial_exp
