# @package __global__

# Hydra defaults list. Entries are composed in the order written, and `_self_`
# marks where this file's own keys are merged relative to the other entries,
# so the ordering below is significant.
defaults:
  - /solver/default
  - /conditioner: text2speech
  - _self_
  # Overrides the `dset` group choice made elsewhere in the composition.
  - override /dset: audio/speech_debug

# Autocast switch and the dtype name that goes with it.
# NOTE(review): presumably drives mixed-precision training in the solver -- confirm.
autocast: true
autocast_dtype: float16

# Solver selection and required run parameters.
# `???` is OmegaConf's mandatory-value marker: each key below must be supplied
# by another config or a command-line override before it is accessed.
solver: speechgen
sample_rate: ???
channels: ???
compression_model_checkpoint: ???

# Per-loss coefficients; the two values below sum to 1.0.
# NOTE(review): presumably weights applied to the text and audio loss terms --
# confirm against the solver.
losses:
  text: 0.01
  audio: 0.99

# Cache settings. `path` is null in this config, i.e. no cache location is
# configured here.
# NOTE(review): presumably a null path means caching is disabled -- confirm.
cache:
  path: null
  write: false
  write_shard: 0
  write_num_shards: 1

# Dataset settings: common keys first, then one sub-mapping per stage
# (train / valid / evaluate / generate), each carrying its own `num_samples`.
# NOTE(review): the *_duration values are presumably in seconds -- confirm.
dataset:
  batch_size: 128
  num_workers: 10
  min_audio_duration: 2.0
  max_audio_duration: 20.0
  sample_on_weight_for_utter: false
  max_audio_duration_for_short: null
  segment_duration: null
  min_segment_ratio: null
  return_info: true
  buffer_length: 0.0
  train:
    # If null, num_samples = len(dataset). If len(dataset) is smaller than
    # batch_size * updates_per_epoch, set num_samples explicitly to a multiple
    # of batch_size * updates_per_epoch.
    num_samples: null
  valid:
    num_samples: null
  evaluate:
    num_samples: null
  generate:
    num_samples: null

# Generation stage settings: scheduling, output location, audio export
# options, and language-model sampling parameters.
# NOTE(review): `every` is presumably counted in epochs -- confirm.
generate:
  every: 25
  num_workers: 5
  path: samples
  audio:
    format: wav
    strategy: loudness
    # Interpolation: resolves to the top-level `sample_rate` value.
    sample_rate: ${sample_rate}
    loudness_headroom_db: 14
  lm:
    prompted_samples: false
    unprompted_samples: true
    gen_gt_samples: false
    prompt_duration: 2.0  # required: this value must always be set
    gen_duration: 10.0
    remove_prompts: false
    # Sampling parameters used during generation.
    use_sampling: true
    top_k: 50
    top_p: 0.85
    temp: 0.75
    repetition_penalty: 5.0
# Evaluation stage settings. `metrics` is an empty mapping in this config.
# NOTE(review): `every` is presumably counted in epochs -- confirm.
evaluate:
  every: 25
  num_workers: 5
  metrics: {}

# Checkpoint retention settings.
# NOTE(review): `save_every` / `keep_last` are presumably expressed in epochs
# and number of checkpoints respectively -- confirm against the solver.
checkpoint:
  save_last: true
  save_every: 5
  keep_last: 10
  keep_every_states: null

# Optimisation settings: training length, optimizer choice and its
# hyper-parameters, gradient-norm limit, and EMA options.
# Floats in scientific notation are written with an explicit decimal point
# (`1.0e-4`, not `1e-4`) so that YAML 1.1 loaders also read them as floats
# rather than strings.
optim:
  epochs: 500
  updates_per_epoch: 2000
  lr: 1.0e-4  # same setting as in the paper
  optimizer: adamw
  max_norm: 1.0
  eager_sync: true
  adam:
    betas: [0.9, 0.95]
    weight_decay: 0.1
    eps: 1.0e-8
  ema:
    use: true
    updates: 10
    device: cuda

# Learning-rate schedule: selects the `cosine` scheduler and sets its options.
# NOTE(review): `warmup` is presumably a number of optimizer steps -- confirm.
schedule:
  lr_scheduler: cosine
  cosine:
    warmup: 4000
    lr_min_ratio: 0.0
    cycle_length: 1.0
