# @package __global__

# Hydra defaults list: the entries are composed in the order written, and
# `_self_` in last position means the values set in this file are merged after
# (and therefore take precedence over) everything pulled in above it.
defaults:
  - speechgen/default
  - /model: lm/valle_lm_nar_small
  # `override` replaces a `dset` selection made elsewhere in the defaults tree.
  - override /dset: audio/speech_podcast_bpe_3-180_merged
  - _self_

# Mixed-precision switches.
# NOTE(review): presumably read by the solver to run under autocast with the
# dtype below — confirm against the solver code.
autocast: true
autocast_dtype: float16

# Checkpoint reference for the audio compression model. The name points at the
# 24 kHz EnCodec release, which lines up with `sample_rate: 24000` in this file.
compression_model_checkpoint: //pretrained/facebook/encodec_24khz

# Solver selection and the audio format used for this experiment.
solver: valle_nar
channels: 1  # single channel (mono)
sample_rate: 24000  # matches the 24 kHz compression model checkpoint

# Prompt conditioning bounds. `max_length` is also re-used by
# `generate.lm.prompt_duration` through interpolation.
# NOTE(review): min_ratio / max_ratio look like the fraction of each sample
# that may be taken as the prompt — confirm against the consumer.
prompt_cond:
  max_length: 3.0  # seconds
  min_ratio: 0.1
  max_ratio: 0.25

# Shared value: `dataset.max_audio_duration_for_short` interpolates this key.
# NOTE(review): units are not stated here; presumably seconds — confirm.
max_audio_duration_for_short: 54

deadlock:
  use: true  # deadlock detection

# Data loading settings. The `train` / `valid` / `evaluate` / `generate`
# sub-sections each set the number of samples drawn for that stage.
dataset:
  batch_size: 64  # A100 4 GPUs
  sample_on_weight: false  # Uniform sampling all the way
  sample_on_duration: false  # Uniform sampling all the way
  # Interpolated from the top-level key of the same name.
  max_audio_duration_for_short: ${max_audio_duration_for_short}
  num_workers: 16
  train:
    num_samples: 4096000  # > batch_size * updates_per_epoch
  valid:
    num_samples: 3200
  evaluate:
    num_samples: 3200
  generate:
    num_samples: 32

# Validation stage schedule.
# NOTE(review): `every` is presumably counted in epochs — confirm.
valid:
  every: 1
# Generation stage schedule and language-model sampling options.
# NOTE(review): `every` is presumably counted in epochs — confirm.
generate:
  every: 10
  lm:
    # Only prompted generation is enabled; the unprompted and ground-truth
    # sample flags are switched off.
    prompted_samples: true
    unprompted_samples: false
    gen_gt_samples: false
    # Interpolated from `prompt_cond.max_length`, so both stay in sync.
    prompt_duration: ${prompt_cond.max_length}
    # NOTE(review): presumably seconds, like `prompt_cond.max_length` — confirm.
    gen_duration: 180.0
    remove_prompts: false
    use_sampling: false
# Evaluation stage schedule; uses the same `every` value as `generate`.
evaluate:
  every: 10

# Optimisation settings overridden by this experiment: only the epoch count.
optim:
  epochs: 500

# Logging back-ends: time profiling is off, Weights & Biases logging is on.
logging:
  time_profile: false
  log_wandb: true
# Weights & Biases project and run name used for this experiment.
wandb:
  project: podcast_SpeechGen_en
  name: initial_exp
