# @package __global__

# Hydra defaults list. Entries are composed in the order written; `_self_` is
# last, so the values set in this file override those pulled in by the
# entries above it.
defaults:
  - speechgen/default
  # Select the `lm/valle_lm_nar_small` option for the `/model` config group.
  - /model: lm/valle_lm_nar_small
  # Replace the `/dset` choice made by an earlier entry with the debug dataset.
  - override /dset: audio/speech_debug
  - _self_

# Autocast is enabled with a float16 dtype.
autocast: true
autocast_dtype: float16

# Checkpoint reference for the compression model.
# NOTE(review): the checkpoint name says 24 kHz, matching `sample_rate` below;
# presumably the two must be changed together - confirm.
compression_model_checkpoint: //pretrained/facebook/encodec_24khz

# Solver selection and audio format (1 channel, 24000 Hz).
solver: valle_nar
channels: 1
sample_rate: 24000

# Prompt conditioning settings. `max_length` is also reused by
# `generate.lm.prompt_duration` through interpolation, so changing it here
# changes the prompt duration used at generation time as well.
prompt_cond:
  max_length: 3.0  # seconds
  # NOTE(review): presumably lower/upper bounds on the prompt length expressed
  # as a fraction of the sample length - confirm against the solver code.
  min_ratio: 0.1
  max_ratio: 0.25

# Deadlock detection toggle.
deadlock:
  use: true  # enable deadlock detection

# Data loading settings shared by all splits, followed by per-split sample
# counts for the train / valid / evaluate / generate stages.
dataset:
  batch_size: 4
  sample_on_weight: false  # Uniform sampling all the way
  sample_on_duration: false  # Uniform sampling all the way
  num_workers: 4
  # Number of samples drawn for each stage.
  train:
    num_samples: 2160
  valid:
    num_samples: 32
  evaluate:
    num_samples: 12
  generate:
    num_samples: 4

# Generation stage settings.
# NOTE(review): `every: 1` presumably means "run this stage every epoch" -
# confirm against the solver.
generate:
  every: 1
  lm:
    # Only prompted generation is enabled; unprompted and ground-truth
    # sample generation are switched off.
    prompted_samples: true
    unprompted_samples: false
    gen_gt_samples: false
    # Interpolated from `prompt_cond.max_length` (3.0 in this file).
    prompt_duration: ${prompt_cond.max_length}
    # NOTE(review): presumably seconds, like `prompt_cond.max_length` - confirm.
    gen_duration: 10.0
    remove_prompts: false
    use_sampling: false
# Evaluation stage schedule (same `every` value as `generate`).
evaluate:
  every: 1

# Short training schedule: 2 epochs of 20 updates each.
optim:
  epochs: 2
  updates_per_epoch: 20

# Logging verbosity and backends.
logging:
  level: DEBUG
  time_profile: false
  # NOTE(review): Weights & Biases logging is enabled although the dataset
  # choice (`audio/speech_debug`) and the run name (`debug`) suggest a debug
  # configuration - confirm that reporting these runs is intended.
  log_wandb: true
# Weights & Biases project and run name used when `logging.log_wandb` is true.
wandb:
  project: podcast_SpeechGen_en
  name: debug
