# @package __global__

# Composition list: entries are merged in the order given. `_self_` is last,
# so the values defined in this file override those from the entries above it.
# NOTE(review): the model group is `halle_short4_...` while `solver` below is
# `halle_short5_nar` -- confirm the short4/short5 pairing is intentional.
defaults:
  - speechgen/default
  - /model: lm/halle_short4_lm_small_nar
  - override /dset: audio/speech_debug  # replaces the inherited `dset` choice
  - _self_

# Mixed-precision switches: autocast is enabled with a float16 dtype.
# Presumably forwarded to the framework's autocast context -- TODO confirm.
autocast: true
autocast_dtype: float16

# Solver selection and the checkpoints it starts from.
solver: halle_short5_nar
# The `//pretrained/` prefix presumably selects a published pretrained
# compression model rather than a local file -- TODO confirm with the loader.
compression_model_checkpoint: //pretrained/facebook/encodec_24khz
# Null here, i.e. no initialization source is configured in this file.
init_from_valle: null

# Prompt conditioning settings. `max_length` is also reused by
# `generate.lm.prompt_duration` through interpolation.
prompt_cond:
  max_length: 3.0  # seconds
  # NOTE(review): presumably the prompt length is bounded to a fraction of the
  # sample between min_ratio and max_ratio -- confirm against the solver.
  min_ratio: 0.1
  max_ratio: 0.25

# Single source of truth for the "short" duration limit; the dataset section
# reads it back via `${max_audio_duration_for_short}`.
# Units are presumably seconds -- TODO confirm.
max_audio_duration_for_short: 24
# Settings for an optional long-model embedding. Both the checkpoint path and
# the dimension are null in this file.
use_long_model_emb:
  long_model_checkpoint_path: null
  long_model_dim: null
  # NOTE(review): `fix` presumably means the long model is kept frozen --
  # confirm against the code that consumes this key.
  fix: true

# Audio format: one channel at a 24000 sample rate (consistent with the
# `encodec_24khz` compression checkpoint named above).
channels: 1
sample_rate: 24000

# Deadlock detection is switched on for this run.
deadlock:
  use: true  # deadlock detection

# Data loading settings, with a per-stage sample count for each of
# train / valid / evaluate / generate.
dataset:
  batch_size: 8
  sample_on_weight: false  # Uniform sampling all the way
  sample_on_duration: false  # Uniform sampling all the way
  # Interpolated from the top-level key of the same name.
  max_audio_duration_for_short: ${max_audio_duration_for_short}
  num_workers: 1
  train:
    num_samples: 2160
  valid:
    num_samples: 32
  evaluate:
    num_samples: 8
  generate:
    num_samples: 8

# Generation stage settings. `every: 1` presumably means the stage runs on
# every epoch -- TODO confirm.
generate:
  every: 1
  num_workers: 1
  lm:
    # Only prompted samples are enabled; unprompted and ground-truth
    # generation are both switched off.
    prompted_samples: true
    unprompted_samples: false
    gen_gt_samples: false
    # Resolves to `prompt_cond.max_length` (3.0 in this file).
    prompt_duration: ${prompt_cond.max_length}
    # NOTE(review): 3.0 (prompt) + 22.0 = 25 exceeds
    # max_audio_duration_for_short (24) if gen_duration excludes the prompt --
    # confirm which convention the solver uses.
    gen_duration: 22.0
    remove_prompts: false
    use_sampling: false
# Evaluation stage settings. `every: 1` presumably means the stage runs on
# every epoch -- TODO confirm.
evaluate:
  num_workers: 1
  every: 1

# Very short schedule: 2 epochs of 5 updates each (10 updates in total).
optim:
  epochs: 2
  updates_per_epoch: 5

# Logging settings: DEBUG verbosity, time profiling off, W&B logging on.
logging:
  level: DEBUG
  time_profile: false
  # NOTE(review): W&B logging is enabled; the project and run name come from
  # the `wandb` section -- confirm this is wanted for a debug config.
  log_wandb: true
# Weights & Biases target project and run name (logging itself is toggled by
# `logging.log_wandb`).
wandb:
  project: podcast_SpeechGen_en
  name: debug
