# @package __global__

# Hydra defaults list. Entries are composed in the order written; because
# `_self_` comes last, the values set in this file are merged on top of
# everything pulled in by the entries above it.
defaults:
  - speechgen/default
  - /model: lm/valle_lm_nar_small
  # `override` replaces a `dset` choice made earlier in the composition
  # (presumably by speechgen/default -- TODO confirm).
  - override /dset: audio/speech_debug
  - _self_

# Autocast (mixed precision) switch and the dtype used with it.
# NOTE(review): presumably consumed by the solver when wrapping forward
# passes -- confirm against the solver code.
autocast: true
autocast_dtype: float16

# Checkpoint reference for the audio compression model.
# NOTE(review): the `//pretrained/` prefix is presumably resolved by the
# project's checkpoint loader to a published model name rather than a
# filesystem path -- confirm.
compression_model_checkpoint: //pretrained/facebook/encodec_24khz
# No initialization checkpoint is set in this config.
init_from_valle: null
pre_post_mode: "pre"  # accepted values: pre, post

# Solver selection and basic audio format.
solver: halle_short6_nar
channels: 1
# Matches the `24khz` in the compression model checkpoint name above.
sample_rate: 24000

# Prompt conditioning settings. `max_length` is also reused as
# `generate.lm.prompt_duration` through interpolation.
prompt_cond:
  max_length: 3.0  # seconds
  # NOTE(review): presumably bounds on prompt length as a fraction of the
  # full utterance -- confirm against the code that reads these keys.
  min_ratio: 0.1
  max_ratio: 0.25

# Single source of truth for the "short" audio duration limit: both
# `dataset.max_audio_duration_for_short` and `generate.lm.gen_duration`
# interpolate this value. Presumably in seconds -- TODO confirm.
max_audio_duration_for_short: 28
# Optional embedding from a separate "long" model. Both the checkpoint path
# and the dimension are left unset (null) in this config.
use_long_model_emb:
  long_model_checkpoint_path: null
  long_model_dim: null
  # NOTE(review): presumably keeps the long model's weights fixed (frozen)
  # -- confirm.
  fix: true

# Deadlock detection toggle.
deadlock:
  use: true  # enable deadlock detection

# Data loading settings, with small per-stage sample counts.
dataset:
  batch_size: 64
  sample_on_weight: false  # disabled: uniform sampling all the way
  sample_on_duration: false  # disabled: uniform sampling all the way
  # Mirrors the top-level `max_audio_duration_for_short` via interpolation,
  # so the limit only needs to be changed in one place.
  max_audio_duration_for_short: ${max_audio_duration_for_short}
  num_workers: 16
  # Number of samples used by each stage.
  train:
    num_samples: 100
  valid:
    num_samples: 100
  evaluate:
    num_samples: 100
  generate:
    num_samples: 32

# Stage scheduling and generation options.
# NOTE(review): `every: N` presumably means "run this stage every N epochs"
# -- confirm against the solver.
generate:
  every: 10
  lm:
    # Only prompted generation is enabled; unprompted and ground-truth
    # sample generation are switched off.
    prompted_samples: true
    unprompted_samples: false
    gen_gt_samples: false
    # Resolves to `prompt_cond.max_length` (3.0).
    prompt_duration: ${prompt_cond.max_length}
    # Resolves to the top-level `max_audio_duration_for_short` (28).
    gen_duration: ${max_audio_duration_for_short}
    remove_prompts: false
    # NOTE(review): with sampling off, decoding is presumably deterministic
    # (e.g. greedy) -- confirm.
    use_sampling: false
valid:
  every: 1
evaluate:
  every: 10

# Optimization settings set by this config; only the epoch count is given.
optim:
  epochs: 500

# Logging: time profiling is off, Weights & Biases logging is on.
logging:
  time_profile: false
  log_wandb: true
# Weights & Biases project and run name used when `logging.log_wandb` is true.
wandb:
  project: podcast_SpeechGen_en
  name: debug
