# Pre-net settings: one convolution kernel size and one dropout rate.
# NOTE(review): the module that reads these keys is not visible in this
# file — confirm against the model code before renaming anything.
prenet:
  conv_kernel_size: 3
  dropout: 0.1

# Encoder settings: layer count, head count, hidden size, dropout, and the
# convolution filter/kernel sizes.
# conv_filter_size is currently 4x encoder_hidden (768 vs 192).
# NOTE(review): encoder_hidden equals decoder.decoder_hidden (both 192);
# presumably they must stay in sync — confirm before changing one.
encoder:
  encoder_layer: 6
  encoder_head: 2
  encoder_hidden: 192
  dropout: 0.1
  conv_filter_size: 768
  conv_kernel_size: 3

# Variance predictor settings.
# NOTE(review): filter_size equals encoder.encoder_hidden (192) here;
# whether the two are required to match is not visible — confirm.
variance_predictor:
  filter_size: 192
  kernel_size: 3
  dropout: 0.1
  # NOTE(review): presumably a scale applied to sampling noise — confirm
  # where and when the consumer applies it.
  noise_scale: 0.6

# Pitch and energy quantization modes, plus the bin count.
variance_embedding:
  # Supports 'linear' or 'log'. 'log' is allowed only if the pitch values
  # are not normalized during preprocessing.
  pitch_quantization: "linear"
  # Supports 'linear' or 'log'. 'log' is allowed only if the energy values
  # are not normalized during preprocessing.
  energy_quantization: "linear"
  # NOTE(review): presumably the number of quantization bins used for both
  # pitch and energy — confirm.
  n_bins: 256

# Decoder settings: layer count, hidden size, head count, dropout, and the
# convolution filter/kernel sizes.
# NOTE(review): decoder_hidden equals encoder.encoder_hidden (both 192);
# presumably they must stay in sync — confirm before changing one.
decoder:
  decoder_layer: 6
  decoder_hidden: 192
  decoder_head: 2
  dropout: 0.1
  conv_filter_size: 768
  # A two-element list here, whereas encoder.conv_kernel_size is a single
  # integer.
  # NOTE(review): presumably one kernel size per convolution — confirm.
  conv_kernel_size: [9, 1]

# SDP settings. The keys and values currently mirror the SPP section.
# NOTE(review): presumably "stochastic duration predictor" — confirm.
SDP:
  filter_channels: 192
  kernel_size: 3
  dropout: 0.1
  n_flows: 4
  # NOTE(review): 0 presumably means no global conditioning input —
  # confirm against the consumer.
  gin_channels: 0
  density_sample: 1

# SPP settings. The keys and values currently mirror the SDP section.
# NOTE(review): presumably "stochastic pitch predictor" — confirm.
SPP:
  filter_channels: 192
  kernel_size: 3
  dropout: 0.1
  n_flows: 4
  # NOTE(review): 0 presumably means no global conditioning input —
  # confirm against the consumer.
  gin_channels: 0
  density_sample: 1
  
# DPP settings: candidate count, dropout, duration/pitch thresholds, kappa,
# and the duration/pitch weights.
# NOTE(review): presumably "determinantal point process" sampling — confirm.
DPP:
  # NOTE(review): presumably the number of candidates — confirm.
  num_can: 6
  dropout: 0.1
  duration_threshold: -2.65   # q_dr = -2.65
  # NOTE(review): the note on this line previously read "q_pi = 0.14",
  # which disagrees with the configured value 0.3 — confirm which value is
  # intended and update either the value or this note.
  pitch_threshold: 0.3       # q_pi
  kappa: 2.5
  sweight_dur: 3.0
  sweight_pitch: 1.0

# Multi-speaker switch, written as a canonical lowercase YAML boolean.
multi_speaker: false

# Maximum sequence length.
# NOTE(review): the unit (input tokens vs. output frames) and what happens
# to longer sequences are not visible in this file — confirm.
max_seq_len: 1000

# Vocoder selection: which vocoder model and which speaker variant to use.
vocoder:
  # Supports 'HiFi-GAN' or 'MelGAN'.
  model: "HiFi-GAN"
  # Supports 'LJSpeech' or 'universal'.
  speaker: "LJSpeech"
