# Transformer encoder/decoder hyperparameters.
# NOTE(review): per-key meanings are inferred from the key names only —
# confirm against the model code that reads this file.
transformer:
  # Encoder settings.
  encoder_layer: 4
  encoder_head: 2
  encoder_hidden: 128
  encoder_dropout: 0.1
  # Decoder settings.
  decoder_layer: 12
  decoder_head: 2
  decoder_hidden: 128
  decoder_dropout: 0.1
  # Convolution settings; the kernel size is given as a two-element list.
  conv_filter_size: 1024
  conv_kernel_size: [9, 1]
  # Attention variant. This file also has a top-level `Nystrom` section;
  # presumably it is only used when a Nystrom variant is selected here —
  # TODO confirm the accepted values.
  attention: "Standard"

# Settings for the `Nystrom` section.
# NOTE(review): presumably consumed only when `transformer.attention` selects
# a Nystrom-style attention — confirm against the model code.
Nystrom:
  num_landmarks: 256
  pinv_iterations: 6

# Hyperparameters for the variance predictor.
# NOTE(review): per-key meanings are inferred from the key names only —
# confirm against the model code that reads this file.
variance_predictor:
  filter_size: 256
  kernel_size: 3
  dropout: 0.5
  noise_scale: 0.8

# Pitch and energy quantization settings for the variance embedding.
variance_embedding:
  # Supported values: 'linear' or 'log'. 'log' is allowed only if the pitch
  # values are not normalized during preprocessing.
  pitch_quantization: "linear"
  # Supported values: 'linear' or 'log'. 'log' is allowed only if the energy
  # values are not normalized during preprocessing.
  energy_quantization: "linear"
  # NOTE(review): presumably the number of quantization bins — confirm.
  n_bins: 256

# Settings for the `SDP` module.
# NOTE(review): the acronym is not expanded anywhere in this file (possibly
# "stochastic duration predictor"), and the per-key meanings are inferred
# from the key names only — confirm against the model code.
SDP:
  filter_channels: 100
  kernel_size: 3
  dropout: 0.1
  n_flows: 4
  gin_channels: 0
  density_sample: 1

# Settings for the `SPP` module; it uses the same set of keys as `SDP`.
# NOTE(review): the acronym is not expanded anywhere in this file (possibly
# "stochastic pitch predictor"), and the per-key meanings are inferred from
# the key names only — confirm against the model code.
SPP:
  filter_channels: 120
  kernel_size: 3
  dropout: 0.05
  n_flows: 5
  gin_channels: 0
  density_sample: 1
  
# Settings for the `DSF` module.
# NOTE(review): the acronym is not expanded anywhere in this file, and the
# per-key meanings are inferred from the key names only — confirm against
# the model code.
DSF:
  num_can: 6
  dropout: 0.1
  # Both thresholds are negative; the scale they are expressed in is not
  # stated in this file — TODO confirm.
  duration_threshold: -10.5
  pitch_threshold: -2.66
  kappa: 0.2
  # NOTE(review): `sweight_*` look like separate weights for the duration
  # and pitch terms — confirm.
  sweight_dur: 10.0
  sweight_pitch: 5.0

# NOTE(review): presumably toggles multi-speaker mode — confirm.
multi_speaker: false

# NOTE(review): presumably an upper bound on sequence length; the unit is
# not stated in this file — confirm.
max_seq_len: 1000

# Vocoder selection.
vocoder:
  # Supported values: 'HiFi-GAN', 'MelGAN'.
  model: "HiFi-GAN"
  # Supported values: 'LJSpeech', 'universal'.
  speaker: "LJSpeech"
