# Base config path; presumably loaded first and then overridden by the keys
# in this file -- confirm against the config loader.
base_config: configs/tts/speech_transducer.yaml
# Dataset locations, given as relative paths.
# NOTE(review): data_dir looks like the processed-dataset directory and
# raw_data_dir the unpacked LJSpeech-1.1 corpus -- confirm with preprocessing.
data_dir: data/ljspeech
raw_data_dir: data/raw/LJSpeech-1.1

# training
# Top-level dropout value; note that tts_joint_network_params carries its own
# separate `dropout` key.
dropout: 0.2
# Presumably a per-batch token budget used for batching -- TODO confirm.
max_tokens: 3200
# Presumably the interval between validation runs, in training steps --
# TODO confirm the unit.
val_check_interval: 1000
# Presumably the number of batches whose gradients are accumulated before
# each optimizer step -- TODO confirm.
accumulate_grad_batches: 6

# loss: mel loss type plus the lambda_* coefficients (all 1.0 in this config)
# Selects the mel loss variant by name.
mel_loss: l1
# NOTE(review): the lambda_* values are presumably multipliers applied to the
# correspondingly named loss terms (mel, diag_cumsum, diag) -- confirm in the
# training task code.
lambda_mel: 1.0
lambda_diag_cumsum: 1.0
lambda_diag: 1.0

# encoder
# NOTE(review): the meaning of tau is not evident from this file -- document
# it here once confirmed against the model code.
tau: 10
# Layer counts for the text encoder and the speech encoder.
text_enc_layers: 6
speech_enc_layers: 6
# Single quoted string of 12 space-separated integers (six 8s, then six 13s);
# the count equals text_enc_layers + speech_enc_layers. Presumably one code
# per layer -- TODO confirm the mapping and keep the length in sync if the
# layer counts change.
arch: '8 8 8 8 8 8 13 13 13 13 13 13'
# Same value (256) as tts_joint_network_params.hidden_size below.
hidden_size: 256
prenet_hidden_size: 256

# joint network
# Nested settings for the joint network; `hidden_size` and `dropout` here are
# separate keys from the top-level ones of the same name.
tts_joint_network_params:
  hidden_size: 256
  # Listed options: 'Tanh', 'ReLU', 'LeakyReLU', or null.
  activation_type: ReLU
  n_layers: 3
  dropout: 0.1
