# @package _group_

# Run-wide settings: precision, logging, seeding, and where user code lives.
common:
  fp16: true
  log_format: json
  log_interval: 200
  seed: 1337
  # `???` is OmegaConf's mandatory-value marker: no default is provided here,
  # so the value has to be supplied as an override before it is read.
  tensorboard_logdir: ???
  # Relative path; presumably resolved against the directory the job is
  # launched from -- confirm against the launch script.
  user_dir: examples/wescon

# Checkpoint writing and retention.
checkpoint:
  # `???` is OmegaConf's mandatory-value marker: must be supplied as an
  # override before it is read.
  save_dir: ???
  # Update-based checkpointing only: epoch checkpoints are disabled below.
  save_interval_updates: 10000
  # Per fairseq's option docs these cap how many update-interval and
  # best-scoring checkpoints are retained (1 and 10 respectively).
  keep_interval_updates: 1
  keep_best_checkpoints: 10
  no_epoch_checkpoints: true

# Distributed settings; the world size of 1 configures a single worker.
distributed_training:
  # NOTE(review): recent fairseq releases treat `no_c10d` as a legacy alias
  # of `legacy_ddp` -- confirm the installed version still accepts this name.
  ddp_backend: no_c10d
  distributed_world_size: 1

# Task selection and data locations. `_name` carries the same identifier as
# `criterion._name` and `model._name` elsewhere in this file.
task:
  _name: cosyvoice2_tp
  # The file stem (aishell_ls100_normed) matches `dataset.train_subset`.
  data: ./datas/1st_stage_alignment/for_train/aishell_ls100_normed.json
  # These limits carry the same numbers as `dataset.max_tokens` (2250) and
  # `dataset.max_tokens_valid` (1000); the unit is not visible from this
  # file -- TODO confirm against the task implementation.
  max_sample_size: 2250
  min_sample_size: 30
  max_valid_sample_size: 1000
  # Parent directory of `model.qwen_pretrained_path`.
  pretrained_home: ./pretrained_models/CosyVoice2-0.5B/
  # NOTE(review): `data` above points at a .json file while this says `tsv`;
  # confirm which format the task actually reads.
  data_type: tsv

# Dataset splits, batching limits and validation cadence.
dataset:
  # Same name as the stem of the JSON file referenced by `task.data`.
  train_subset: aishell_ls100_normed
  valid_subset: dev
  num_workers: 4
  # Same numbers as `task.max_sample_size` / `task.max_valid_sample_size`.
  max_tokens: 2250
  max_tokens_valid: 1000
  validate_interval: 1
  # 20000 is twice `checkpoint.save_interval_updates` (10000).
  # NOTE(review): fairseq may also validate whenever it saves on an update
  # interval -- confirm the effective validation cadence.
  validate_interval_updates: 20000

# Loss selection; same identifier as `task._name` and `model._name`.
criterion:
  _name: cosyvoice2_tp

# Training length, learning rate and gradient clipping.
optimization:
  max_update: 400000
  # Written as a one-element list rather than a bare float.
  lr: [0.00025]
  clip_norm: 1.0

# Optimizer selection and its hyper-parameters.
optimizer:
  _name: adam
  # Unquoted plain scalar, so YAML loads it as the string "(0.9,0.99)";
  # presumably the optimizer parses it into a tuple -- confirm.
  adam_betas: (0.9,0.99)
  # NOTE(review): there is no decimal point, so a stock YAML 1.1 loader
  # (e.g. plain PyYAML) reads this as a string; OmegaConf's loader is
  # expected to resolve it to a float -- confirm if the loader ever changes.
  adam_eps: 1e-06

# Learning-rate schedule. Only the warmup length is set here; every other
# scheduler option is left to the scheduler's own defaults.
lr_scheduler:
  _name: polynomial_decay
  # 40000 is 10% of `optimization.max_update` (400000).
  warmup_updates: 40000
# Model selection and architecture sizes; `_name` is the same identifier as
# `task._name` and `criterion._name`.
model:
  _name: cosyvoice2_tp
  text_encoder_input_size: 512
  # Input and output widths are configured to the same value (896).
  llm_input_size: 896
  llm_output_size: 896
  # Vocabulary sizes; presumably fixed by the pretrained tokenizers --
  # TODO confirm before changing.
  text_token_size: 51866
  speech_token_size: 6561
  # Lowercase boolean, matching `common.fp16` and
  # `checkpoint.no_epoch_checkpoints`.
  length_normalized_loss: true
  # NOTE(review): integer literal; if the model expects a float here,
  # confirm that the config loader coerces it.
  lsm_weight: 0
  spk_embed_dim: 192
  # Located underneath `task.pretrained_home`.
  qwen_pretrained_path: ./pretrained_models/CosyVoice2-0.5B/CosyVoice-BlankEN
  aligner_layer: 5