# Root directory of the experiment; interpolated into hydra.run.dir.
# Intentionally null here — presumably supplied at launch time (e.g. an
# `exp_dir=...` override); confirm against the launcher.
# NOTE(review): if it stays null the interpolated path would likely start
# with the literal string "None" — confirm, and consider Hydra's mandatory
# marker (???) if a value must always be provided.
exp_dir: null
hydra:
  run:
    # Force Hydra output (logs, etc.) into the experiment directory, using a
    # sub-folder per run built from the launch date and the launch time.
    dir: "${exp_dir}/logs/${now:%Y-%m-%d}/${now:%H-%M-%S}"

tokenizer:
  # Tokenizer kind; model.config.tokenizer_type interpolates this value.
  type: "asr-spm-bbpe"
  # Where the tokenizer is loaded from — presumably a directory of tokenizer
  # assets; confirm against the loader.
  source: "data/lang_bbpe_2k"

model:
  # Required.
  name: "zipformer-asr"
  # Optional path to an existing config.json to load.
  config_path: null
  # Optional preset (e.g., 'base', 'large') to use if not loading a config.
  config_preset: null
  # Values to override in the config.
  config:
    # Follows tokenizer.type through interpolation.
    tokenizer_type: "${tokenizer.type}"

trainer:
  # --- Optimizer and learning-rate schedule ---
  optimizer: "scaled_adam"
  base_lr: 0.045
  scheduler: "eden"
  # Number of epochs that affects how rapidly the learning rate decreases.
  lr_epochs: 3.5
  # Number of steps that affects how rapidly the learning rate decreases.
  # We suggest not changing this.
  lr_batches: 7500
  # Number of learning-rate warmup steps.
  warmup_batches: 500
  # Adjusting this is recommended when data.use_infinite_dataset is true, so
  # that the learning-rate schedule matches the usual epoch-based one: set it
  # close to your estimated number of steps per epoch.
  # NOTE(review): data.use_infinite_dataset is enabled in this file while
  # this value is 0 — confirm that 0 is intended.
  lr_steps_per_epoch: 0

  # --- Training range ---
  num_epochs: 30
  start_epoch: 1
  start_batch: 0
  # Reference batch duration, used to adjust batch counts when setting the
  # various schedules inside the model.
  ref_duration: 600

  # --- Checkpointing, logging and validation ---
  # Keep the last k checkpoints on disk.
  keep_last_k: 30
  use_averaged_model: true
  log_interval: 50
  # How rapidly the averaged model is averaged and saved.
  average_period: 200
  # Moving-average interval for the info tracker.
  reset_interval: 200
  valid_interval: 1000
  # Save a checkpoint every (save_every_n * valid_interval) steps.
  save_every_n: 4
  use_fp16: true
  initialization:
    # Path to a .pt file of another pretrained checkpoint to initialize from.
    # Only the modules listed in init_modules are used.
    checkpoint: null
    # These two are the audio-encoder modules.
    init_modules:
      - 'encoder_embed'
      - 'encoder'
  tensorboard: true
  # Names of the modules to freeze during training.
  freeze_modules: null

  # --- Loss settings ---
  # Prune range for the RNN-T loss: how many symbols (context) are used to
  # compute the loss.
  prune_range: 5
  # Scale for smoothing the loss with the LM part (output of the prediction
  # network).
  lm_scale: 0.25
  # Scale for smoothing the loss with the AM part (output of the encoder
  # network).
  am_scale: 0
  simple_loss_scale: 0.5
  rnnt_warm_step: 2000

data:
  train_data_config: configs/train_data_config.yaml
  augmentation_data_config: configs/augmentation_data_config.yaml
  enable_data_mixing: false
  valid_sets:
    - data/test/aishell/cuts_fbank.jsonl.gz
    - data/test/librispeech/test-clean/cuts_fbank.jsonl.gz
  # NOTE(review): boolean flag despite the "prefix" name — confirm that the
  # consumer expects a switch rather than a path.
  valid_data_path_prefix: true
  enable_spec_aug: true
  # The original note here reads "PrecomputedFeatures" — presumably an
  # example of a non-null value; confirm.
  input_strategy: null
  enable_musan: false
  # NOTE(review): site-specific absolute path; presumably only read when
  # enable_musan is true — override it for your environment.
  musan: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/musan/musan_cuts.jsonl.gz
  enable_speed_perturb: false
  bucketing_sampler: true
  num_buckets: 30
  # Total seconds of speech within a minibatch.
  max_duration: 600
  on_the_fly_feats: true
  text_normalization: true
  shuffle: true
  drop_last: true
  num_workers: 8
  # The iterator of each dataset is never exhausted, so training is counted
  # in steps only, not epochs.
  use_infinite_dataset: true