# Experiment output directory. It is left empty here (parses as null) and is
# interpolated into hydra.run.dir below, so presumably it is supplied as an
# override at launch time -- TODO confirm against the launch scripts.
exp_dir: null
hydra:
  run:
    # Force Hydra output (logs, etc.) into your experiment dir, under a
    # per-run date/time subdirectory.
    dir: ${exp_dir}/logs/${now:%Y-%m-%d}/${now:%H-%M-%S}

# Model selection and its constructor settings.
model:
  # Required
  name: "asr-llm"
  config:
    # Speech encoder kind and the checkpoint it is loaded from.
    speech_encoder_type: "whisper"
    speech_encoder_path: "/apdcephfs_qy3/share_302401520/user/bingshenmu/workspace/ailab_speech_llm/models/yuekai_whisper/whisper-large-v2-multi-hans-zh-epoch-3-avg-10.pt"
    # Location of the pretrained LLM.
    llm_path: "/apdcephfs_qy3/share_302401520/user/bingshenmu/pretrained_models/Qwen2.5-7B-Instruct"
    use_flash_attn: true
    stage: 1
    # No stage-1 checkpoint is configured in this file (null).
    pretrained_stage1_model_path: null
    # LoRA settings (presumably forwarded to the adapter config -- verify
    # against the model code).
    lora_rank: 64
    lora_alpha: 16
    lora_dropout: 0.05
    target_modules:
      - "q_proj"
      - "k_proj"
      - "v_proj"
      - "o_proj"
      - "up_proj"
      - "gate_proj"
      - "down_proj"
    task_type: "CAUSAL_LM"
    encoder_projector_ds_rate: 8
    subsampling_factor: 2

# Optimisation, scheduling, checkpointing and logging settings.
trainer:
  optimizer: "adam"
  base_lr: 0.0001
  scheduler: "eden"
  # Number of epochs that affects how rapidly the learning rate decreases.
  lr_epochs: 3.5
  # Number of steps that affects how rapidly the learning rate decreases.
  # We suggest not changing this.
  lr_batches: 7500
  # Learning-rate warmup steps.
  warmup_batches: 500
  # When use_infinite_dataset is true, it is recommended to set this close to
  # your estimated number of steps per epoch so that the learning-rate
  # schedule matches the usual epoch-based one.
  # NOTE(review): data.use_infinite_dataset is true in this file while this
  # value is 0 -- confirm that is intended.
  lr_steps_per_epoch: 0
  num_epochs: 30
  start_epoch: 1
  start_batch: 0
  # Reference batch duration for purposes of adjusting batch counts for
  # setting various schedules inside the model.
  ref_duration: 80
  # Keep the last k checkpoints on disk.
  keep_last_k: 30
  use_averaged_model: true
  log_interval: 50
  # How rapidly the averaged model is averaged and saved.
  average_period: 200
  # Moving-average interval for the info tracker.
  reset_interval: 200
  valid_interval: 1000
  # Save a checkpoint every (n * valid_interval) steps.
  save_every_n: 4
  use_fp16: true
  # Optional initialization from another checkpoint; these two settings are
  # for audio encoders.
  initialization:
    # Path to a pretrained .pt checkpoint to initialize from. Only the
    # modules listed in init_modules are used.
    checkpoint: null
    init_modules: null
  tensorboard: true
  # Names of modules to be frozen during training.
  freeze_modules: null

# Dataset manifests, augmentation switches and sampler settings.
data:
  train_data_config: configs/train_data_config.yaml
  valid_data_config: configs/valid_data_config.yaml
  enable_spec_aug: true
  input_strategy: null
  # MUSAN switch and the cut manifest path configured for it.
  enable_musan: false
  musan: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/musan/musan_cuts.jsonl.gz
  enable_speed_perturb: false
  bucketing_sampler: true
  num_buckets: 30
  # Total seconds of speech within a minibatch.
  max_duration: 80
  on_the_fly_feats: true
  text_normalization: true
  shuffle: true
  drop_last: true
  num_workers: 8
  # The iterator of each dataset will never be exhausted, so there will only
  # be steps, no epochs.
  use_infinite_dataset: true
  # NOTE(review): model.config.speech_encoder_type is "whisper" while this is
  # false -- confirm the feature type matches what the encoder expects.
  whisper_fbank: false