#!/usr/bin/env bash
#
# Launch ../decode.py with a fixed set of key=value overrides.
#
# Usage: run from a directory where ../decode.py, exp/ and
#        pretrained_models/ resolve (all paths below are relative to the
#        current working directory, not to this script's location).
#
# NOTE(review): the dotted key=value arguments look like Hydra/OmegaConf-style
# config overrides -- confirm against decode.py.

# Abort on the first failing command or unset variable if this script grows.
set -eu

# Restrict CUDA to the device with index 7.
export CUDA_VISIBLE_DEVICES="7"

# Fix: the speech-encoder type override was spelled `mode.speech_encoder_type`.
# Every other model option here (including the sibling speech_encoder_path)
# lives under `model.`, so the key is corrected to `model.speech_encoder_type`.
# NOTE(review): assumed typo -- confirm the key name in decode.py's config.
python ../decode.py \
  data.max_duration=80 \
  exp_dir=exp/test1 \
  model.speech_encoder_type=whisper \
  model.speech_encoder_path=pretrained_models/whisper-medium/medium.pt \
  model.llm_path=pretrained_models/Qwen2.5-7B-Instruct \
  checkpoint.epoch=7 \
  checkpoint.avg=1 \
  model.use_flash_attn=True \
  model.stage=1