#!/bin/bash
# Put the Auden checkout at the front of PYTHONPATH. The ${VAR:+...} expansion
# appends the previous value only when it is set and non-empty, so an unset or
# empty PYTHONPATH does not leave a trailing ':' (an empty search-path entry).
export PYTHONPATH="/apdcephfs_cq12/share_302080740/user/raytseng/research/Auden-0722-dev/Auden${PYTHONPATH:+:${PYTHONPATH}}"

# ---------------- Fine-tune on AudioCaps from a pretrained checkpoint ----------------
# Checkpoint file passed to train.py through the
# trainer.initialization.checkpoint override further down in this script.
pretrained_model_checkpoint='/apdcephfs_cq12/share_302080740/user/raytseng/research/Auden-refactor-online/Auden/egs/audio_clap/exp/CaptionStew_1M_CLAP_Au_Tu_1e-2_fp16_bsz5120_shuffled/checkpoint-400000.pt'
# Launch train.py through torchrun as a single process (--nproc_per_node=1)
# with CUDA_VISIBLE_DEVICES=5 and master port 29505.
#
# Every element of train_args is one key=value override handed to train.py
# verbatim, in the order listed. Keeping them in an array means each override
# can be quoted on its own and entries can be commented out or appended
# without leaving a dangling line-continuation backslash on the last line.
# Requires pretrained_model_checkpoint to be set earlier in this script.
# NOTE(review): the `++key=value` form looks like Hydra's add-or-override
# syntax — confirm against train.py.
train_args=(
  exp_dir=exp/audiocaps_CaptionStew_1M_CLAP_400k_4_5e-2_bsz1280_xattn_only
  data.train_data_config=configs/audiocaps/train_data_config_audiocaps.yaml
  data.max_duration=1280
  # List values are quoted as a whole so the shell never treats the [...]
  # part as a glob pattern.
  'data.valid_sets=[/apdcephfs_cq12/share_302080740/user/raytseng/research/Auden-refactor-online/Auden/egs/audio_captioning/manifests/audiocaps_test.jsonl.gz]'
  data.use_infinite_dataset=true
  trainer.lr_steps_per_epoch=500
  # train.py receives exactly [encoder_embed,encoder]; the shell would strip
  # any inner quotes around the names anyway.
  'trainer.freeze_modules=[encoder_embed,encoder]'
  # Quoted so the path survives word splitting and globbing.
  "trainer.initialization.checkpoint=${pretrained_model_checkpoint}"
  trainer.valid_interval=250
  trainer.base_lr=0.0045
  ++model.config.text_decoder_type=facebook/bart-base
  ++model.config.text_tokenizer_type=facebook/bart-base
  trainer.save_every_n=2
  ++model.config.train_decoder_xattn_only=true
)

CUDA_VISIBLE_DEVICES=5 torchrun --nproc_per_node=1 --master_port=29505 \
  train.py "${train_args[@]}"