#!/bin/bash
#
# Launch a single-process torchrun job that runs train.py on FSD50K,
# initialising from a CLAP-pretrained checkpoint and passing
# trainer.freeze_modules for the encoder modules.
#
# train.py and the configs/... paths below are relative, so run this script
# from the directory that contains them.
set -euo pipefail

# Prepend the Auden source tree to PYTHONPATH. The ${VAR:+...} form appends
# the previous value only when it is set and non-empty, which avoids a
# trailing empty path entry and keeps the expansion valid under `set -u`.
export PYTHONPATH="/apdcephfs_cq12/share_302080740/user/raytseng/research/Auden-refactor-online/Auden${PYTHONPATH:+:${PYTHONPATH}}"

# ---------------CLAP pretrained step 25k, freeze------------------
# (step taken from the checkpoint filename: checkpoint-25000.pt)
readonly pretrained_model_checkpoint=/apdcephfs_cq12/share_302080740/user/raytseng/research/Auden-refactor-online/Auden/egs/audio_clap/exp/CaptionStew_400k_CLAP_AU_TU_5e-3_fp16_bsz5120_shuffled/checkpoint-25000.pt

# key=value overrides handed to train.py, kept in an array so that each
# override stays a single argument (some values contain spaces) and no
# dangling line-continuation backslash is needed after the last entry.
train_overrides=(
  exp_dir=/apdcephfs_cq12/share_302080740/user/raytseng/research/exp/audio_tagging/fsd50k_CaptionStew_400k_CLAP_TUAU_25k_4_5e-3_bs1280_MeanPooling
  model.id2label_json=configs/fsd50k/id2label_fsd50k.json
  trainer.valid_interval=225
  data.train_data_config=configs/fsd50k/train_data_config_fsd50k.yaml
  'data.valid_sets=[/apdcephfs_cq12/share_302080740/data/audio_test_data/fsd50k/fsd50k_test.jsonl.gz]'
  data.max_duration=1280
  trainer.use_fp16=false
  data.use_infinite_dataset=true
  trainer.base_lr=0.0045
  "trainer.initialization.checkpoint=${pretrained_model_checkpoint}"
  'trainer.freeze_modules=[encoder_embed, encoder]'
  model.config.is_multilabel=true
  trainer.lr_steps_per_epoch=225
)

# Restrict the job to GPU 0 and start one worker process.
CUDA_VISIBLE_DEVICES=0 torchrun \
  --nproc_per_node=1 \
  --master_port=29500 \
  train.py \
  "${train_overrides[@]}"

# # ---------------Cap pretrained step 100k, freeze------------------
# pretrained_model_checkpoint=/apdcephfs_cq12/share_302080740/user/raytseng/research/Auden-refactor-online/Auden/egs/audio_captioning/exp/wavcaps_captioning_Au_Tu_5e-3_fp16_bsz6400/checkpoint-100000.pt
# torchrun --nproc_per_node=1 \
#          --master_port=29503 \
#         train.py \
#         ++exp_dir=urbansound8k_captioning_100k_4.5e-3_bsz80 \
#         trainer.valid_interval=100 \
#         data.train_data_config=configs/urbansound8k/train_data_config_urbansound8k.yaml \
#         data.valid_sets='[/apdcephfs_cq12/share_302080740/data/audio_train_data/manifests/urbansound/urbansound_9.jsonl.gz]' \
#         data.max_duration=320 \
#         trainer.use_fp16=false \
#         data.use_infinite_dataset=true \
#         trainer.base_lr=0.0045 \
#         trainer.initial_batch_count=100000 \
#         ++trainer.checkpoint.pretrained_model=$pretrained_model_checkpoint \
#         ++trainer.checkpoint.init_modules='[encoder_embed, encoder]' \

# # ---------------CapPa pretrained step 100k, freeze------------------
# pretrained_model_checkpoint=/apdcephfs_cq12/share_302080740/user/raytseng/research/Auden-refactor-online/Auden/egs/audio_captioning/exp/wavcaps_masked_captioning_Au_Tu_5e-3_fp16_bsz6400/checkpoint-100000.pt
# torchrun --nproc_per_node=1 \
#          --master_port=29503 \
#         train.py \
#         ++exp_dir=urbansound8k_captioningPa_100k_4.5e-3_bsz80 \
#         trainer.valid_interval=100 \
#         data.train_data_config=configs/urbansound8k/train_data_config_urbansound8k.yaml \
#         data.valid_sets='[/apdcephfs_cq12/share_302080740/data/audio_train_data/manifests/urbansound/urbansound_9.jsonl.gz]' \
#         data.max_duration=320 \
#         trainer.use_fp16=false \
#         data.use_infinite_dataset=true \
#         trainer.base_lr=0.0045 \
#         trainer.initial_batch_count=100000 \
#         ++trainer.checkpoint.pretrained_model=$pretrained_model_checkpoint \
#         ++trainer.checkpoint.init_modules='[encoder_embed, encoder]' \