#!/bin/bash
# Launch train.py through torchrun as a single process, with only GPU index 4
# visible to it, using the ParaSpeechCaps training data config and a
# pretrained checkpoint passed as trainer.initialization.checkpoint.
#
# train.py and configs/... are relative paths, so run this script from the
# directory that contains them.

# Abort on a misspelled variable name instead of silently passing an empty
# value (e.g. an empty checkpoint path) to the trainer.
set -u

# Put the Auden and lhotse checkouts in front of any existing PYTHONPATH.
# ${PYTHONPATH:+...} adds the ':' separator only when PYTHONPATH already has a
# value; a bare trailing ':' would create an empty path entry, which Python
# treats as the current directory.
auden_root=/apdcephfs_cq12/share_302080740/user/raytseng/research/Auden-refactor-online/Auden
lhotse_root=/apdcephfs_cq12/share_302080740/user/raytseng/research/lhotse
export PYTHONPATH="${auden_root}:${lhotse_root}${PYTHONPATH:+:${PYTHONPATH}}"

pretrained_model_checkpoint=/apdcephfs_cq12/share_302080740/user/raytseng/research/Auden-refactor-online/Auden/egs/audio_captioning/exp/CaptionStew_4M_masked_captioning_Au_Tu_5e-3_fp16_bsz5120_shuffled/checkpoint-400000.pt

# key=value overrides handed to train.py. Each element is exactly one argv
# word, so nothing here is word-split or glob-expanded by the shell.
train_args=(
  'exp_dir=/apdcephfs_cq10_1603164/share_1603164/user/raytseng/research/exp/clap_finetuning/paraspeechcaps_exp/paraspeechcaps_CaptionStew_4M_captioningPa_5e-3_bsz1280_MHAP'
  '++model.config.pooling=mhap'
  '++model.config.text_encoder_type=roberta-base'
  'data.train_data_config=configs/paraspeechcaps/train_data_config_paraspeechcaps.yaml'
  'data.max_duration=1280'
  'data.valid_sets=[/apdcephfs_cq12/share_302080740/user/raytseng/data/ParaSpeechCaps/dev_500.jsonl.gz]'
  'data.use_infinite_dataset=true'
  'data.num_workers=12'
  'trainer.lr_steps_per_epoch=500'
  'trainer.base_lr=0.005'
  'trainer.valid_interval=500'
  'trainer.save_every_n=1'
  # Quoted as a whole: an unquoted [...] is a glob character class to bash.
  # train.py receives the list as [encoder_embed,encoder].
  'trainer.freeze_modules=[encoder_embed,encoder]'
  "trainer.initialization.checkpoint=${pretrained_model_checkpoint}"
)

CUDA_VISIBLE_DEVICES=4 torchrun --nproc_per_node=1 \
  --master_port=29504 \
  train.py \
  "${train_args[@]}"

# CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 /apdcephfs_cq12/share_302080740/user/raytseng/run_gpu.py --size 40000 --gpus 8 --interval 0.01
