#!/bin/bash

# Identifier of the audio-model variant. It is reused below for the weights
# path, the --audio_model_type flag, and the output directory layout.
audio_model_type=large_v3_turbo-qwen2_7b_instruct

# Every run gets its own timestamped directory (second resolution).
cur_time=$(date +%Y-%m-%d_%H-%M-%S)
output_dir="outputs/train/alignchat/pretrain/${audio_model_type}/${cur_time}"
logging_dir="${output_dir}/logs"

# Create the run directories up front and stop if that fails, so the training
# job is never launched against a directory that does not exist.
if ! mkdir -p -- "${output_dir}" "${logging_dir}"; then
    printf 'error: cannot create run directories under %s\n' "${output_dir}" >&2
    exit 1
fi

# Keep a copy of this launch script next to the run outputs. "$0" is quoted
# so an invocation path containing spaces is copied as a single file. The
# snapshot is best-effort: a failure is reported but does not stop the run.
if ! cp -- "$0" "${output_dir}"; then
    printf 'warning: could not copy %s into %s\n' "$0" "${output_dir}" >&2
fi

# Launch train/pretrain_alignchat.py through `accelerate launch`, with the
# command line assembled from arrays instead of backslash continuations.

# Dataset locations passed to --dataset_paths (order matters: the ratios
# below are listed in the same order).
dataset_paths=(
    datasets/local/libritts_r_filtered
    datasets/local/mls_eng_10k
    datasets/local/VoiceAssistant-400K
    datasets/local/mmsu_openbookqa/train
    datasets/local/DeepDialogue-orpheus/train
    datasets/local/DeepDialogue-xtts/train
    datasets/local/EuroSpeech/uk/train
    datasets/local/fleurs/en_us/train
    datasets/local/common_voice_21_0/train
)

# One value per entry of dataset_paths (9 paths, 9 ratios).
# NOTE(review): presumably per-dataset sampling weights; confirm against
# train/pretrain_alignchat.py.
dataset_ratios=(1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 3.0)

# Options consumed by `accelerate launch` itself, followed by the script path.
launcher_args=(
    --config-file configs/deepspeed_configs/zero1_gpu8.yaml
    train/pretrain_alignchat.py
)

# Options forwarded to the training script.
train_args=(
    # Model.
    --audio_model_path "model_weights/alignchat/${audio_model_type}"
    --audio_model_type "${audio_model_type}"

    # Data.
    --dataset_paths "${dataset_paths[@]}"
    --dataset_ratios "${dataset_ratios[@]}"
    --dataset_split train

    # Run directories.
    --output_dir "${output_dir}"
    --logging_dir "${logging_dir}"

    # Learning-rate schedule.
    --warmup_ratio 0.05
    --lr_scheduler_type cosine_with_min_lr
    --learning_rate 1e-4
    --min_learning_rate 1e-6

    # Loss terms: --loss_ratios and --loss_types each carry three values.
    --loss_ratios 1.0 1.0 5.0
    --loss_types lm l1 cos_l2

    # Which sub-modules are frozen.
    --freeze_encoder True
    --freeze_decoder False

    # Training loop.
    --do_train
    --per_device_train_batch_size 32
    --gradient_accumulation_steps 8
    --num_train_epochs 2

    # Checkpointing.
    --save_strategy steps
    --save_only_model True
    --save_steps 3000
    --save_total_limit 20

    # Logging.
    --logging_steps 1
    --logging_strategy steps

    # Miscellaneous.
    --seed 42
    --fp16
    --weight_decay 1e-3
    --dataloader_num_workers 8
    --remove_unused_columns False
    --ddp_find_unused_parameters False
)

# The GPU list is set only in the environment of this one command.
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
    accelerate launch "${launcher_args[@]}" "${train_args[@]}"
