#!/bin/bash
#
# Launch train.py from the egs/asr_whisper recipe with torchrun on a single
# node, one worker process per GPU (8 by default).
#
# NOTE(review): the experiment directory name (exp/lora_ko) and the original
# "train lora" remark suggest this is a LoRA fine-tuning run; the actual
# behaviour is decided by train.py and its Hydra configs - confirm there.
#
# Usage: bash <this script>     (takes no arguments)

# Activate the conda environment so python/torchrun resolve to the conda
# installation. If this fails the script still proceeds with whatever is on
# PATH, as before.
. /opt/conda/bin/activate

# Source checkouts that must be importable by train.py.
AUDEN_PATH=/apdcephfs_cq10/share_1603164/user/jamelynli/workspace/Auden
LHOTSE_PATH=/apdcephfs_cq10/share_1603164/user/jamelynli/workspace/ASR/lhotse

# All relative paths below (train.py, configs/..., exp/...) are resolved from
# the recipe directory, so abort rather than launch from the wrong place.
cd "$AUDEN_PATH/egs/asr_whisper" || {
    echo "error: cannot cd to $AUDEN_PATH/egs/asr_whisper" >&2
    exit 1
}

# Prepend the checkouts to PYTHONPATH; only append the previous value when it
# is non-empty so no empty (trailing ':') entry is created.
export PYTHONPATH="$LHOTSE_PATH:$AUDEN_PATH${PYTHONPATH:+:$PYTHONPATH}"
# Ask Hydra to print complete stack traces on errors.
export HYDRA_FULL_ERROR=1
# Cap the number of OpenMP threads used by each worker process.
export OMP_NUM_THREADS=8

# Rendezvous endpoint for torchrun: this host's IP plus a fixed port.
MASTER_ADDR=$(hostname --ip-address)
# `hostname --ip-address` may print several space-separated addresses on
# multi-homed hosts; keep only the first so "addr:port" stays well-formed.
MASTER_ADDR=${MASTER_ADDR%% *}
if [[ -z "$MASTER_ADDR" ]]; then
    # Single-node run, so the loopback address is a valid rendezvous host.
    echo "warning: could not detect host IP, using 127.0.0.1" >&2
    MASTER_ADDR=127.0.0.1
fi
MASTER_PORT=29507
ngpu=8

# Start training. The trailing key=value arguments are Hydra overrides passed
# to train.py; the '++' prefix adds the key or overrides it if already present.
torchrun \
    --nnodes=1 --nproc_per_node="${ngpu}" \
    --rdzv_id=100 \
    --rdzv_backend=c10d \
    --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}" \
    train.py \
    ++exp_dir=exp/lora_ko \
    trainer.valid_interval=500 \
    trainer.save_every_n=100000 \
    model.config_path=configs/model/medium.json \
    tokenizer.path=configs/tokenizer/base.json
