#!/bin/bash
# Pull in the project's environment setup. The path is relative, so this
# script has to be started from the directory that contains scripts/.
source scripts/path.sh

# Address and port of the rendezvous endpoint that the torchrun launch below
# passes as --rdzv_endpoint. Exported so child processes inherit them too.
MASTER_ADDR=9.206.39.16
MASTER_PORT=62345
export MASTER_ADDR MASTER_PORT

# Force-kill any leftover Python processes before launching, so the new run
# does not collide with a previous one.
#
# pkill -f matches the pattern against each process's full command line and
# never matches itself. The previous `ps | grep | awk | xargs kill` pipeline
# also picked up its own grep process and ran `kill` with no arguments (a
# usage error) when nothing matched.
#
# WARNING: this is deliberately broad. Every process whose command line
# contains "python" and that this user may signal is killed with SIGKILL.
# A non-zero exit status here only means nothing matched; it is ignored.
pkill -9 -f python

# Distributed training launch: 4 nodes with 8 processes per node, using the
# c10d rendezvous backend at MASTER_ADDR:MASTER_PORT (rendezvous id 100).
# Arguments are kept in arrays so each option sits on its own line without
# backslash continuations; the resulting command line is unchanged.
launcher_opts=(
  --nnodes=4
  --nproc_per_node=8
  --rdzv_id=100
  --rdzv_backend=c10d
  --rdzv_endpoint="${MASTER_ADDR}:${MASTER_PORT}"
)

# key=value overrides forwarded verbatim to train.py.
train_overrides=(
  exp_dir=exp/tta_260k_base
  model.config.use_s2t_alignment=true
  data.train_data_config=configs/asr/train_data_config.yaml
  data.valid_data_config=configs/asr/valid_data_config.yaml
  data.max_duration=250
  trainer.use_fp16=true
  data.use_infinite_dataset=true
  # Starting weights; strict=False is passed alongside.
  # NOTE(review): presumably this tolerates key mismatches when loading the
  # checkpoint - confirm in train.py.
  trainer.initialization.checkpoint=/apdcephfs_cq12/share_302080740/user/louislauliu/code/Auden/egs/masr/exp/rnnt_10lang/averaged_ckpt260000_avg5.pt
  trainer.initialization.strict=False
  trainer.valid_interval=2000
  trainer.save_every_n=2
  trainer.base_lr=0.005
)

torchrun "${launcher_opts[@]}" train.py "${train_overrides[@]}"

# Once the training command has returned, force-kill whatever Python
# processes are still around.
#
# pkill -f matches the pattern against each process's full command line and
# never matches itself. The previous `ps | grep | awk | xargs kill` pipeline
# also picked up its own grep process and ran `kill` with no arguments (a
# usage error) when nothing matched.
#
# WARNING: deliberately broad. Every process whose command line contains
# "python" and that this user may signal receives SIGKILL. A non-zero exit
# status only means nothing matched; it is ignored.
pkill -9 -f python

# Wait 10 seconds before starting the follow-up job.
sleep 10

# Run keep.py with GPU indices 0 through 7 as separate arguments. Brace
# expansion yields the same eight words as the former unquoted $(seq 0 7),
# without a subshell or reliance on word splitting.
# NOTE(review): keep.py is not part of this file; presumably it keeps the
# GPUs occupied after training - confirm.
python ~/workspace/keep.py --gpus {0..7}
