#!/bin/bash
# ---- Run configuration -----------------------------------------------------
# Devices made visible to the training processes. The script counts the
# comma-separated entries of this list and starts one process per entry.
# For a single-device run, use e.g.: export CUDA_VISIBLE_DEVICES="3"
CUDA_VISIBLE_DEVICES='0,1,2,3,4,5,6,7'
export CUDA_VISIBLE_DEVICES

# Multi-node layout: total number of nodes, and the index of this node.
num_nodes='1'
node_rank='0'

# Base directory for the run's output folders referenced further down.
xx_path='/home/disk2/nips/Result/controlspeech/train'

# Working directory that holds the DDP init file; it is wiped and re-created
# on every launch.
dir='config/ddp'

echo "start training"

# Start from a clean slate: delete the model, tensorboard and log folders
# under $xx_path. The ${var:?} form aborts the script if the variable is empty
# or unset, so a misconfiguration can never turn these into deletions directly
# under "/". Quoting keeps paths containing whitespace intact.
# NOTE(review): "tensorborad" looks like a misspelling of "tensorboard"; it is
# kept byte-for-byte because the trainer may write to this exact name -- confirm.
rm -rf -- "${xx_path:?xx_path must be set}/model_styletrolnet"
rm -rf -- "${xx_path:?xx_path must be set}/tensorborad_styletrolnet"
rm -rf -- "${xx_path:?xx_path must be set}/train_log_styletrolnet"

# Re-create the DDP working directory so that no init file from an earlier
# launch survives.
rm -rf -- "${dir:?dir must be set}"
mkdir -p -- "$dir"
# INIT_FILE is for DDP synchronization: every process receives its absolute
# path as a file:// init method.
INIT_FILE="$dir/ddp_init"
init_method="file://$(readlink -f "$INIT_FILE")"
echo "$0: init method is $init_method"

# Number of processes on this node = number of comma-separated entries in
# CUDA_VISIBLE_DEVICES.
num_gpus=$(printf '%s\n' "$CUDA_VISIBLE_DEVICES" | awk -F "," '{print NF}')
# Use "nccl" if it works, otherwise use 'gloo'
dist_backend='nccl'
# Total process count over all nodes; this assumes every node exposes the same
# number of GPUs as this one.
world_size=$(( num_gpus * num_nodes ))
echo "total gpus is: $world_size"
# NOTE(review): the comment that used to sit here said that train.py writes
# $train_config to $dir/train.yaml (with model input and output dimension) for
# later inference or model export. This script defines no train_config and
# does not pass $dir to the trainer, so that description may be stale -- confirm.

# Prints the number of GPUs that will actually be used on this node.
echo "最后使用的gpu数量是 $num_gpus"

# Launch one training process per visible GPU on this node, all in parallel.
# The device list is split once up front instead of forking `cut` per iteration.
IFS=',' read -r -a gpu_ids <<< "$CUDA_VISIBLE_DEVICES"
worker_pids=()
for ((i = 0; i < num_gpus; ++i)); do
    gpu_id=${gpu_ids[i]}
    # Global rank of this process: ranks are numbered node by node, so the
    # offset of this node is node_rank * num_gpus.
    # NOTE(review): presumably the trainer treats rank 0 as the master -- confirm.
    rank=$(( node_rank * num_gpus + i ))
    python nn_ss/trainer/soundstorm/train_controlspeech.py --gpu "$gpu_id" \
        --ddp.init_method "$init_method" \
        --ddp.world_size "$world_size" \
        --ddp.rank "$rank" \
        --ddp.dist_backend "$dist_backend" \
        --num_workers 1 \
        --logdir "$xx_path/train_log_styletrolnet" \
        --config /home/disk1/nips/speech/code/controlspeech/config/controlspeech_bert_mdn3.yaml \
        --pin_memory &
    worker_pids+=("$!")
done

# Barrier: wait for every worker individually. A bare `wait` always returns 0,
# which would make the launcher report success even when a worker crashed.
status=0
for pid in "${worker_pids[@]}"; do
    wait "$pid" || status=$?
done
if (( status != 0 )); then
    echo "$0: at least one training process failed (last failing exit status: $status)" >&2
    exit "$status"
fi
