#!/usr/bin/env bash
#
# Launch ./train_imagenet_nv.py through `python -m torch.distributed.launch`.
#
# Usage:
#   SINGLE_NODE=true  (current setting):
#     <script> NPROC_PER_NODE MASTER_ADDR MASTER_PORT
#   SINGLE_NODE=false (edit the toggle below):
#     <script> NPROC_PER_NODE NNODE NODE_RANK MASTER_ADDR MASTER_PORT
#
# The exit status is that of the launched python command (or 2 when too few
# arguments are given).

# Mode toggle: "true" runs the single-node branch, anything else runs the
# multi-node branch. Note the positional-argument layout differs per mode.
SINGLE_NODE=true

if [ "$SINGLE_NODE" = true ]; then
    required_args=3
else
    required_args=5
fi

# Fail early with a usage message instead of handing empty values to the
# launcher (e.g. `--master_port=`).
if [ "$#" -lt "$required_args" ]; then
    printf 'Usage (single node): %s NPROC_PER_NODE MASTER_ADDR MASTER_PORT\n' "${0##*/}" >&2
    printf 'Usage (multi node):  %s NPROC_PER_NODE NNODE NODE_RANK MASTER_ADDR MASTER_PORT\n' "${0##*/}" >&2
    printf 'Expected at least %s arguments for the current mode, got %s.\n' "$required_args" "$#" >&2
    exit 2
fi

NPROC_PER_NODE=$1
if [ "$SINGLE_NODE" = true ]; then
    MASTER_ADDR=$2
    MASTER_PORT=$3
else
    NNODE=$2
    NODE_RANK=$3
    MASTER_ADDR=$4
    MASTER_PORT=$5
fi

# Set the open-file-descriptor limit for this shell and the processes it starts.
ulimit -n 4096
if [ "$SINGLE_NODE" = true ]; then
    # Single-node run restricted to GPUs 0-3.
    # --data uses $HOME rather than "~": bash does not tilde-expand inside a
    # word such as --data=~/..., so the program would receive a literal "~".
    CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch \
    --nproc_per_node="$NPROC_PER_NODE" \
    --master_addr="$MASTER_ADDR" \
    --master_port="$MASTER_PORT" \
    ./train_imagenet_nv.py \
    --model=resnet50 \
    --dataset=imagenet \
    --data="$HOME/dataset/cv/imagenet/" \
    --optimizer=SGD \
    --update-freq=100 \
    --stat-decay-param=0.9 \
    --stat-decay-grad=0.9 \
    --history-size=20 \
    --lbfgs-damping=0.2 \
    --grad-clip=0.05 \
    --max-epoch=100 \
    --lr=0.1 \
    --decay-period=10 \
    --wd=0.0005 \
    --momentum=0.9 \
    --workers=4 \
    --logdir=log/imagenet/SGD/lr0.1-10-0.5_wd5e-4_m0.9 \
    --init-bn0 \
    --fp16 \
    --distributed \
    --phases "[{'ep': 0, 'sz': 224, 'bs': 128},
    {'ep': (0, 10), 'lr': (0.05, 0.025)},
    {'ep': (10, 40), 'lr': (0.025, 0.00625)},
    {'ep': (40, 100), 'lr': (0.00625, 0.0015)}]" \
    --skip-auto-shutdown
else
    # Multi-node run restricted to GPUs 4-7 on this node; node count and this
    # node's rank come from the command line.
    CUDA_VISIBLE_DEVICES=4,5,6,7 python -m torch.distributed.launch \
    --nproc_per_node="$NPROC_PER_NODE" --nnodes="$NNODE" --node_rank="$NODE_RANK" \
    --master_addr "$MASTER_ADDR" \
    --master_port "$MASTER_PORT" \
    ./train_imagenet_nv.py \
    --data /home/chaoyanghe/dataset/cv/imagenet \
    --workers=4 \
    --logdir ./log/SGD/lr_0.1 \
    --fp16 \
    --distributed \
    --init-bn0 \
    --no-bn-wd \
    --phases "[{'ep': 0, 'sz': 224, 'bs': 64},
    {'ep': (0, 10), 'lr': (0.05, 0.05)},
    {'ep': (10, 40), 'lr': (0.025, 0.025)}]" \
    --skip-auto-shutdown
fi

# Commented-out alternative launch command, kept for reference (not executed):
#ulimit -n 4096
#python -m torch.distributed.launch \
#--nproc_per_node=4 --nnodes=1 --node_rank=0 \
#training/train_imagenet_nv.py /home/ubuntu/data/imagenet \
#--workers=4 --fp16 --logdir ./ncluster/runs/lambda-cloud-1-instance --distributed --init-bn0 --no-bn-wd \
#--phases "[{'ep': 0, 'sz': 128, 'bs': 256, 'trndir': '-sz/160'}, {'ep': (0, 8), 'lr': (0.5, 1.0)}, {'ep': (8, 15), 'lr': (1.0, 0.125)}, {'ep': 15, 'sz': 224, 'bs': 112, 'trndir': '-sz/320', 'min_scale': 0.087}, {'ep': (15, 25), 'lr': (0.22, 0.022)}, {'ep': (25, 28), 'lr': (0.022, 0.0022)}, {'ep': 28, 'sz': 288, 'bs': 64, 'min_scale': 0.5, 'rect_val': True}, {'ep': (28, 29), 'lr': (0.00125, 0.000125)}]" --skip-auto-shutdown

# Force-kill (SIGKILL) every python process that currently has /dev/nvidia0 open:
# for i in $(sudo lsof /dev/nvidia0 | grep python  | awk '{print $2}' | sort -u); do kill -9 $i; done


