#!/usr/bin/env bash
# Launch single-node distributed training of ResNet-50 on ImageNet with the
# KFAC optimizer, via `python -m torch.distributed.launch`.
#
# Usage: <this script> NPROC_PER_NODE MASTER_ADDR MASTER_PORT
#   NPROC_PER_NODE  value forwarded as --nproc_per_node
#   MASTER_ADDR     value forwarded as --master_addr
#   MASTER_PORT     value forwarded as --master_port
#
# Only the single-node form is implemented. Earlier revisions also read
# NNODE / NODE_RANK from $2 / $3 and the master address/port from $4 / $5, but
# those values were never passed to the launcher and the address/port were
# immediately overwritten with $2 / $3. The effective positional interface
# above is therefore kept as-is; any arguments after the third are ignored.
set -euo pipefail

if (( $# < 3 )); then
  printf 'Usage: %s NPROC_PER_NODE MASTER_ADDR MASTER_PORT\n' "${0##*/}" >&2
  exit 2
fi

NPROC_PER_NODE=$1
MASTER_ADDR=$2
MASTER_PORT=$3

# Phase schedule handed verbatim to train_imagenet_nv.py. The embedded
# newlines are part of the argument, exactly as in the original invocation.
phases="[{'ep': 0, 'sz': 224, 'bs': 64},
{'ep': (0, 10), 'lr': (0.05, 0.025)},
{'ep': (10, 40), 'lr': (0.025, 0.00625)},
{'ep': (40, 100), 'lr': (0.00625, 0.0015)}]"

# Options consumed by torch.distributed.launch itself.
launcher_args=(
  --nproc_per_node="$NPROC_PER_NODE"
  --master_addr="$MASTER_ADDR"
  --master_port="$MASTER_PORT"
)

# Options consumed by the training script.
train_args=(
  --model=resnet50
  --dataset=imagenet
  # "$HOME" instead of "~": Bash does not tilde-expand inside "--data=~/...",
  # so the trainer would otherwise receive a literal "~".
  --data="$HOME/dataset/"
  --optimizer=KFAC
  --kfac-update-freq=50
  --max-epoch=100
  --lr=0.1
  --decay-period=10
  --wd=0.0001
  --momentum=0.9
  --workers=8
  # NOTE(review): the directory name says "wd5e-4" while --wd=0.0001 is passed
  # above; confirm which one is intended. The path is kept unchanged.
  --logdir=log/imagenet/lr_stepdecay/KFAC/lr0.1_b512_wd5e-4_m0.9_H_50
  # Previously glued onto the --logdir value by a "\" with no preceding space,
  # so this flag never reached the trainer as a separate argument.
  --init-bn0
  --distributed
  --phases "$phases"
  --skip-auto-shutdown
)

CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m torch.distributed.launch \
  "${launcher_args[@]}" \
  ./train_imagenet_nv.py \
  "${train_args[@]}"


# Reference only (disabled): example single-node, 4-process fp16 launch.
#ulimit -n 4096
#python -m torch.distributed.launch \
#--nproc_per_node=4 --nnodes=1 --node_rank=0 \
#training/train_imagenet_nv.py /home/ubuntu/data/imagenet \
#--workers=4 --fp16 --logdir ./ncluster/runs/lambda-cloud-1-instance --distributed --init-bn0 --no-bn-wd \
#--phases "[{'ep': 0, 'sz': 128, 'bs': 256, 'trndir': '-sz/160'}, {'ep': (0, 8), 'lr': (0.5, 1.0)}, {'ep': (8, 15), 'lr': (1.0, 0.125)}, {'ep': 15, 'sz': 224, 'bs': 112, 'trndir': '-sz/320', 'min_scale': 0.087}, {'ep': (15, 25), 'lr': (0.22, 0.022)}, {'ep': (25, 28), 'lr': (0.022, 0.0022)}, {'ep': 28, 'sz': 288, 'bs': 64, 'min_scale': 0.5, 'rect_val': True}, {'ep': (28, 29), 'lr': (0.00125, 0.000125)}]" --skip-auto-shutdown

# kill all python processes that hold /dev/nvidia0 open (needs sudo for lsof)
# for i in $(sudo lsof /dev/nvidia0 | grep python  | awk '{print $2}' | sort -u); do kill -9 $i; done


