# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import copy
import os
import os.path as osp
import time
import warnings
import yaml
import shutil
import wandb

import torch
import torch.distributed as dist
import torch.multiprocessing as mp

from utils.parallel import auto_select_device, get_dist_info, init_dist, setup_multi_processes
from apis import init_random_seed, set_random_seed, train_model
from utils import get_root_logger
from configs import config_from_file

def parse_args():
    """Parse the command-line options of the training script.

    Besides parsing, this exports ``--local_rank`` as the ``LOCAL_RANK``
    environment variable when that variable is not already set.

    Returns:
        argparse.Namespace: the parsed options. ``gpu_ids`` is always a
        non-empty list of ints; it is ``[0]`` when ``--gpu-ids`` is omitted,
        so callers can safely take ``len(args.gpu_ids)``.
    """
    parser = argparse.ArgumentParser(description='Train a model')
    parser.add_argument('--config', help='train config file path')
    parser.add_argument('--work-dir', help='the dir to save logs and models')
    parser.add_argument('--data-dir', help='the dir to the datasets, just for different servers.')
    parser.add_argument(
        '--resume-from', help='the checkpoint file to resume from')
    parser.add_argument(
        '--no-validate',
        action='store_true',
        help='whether not to evaluate the checkpoint during training')
    # The group currently holds a single option; it is kept so further
    # GPU-selection flags can be added as mutually exclusive alternatives.
    group_gpus = parser.add_mutually_exclusive_group()
    group_gpus.add_argument(
        '--gpu-ids',
        type=int,
        nargs='+',
        # Without a default the attribute is None and every
        # ``len(args.gpu_ids)`` in this script raises TypeError.
        default=[0],
        help='ids of gpus to use '
        '(only applicable to non-distributed training)')

    parser.add_argument('--seed', type=int, default=None, help='random seed')
    parser.add_argument('--wandb', action='store_true')
    parser.add_argument(
        '--diff-seed',
        action='store_true',
        help='Whether or not set different seeds for different ranks')
    parser.add_argument(
        '--deterministic',
        action='store_true',
        help='whether to set deterministic options for CUDNN backend.')
    parser.add_argument('--local_rank', type=int, default=0)

    args = parser.parse_args()
    # Only fill LOCAL_RANK in when the environment has not already set it.
    if 'LOCAL_RANK' not in os.environ:
        os.environ['LOCAL_RANK'] = str(args.local_rank)

    return args

# Parsed once at import time; main() and the __main__ guard read this
# module-level namespace instead of receiving the options as a parameter.
args = parse_args()

def main(rank=0):
    """Run one training process.

    Loads the config named by ``--config``, applies the command-line
    overrides held in the module-level ``args``, initialises distributed
    training when two or more GPUs are requested, sets up the logger and the
    random seed, and finally calls ``train_model``.

    Args:
        rank (int): process index. It is only used to build the fallback
            ``dist_params`` when the config does not define any; after
            ``init_dist`` it is overwritten by ``get_dist_info()``.
    """
    cfg = config_from_file(args.config)
    if args.wandb:
        # NOTE(review): this API key is committed in plain text. It should be
        # revoked and supplied from outside the source tree (e.g. through an
        # environment variable); it is left unchanged here so that existing
        # runs keep behaving the same.
        os.system("wandb login --relogin eaced01a519033c0eca0dbff02e8006a9b01971f")
        # Name the run after the directory holding the config file. A plain
        # ``split('/')[-2]`` raises IndexError for a bare file name, so fall
        # back to the config's file stem when there is no parent directory.
        run_name = osp.basename(osp.dirname(args.config))
        if not run_name:
            run_name = osp.splitext(osp.basename(args.config))[0]
        wandb.init(project='CMD', name=run_name, config=cfg)

    cfg.runner.wandb = args.wandb
    # set multi-process settings
    setup_multi_processes(cfg)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir priority: CLI > value in the config file > './exprs/tmp'
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        cfg.work_dir = './exprs/tmp'

    if args.data_dir is not None:
        # Re-root every dataset path under --data-dir, keeping only the part
        # of the configured path that follows the last '/datasets/'.
        def spec_root(root):
            return root.split('/datasets/')[-1]

        for split_name in ('train', 'val', 'test'):
            split_cfg = getattr(cfg.data, split_name)
            split_cfg.root = [
                os.path.join(args.data_dir, spec_root(root))
                for root in split_cfg.root
            ]
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    # Guard against a missing --gpu-ids so the len() below cannot fail.
    cfg.gpu_ids = args.gpu_ids if args.gpu_ids is not None else [0]

    # init distributed env first, since logger depends on the dist info.
    if len(cfg.gpu_ids) < 2:
        distributed = False
    else:
        distributed = True
        # The config's dist_params win; otherwise use a local TCP rendezvous
        # sized by the number of requested GPUs.
        dist_params = cfg.get('dist_params', {
            'init_method': 'tcp://127.0.0.1:3447',
            'rank': rank,
            'world_size': len(cfg.gpu_ids)
            })
        init_dist('nccl', **dist_params)
        rank, world_size = get_dist_info()
        # From here on gpu_ids is 0..world_size-1, not the ids given on the CLI.
        cfg.gpu_ids = range(world_size)

    # create work_dir
    os.makedirs(cfg.work_dir, exist_ok=True)
    # dump config
    # yaml.safe_dump(cfg, osp.join(cfg.work_dir, osp.basename(args.config)))
    # shutil.copyfile(args.config, osp.join(cfg.work_dir, osp.basename(args.config)))

    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg}')

    # pick the device: indexed by rank when distributed, else the first GPU id
    if distributed:
        cfg.device = f'cuda:{cfg.gpu_ids[dist.get_rank()]}'
    else:
        cfg.device = f'cuda:{cfg.gpu_ids[0]}'

    # set random seeds
    seed = init_random_seed(args.seed, device=cfg.device)
    if distributed:
        # --diff-seed offsets the seed by the rank so every process differs.
        seed = seed + dist.get_rank() if args.diff_seed else seed
    logger.info(f'Set random seed to {seed}, '
                f'deterministic: {args.deterministic}')
    set_random_seed(seed, deterministic=args.deterministic)
    cfg.seed = seed
    meta['seed'] = seed

    # hand over to the training loop
    train_model(
        cfg,
        distributed=distributed,
        validate=(not args.no_validate),
        device=cfg.device,
        meta=meta)


if __name__ == '__main__':
    # One spawned worker per requested GPU; a single GPU runs in this process.
    num_workers = len(args.gpu_ids)
    if num_workers <= 1:
        main()
    else:
        mp.spawn(main, nprocs=num_workers)
