"""
Train a diffusion model on images.
"""

import argparse

from ddbm import dist_util, logger
from datasets import load_data
from ddbm.resample import create_named_schedule_sampler
from ddbm.script_util import (
    model_and_diffusion_defaults,
    create_model_and_diffusion,
    sample_defaults,
    args_to_dict,
    add_dict_to_argparser,
    get_workdir,
)
from ddbm.train_util import TrainLoop

import torch.distributed as dist

from pathlib import Path

import wandb

from glob import glob
import os
from datasets.augment import AugmentPipe


def _checkpoint_step(path):
    """Return the training step encoded in a checkpoint path, or None.

    The step is the text between the last ``model_`` in *path* and the next
    ``.`` (for example ``model_010000.pt`` gives 10000). Paths whose suffix is
    not an integer yield None instead of raising, so unrelated files that
    happen to match the checkpoint glob do not abort training.
    """
    try:
        return int(path.split("model_")[-1].split(".")[0])
    except ValueError:
        return None


def _latest_checkpoint(workdir):
    """Return the model checkpoint in *workdir* with the highest step, or None.

    Candidates are the files matching ``*model*[0-9].*`` directly inside
    *workdir*; those without a parseable step number are ignored.
    """
    numbered = [
        path
        for path in glob(f"{workdir}/*model*[0-9].*")
        if _checkpoint_step(path) is not None
    ]
    if not numbered:
        return None
    return max(numbered, key=_checkpoint_step)


def main(args):
    """Set up distributed training from parsed arguments and run the train loop.

    Args:
        args: namespace produced by ``create_argparser().parse_args()``.
            ``args.resume_checkpoint`` is overwritten in place when a
            checkpoint is discovered automatically.

    Raises:
        ValueError: if ``args.batch_size`` is -1 and ``args.global_batch_size``
            is smaller than the number of processes, which would leave each
            process with an empty batch.
    """
    workdir = get_workdir(args.exp)
    Path(workdir).mkdir(parents=True, exist_ok=True)

    dist_util.setup_dist()
    logger.configure(dir=workdir)
    is_main = dist.get_rank() == 0

    if is_main:
        # Only rank 0 talks to wandb; runs are offline unless --debug disables them.
        name = args.exp if args.resume_checkpoint == "" else args.exp + "_resume"
        wandb.init(
            project="bridge",
            group=args.exp,
            name=name,
            config=vars(args),
            mode="offline" if not args.debug else "disabled",
        )
        logger.log("creating model and diffusion...")

    data_image_size = args.image_size

    # Pick the checkpoint to start from. An explicit --resume_checkpoint wins;
    # otherwise prefer the newest checkpoint already in the workdir, and only
    # fall back to --pretrained_ckpt when the workdir has none.
    resume_train_flag = False
    if args.resume_checkpoint == "":
        latest = _latest_checkpoint(workdir)
        if latest is not None:
            if os.path.exists(latest):
                args.resume_checkpoint = latest
                resume_train_flag = True
        elif args.pretrained_ckpt is not None:
            args.resume_checkpoint = args.pretrained_ckpt
        if is_main and args.resume_checkpoint != "":
            logger.log("Resuming from checkpoint: ", args.resume_checkpoint)

    model, diffusion = create_model_and_diffusion(**args_to_dict(args, model_and_diffusion_defaults().keys()))
    model.to(dist_util.dev())

    if is_main:
        wandb.watch(model, log="all")
    schedule_sampler = create_named_schedule_sampler(args.schedule_sampler, diffusion)

    if args.batch_size == -1:
        # Derive the per-process batch size from the requested global one.
        world_size = dist.get_world_size()
        batch_size = args.global_batch_size // world_size
        if batch_size < 1:
            raise ValueError(
                f"global_batch_size ({args.global_batch_size}) must be at least "
                f"the number of processes ({world_size})"
            )
        if args.global_batch_size % world_size != 0:
            logger.log(f"warning, using smaller global_batch_size of {world_size*batch_size} instead of {args.global_batch_size}")
    else:
        batch_size = args.batch_size

    if is_main:
        logger.log("creating data loader...")

    data, test_data = load_data(
        data_dir=args.data_dir,
        dataset=args.dataset,
        batch_size=batch_size,
        image_size=data_image_size,
        num_workers=args.num_workers,
    )

    if args.use_augment:
        augment = AugmentPipe(p=0.12, xflip=1e8, yflip=1, scale=1, rotate_frac=1, aniso=1, translate_frac=1)
    else:
        augment = None

    logger.log("training...")
    TrainLoop(
        model=model,
        diffusion=diffusion,
        train_data=data,
        test_data=test_data,
        batch_size=batch_size,
        # A microbatch at least as large as the batch is passed as -1.
        microbatch=-1 if args.microbatch >= batch_size else args.microbatch,
        lr=args.lr,
        ema_rate=args.ema_rate,
        log_interval=args.log_interval,
        test_interval=args.test_interval,
        save_interval=args.save_interval,
        save_interval_for_preemption=args.save_interval_for_preemption,
        resume_checkpoint=args.resume_checkpoint,
        workdir=workdir,
        use_fp16=args.use_fp16,
        fp16_scale_growth=args.fp16_scale_growth,
        schedule_sampler=schedule_sampler,
        weight_decay=args.weight_decay,
        lr_anneal_steps=args.lr_anneal_steps,
        augment_pipe=augment,
        train_mode=args.train_mode,
        resume_train_flag=resume_train_flag,
        **sample_defaults(),
    ).run_loop()


def create_argparser():
    """Build the command-line parser for the training script.

    The script-specific options below are merged with the entries from
    ``model_and_diffusion_defaults()``, which take precedence on a name
    clash, and every resulting entry is registered on the parser through
    ``add_dict_to_argparser``.
    """
    training_options = {
        "data_dir": "",
        "dataset": "edges2handbags",
        "schedule_sampler": "real-uniform",
        "lr": 2e-5,
        "weight_decay": 0.0,
        "lr_anneal_steps": 0,
        "global_batch_size": 14,
        "batch_size": -1,
        "microbatch": 1,  # -1 disables microbatches
        "ema_rate": "0.9999",  # comma-separated list of EMA values
        "log_interval": 50,
        "test_interval": 500,
        "save_interval": 10000,
        "save_interval_for_preemption": 50000,
        "resume_checkpoint": "",
        "exp": "",
        "use_fp16": True,
        "fp16_scale_growth": 1e-3,
        "debug": False,
        "num_workers": 8,
        "use_augment": False,
        "pretrained_ckpt": None,
        "train_mode": "ddbm",
    }
    # Later entries win, so the model/diffusion defaults override duplicates.
    all_options = {**training_options, **model_and_diffusion_defaults()}

    cli = argparse.ArgumentParser()
    add_dict_to_argparser(cli, all_options)
    return cli


if __name__ == "__main__":
    # Script entry point: parse the command line and pass the namespace to main().
    main(create_argparser().parse_args())
