import gc
import os
import sys
import time
import yaml
import wandb
import socket
import shutil
import random
import pynvml
import warnings
import numpy as np
from tqdm import tqdm
from typing import Tuple, List

from easydict import EasyDict
from itertools import product

import torch
from torch.utils.data import TensorDataset, DataLoader

from sampling import sampling
from eval_sample import eval_sample
from utils.runner import train_epoch, val_epoch

from models.diffusion import Diffusion

def seed_all(seed: int = 42) -> None:
    """Seed every random number generator used here and request determinism.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU plus all CUDA devices), then asks cuDNN and PyTorch to prefer
    deterministic kernels.

    Args:
        seed: Seed value shared by all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every CUDA device, not only the current one; this is silently
    # ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # This must be *called*: assigning to the attribute would only overwrite
    # the function object and leave determinism switched off. ``warn_only``
    # keeps ops that lack a deterministic implementation from aborting the run.
    torch.use_deterministic_algorithms(True, warn_only=True)

def grid_search(param_grid: dict) -> List[dict]:
    """Expand a grid of candidate values into every concrete combination.

    Args:
        param_grid: Maps each parameter name to an iterable of candidate
            values.

    Returns:
        One dict per element of the Cartesian product of the candidate
        values, keyed by the parameter names. The last parameter varies
        fastest. An empty grid yields a single empty dict.
    """
    names = list(param_grid)
    combinations = []
    for choice in product(*param_grid.values()):
        combinations.append(dict(zip(names, choice)))
    return combinations

def train_sample_eval(conf_path: str, is_coding_mode: bool = False, print_or_not: bool = True) -> str:
    """Train a diffusion model from a YAML config, then sample and evaluate.

    The directory containing ``conf_path`` is used as the log directory: a
    snapshot of ``./models`` is copied into it and the best checkpoint (lowest
    validation loss seen so far) is written to ``checkpoints/ckpt.pt``.

    Args:
        conf_path: Path to the YAML configuration file.
        is_coding_mode: When true, the Weights & Biases run is not started.
        print_or_not: Forwarded to ``train_epoch`` / ``val_epoch``
            (presumably toggles their console output — confirm in
            ``utils.runner``).

    Returns:
        Path of the best checkpoint that was saved.

    Raises:
        RuntimeError: If training ended without ever saving a checkpoint,
            e.g. the validation loss never dropped below infinity.
    """
    with open(conf_path, 'r') as f:
        config = EasyDict(yaml.safe_load(f))

    device = torch.device(config.model.device)

    seed_all(config.train.seed)
    log_dir = os.path.dirname(conf_path)
    # Keep a copy of the model code next to the results of this run.
    shutil.copytree('./models', os.path.join(log_dir, 'models'))
    ckpt_dir = os.path.join(log_dir, 'checkpoints')
    os.makedirs(ckpt_dir, exist_ok=True)
    # Only the best checkpoint is kept, so its path is known up front.
    ckpt_path = os.path.join(ckpt_dir, 'ckpt.pt')

    proj_name = config.project
    if not is_coding_mode:
        wandb.init(
            project='Phy-diffusion-aug-3body',
            entity='Anonymous',
            config=config,
            group=proj_name,
            job_type="training",
            name=proj_name,
            notes=socket.gethostname(),
            save_code=True,
            reinit=True
        )

    print('Loading dataset')

    train_loader = DataLoader(
        TensorDataset(torch.from_numpy(np.load(config.dataset.train)).to(torch.float32)),
        config.train.batch_size,
        num_workers=0,
        shuffle=True
    )
    val_loader = DataLoader(
        TensorDataset(torch.from_numpy(np.load(config.dataset.val)).to(torch.float32)),
        config.train.batch_size,
        num_workers=0,
        shuffle=False
    )

    model = Diffusion(config.model).to(device)

    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=1e-3,
        weight_decay=0,
        betas=(0.95, 0.999)
    )

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        factor=0.6,
        patience=10,
    )

    best_val_loss = float('inf')
    saved_checkpoint = False
    print("Training start!!!")

    for epoch in tqdm(range(config.train.num_epoches), dynamic_ncols=True):
        is_convergent = train_epoch(model, train_loader, optimizer, epoch, device, print_or_not)

        # Epoch 0 always validates, so ``val_loss`` is bound from the first
        # iteration on; between validations it holds the latest measurement.
        if epoch % config.train.val_freq == 0:
            val_loss = val_epoch(model, val_loader, epoch, device, print_or_not)
            scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save({
                'config': config,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch,
                'avg_val_loss': val_loss,
            }, ckpt_path)
            saved_checkpoint = True

        if is_convergent:
            print('Model converges! Early stopping!')
            break

        # Validation loss blew up: roll the weights back to the best checkpoint.
        if val_loss > 1e10 and saved_checkpoint:
            # ``load_state_dict`` needs the state dict itself, not a file path.
            # The checkpoint also pickles the EasyDict config, so it cannot be
            # read in weights-only mode; it is a file this run wrote itself.
            checkpoint = torch.load(ckpt_path, map_location=device, weights_only=False)
            model.load_state_dict(checkpoint['model'])
            print('Training failed. Loading model histroy!')
    del model
    torch.cuda.empty_cache()
    gc.collect()

    if not saved_checkpoint:
        raise RuntimeError(
            f'Training finished without saving a checkpoint to {ckpt_path}; '
            'the validation loss never improved.'
        )

    eval_sample(sampling(ckpt_path))
    return ckpt_path

def get_arg(
    loss: str,
    model: str,
    network: dict,
    is_coding_mode: bool = False,
    save_path: str = 'logs',
    diffusion: str = 'vp',
    device = None,
) -> Tuple[str, bool]:
    """Build the experiment configuration, pick a GPU and write ``config.yml``.

    Args:
        loss: Loss name stored in the config and used in the run folder name.
        model: Model name stored in the config and used in the run folder name.
        network: Network hyper-parameters; merged over the fixed dataset
            parameters and encoded into the run folder name.
        is_coding_mode: When true, the run name is prefixed with ``temp`` and
            the run folder is created directly under ``save_path``.
        save_path: Root directory for run folders.
        diffusion: Diffusion type, ``'vp'`` or ``'ve'``.
        device: ``None`` to pick the GPU with the most free memory (ties go to
            the highest index), a non-negative GPU index, or a string such as
            ``'cuda:1'`` or ``'1'``.

    Returns:
        ``(path to the written config.yml, is_coding_mode)``.

    Raises:
        AssertionError: If ``diffusion`` is not ``'vp'``/``'ve'`` or an
            integer ``device`` is negative.
        RuntimeError: If ``device`` is ``None`` and NVML reports no GPUs.
    """
    assert diffusion in ['vp', 've']

    yml = {
        'train': {'seed': 42, 'batch_size': 64, 'num_epoches': 1000, 'val_freq': 2}
    }

    dataset = '3body'
    yml['dataset'] = {
        'train': 'data/train_x.npy',
        'val': 'data/val_x.npy'
    }
    dataset_para = {
        'input_size': 18,
        'input_length': 10,
        'n_system': 3,
        'repara_size': 3 + 9
    }

    to_gb = 1024**3
    pynvml.nvmlInit()
    try:
        if device is None:
            n_gpu = pynvml.nvmlDeviceGetCount()
            if n_gpu == 0:
                raise RuntimeError('NVML reports no GPUs; cannot select a CUDA device.')
            free = [
                pynvml.nvmlDeviceGetMemoryInfo(
                    pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
                ).free / to_gb
                for gpu_id in range(n_gpu)
            ]
            # Most free memory wins; on a tie prefer the highest GPU index.
            device = max(range(n_gpu), key=lambda gpu_id: (free[gpu_id], gpu_id))
        elif isinstance(device, int):
            assert device >= 0
        elif isinstance(device, str):
            # Accept both 'cuda:<idx>' and a bare '<idx>'.
            device = int(device.removeprefix('cuda:'))

        handler = pynvml.nvmlDeviceGetHandleByIndex(device)
        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handler)
    finally:
        # Pair the single nvmlInit() above so NVML is released again.
        pynvml.nvmlShutdown()
    print(f'Using device: {device}. Free memory: {meminfo.free / to_gb}')
    device = 'cuda:' + str(device)

    yml['model'] = {
        'loss': loss,
        'dataset': dataset,
        'device': device,
        'diffusion': diffusion,
        'model': model,
        'network': {'device': device, **dataset_para, **network},
        'sampling': {'method': 'ode'},
    }

    # Run name: <loss>---<model>---<key>-<value>--<key>-<value>...
    project = loss + '---' + model + '---' + \
        '--'.join(k + '-' + str(v) for k, v in network.items())
    if is_coding_mode:
        project = 'temp' + project
    yml['project'] = project

    # The timestamp gives each launch its own folder.
    run_name = project + '---' + \
        time.strftime('%Y_%m_%d__%H_%M_%S', time.localtime())
    if is_coding_mode:
        save_folder = os.path.join(save_path, run_name)
    else:
        save_folder = os.path.join(save_path, loss, model, run_name)
    os.makedirs(save_folder)
    save_yml = os.path.join(save_folder, 'config.yml')
    with open(save_yml, 'w') as yaml_file:
        yaml.dump(yml, yaml_file, sort_keys=False)

    return save_yml, is_coding_mode



if __name__ == '__main__':
    # Usage: <script> [hidden_size] [n_layers] [device] [loss]
    # Every argument is optional; a missing or unparsable value falls back to
    # its default.
    warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')
    # USER may be unset (e.g. in containers); do not crash on it.
    os.environ['WANDB_CONFIG_DIR'] = '/tmp/.config-' + os.environ.get('USER', 'unknown')
    torch.multiprocessing.set_start_method('spawn')

    def _cli_arg(position, cast, default):
        """Return ``cast(sys.argv[position])``, or ``default`` if absent/invalid."""
        try:
            return cast(sys.argv[position])
        except (IndexError, ValueError):
            return default

    t_embed_size = {'t_embed_size': [128]}

    # An active trace function means a debugger is attached.
    is_coding_mode = sys.gettrace() is not None

    print_or_not = False

    hidden_size = _cli_arg(1, int, 1024)
    n_layers = _cli_arg(2, int, 5)
    device = _cli_arg(3, int, None)

    # Loss names used in earlier experiments: 'naive', or '<family>_<value>':
    #   ablation / jensen / implicit_energy with
    #       0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001
    #   momentum with 1.0, 0.5, 0.1, 0.05, 0.01, 0.005
    # (the numeric suffix is presumably a loss weight — confirm in the model code)
    loss = _cli_arg(4, str, 'naive')

    # Energy-based losses use the ParaPhyGRU model; all others use ParaGRU.
    if 'energy' in loss:
        model = 'ParaPhyGRU'
    else:
        model = 'ParaGRU'

    networks_para = grid_search({
        'hidden_size': [hidden_size],
        'n_layers': [n_layers]
    } | t_embed_size)

    for network in networks_para:
        train_sample_eval(*get_arg(
            loss, model, network, is_coding_mode,
            device=device
        ), print_or_not)

