import os
import warnings
import time
import numpy as np
import argparse
import seaborn as sns
import torch

from models import *
from utils import set_seed, random_splits
from unsup_model import GRACE
from dataset_loader import DataLoader
from eval import unsupervised_test_linear
# Suppress every Python warning for the whole process, including those
# emitted by imported libraries.
warnings.filterwarnings("ignore")


def print_params(model, string):
    """Dump the named parameters of ``model.encoder`` to stdout.

    A banner containing ``string`` is printed first, then each parameter's
    name followed by the parameter itself, then a closing rule.
    """
    print(f'----------- {string} ----------')
    for label, weights in model.encoder.named_parameters():
        # One call emits the name and the value on separate lines.
        print(label, weights, sep='\n')
    print('-----------------------------------')


def get_encoder(dataset, args):
    """Instantiate the encoder network selected by ``args.net``.

    Args:
        dataset: Dataset object, forwarded unchanged to the encoder
            constructor as ``dataset=``.
        args: Parsed options. ``args.net`` selects the encoder class and the
            whole namespace is forwarded to the constructor as ``args=``.

    Returns:
        The constructed encoder instance.

    Raises:
        NotImplementedError: If ``args.net`` is not a supported encoder name.
    """
    # Each entry is a thunk so a class name is only looked up when that
    # encoder is actually requested, exactly like the branches of an if/elif.
    encoder_classes = {
        'ChebNetII': lambda: ChebNetII,
        'GCN': lambda: GCN_Net,
        'BernNet': lambda: BernNet,
        'GPRGNN': lambda: GPRGNN,
        'PropChebNetII': lambda: PropChebNetII,
        'PropBernNet': lambda: PropBernNet,
        'PropGPRGNN': lambda: PropGPRGNN,
    }

    resolve = encoder_classes.get(args.net)
    if resolve is None:
        supported = ', '.join(encoder_classes)
        raise NotImplementedError(
            f"Unknown encoder '{args.net}'; supported values for --net: {supported}"
        )
    return resolve()(dataset=dataset, args=args)


def unsupervised_learning(data, args):
    """Pre-train the contrastive model and return embeddings of its best epoch.

    Relies on the module-level ``model``, ``optimizer`` and ``device`` that
    the script entry point creates before calling this function.

    Training runs for at most ``args.unsup_epochs`` epochs and stops early
    once the loss has failed to improve for ``args.patience`` consecutive
    epochs. Whenever the loss improves, the model state is written to a
    temporary checkpoint under ``unsup_pkl/``; that checkpoint is reloaded
    before the embeddings are computed and is always deleted afterwards.

    Args:
        data: Graph object providing ``x`` and ``edge_index``.
        args: Parsed options; reads ``unsup_epochs``, ``patience``, ``net``
            and ``dataset``.

    Returns:
        The result of ``model.get_embedding(data.x, data.edge_index)`` moved
        to ``device``.
    """
    unsup_tag = str(int(time.time()))
    ckpt_dir = 'unsup_pkl'
    ckpt_path = os.path.join(
        ckpt_dir,
        'grace_' + args.net + '_best_model_' + args.dataset + unsup_tag + '.pkl',
    )
    # torch.save does not create missing parent directories.
    os.makedirs(ckpt_dir, exist_ok=True)

    best = float("inf")
    cnt_wait = 0
    saved = False

    try:
        for _ in range(args.unsup_epochs):
            model.train()
            optimizer.zero_grad()

            z1, z2 = model(data.x, data.edge_index)
            loss = model.infonce_loss(z1, z2)

            loss.backward()
            optimizer.step()

            # Compare plain floats so the best-loss bookkeeping does not keep
            # a tensor (and its autograd graph) alive across epochs.
            loss_value = loss.item()
            if loss_value < best:
                best = loss_value
                cnt_wait = 0
                torch.save(model.state_dict(), ckpt_path)
                saved = True
            else:
                cnt_wait += 1

            if cnt_wait == args.patience:
                break

        if saved:
            model.load_state_dict(torch.load(ckpt_path))
        else:
            # Happens with zero epochs or when the loss never compared below
            # infinity (e.g. NaN); there is no checkpoint to restore.
            print('No checkpoint was saved during unsupervised training; '
                  'using the current model weights.')
        model.eval()
        return model.get_embedding(data.x, data.edge_index).to(device)
    finally:
        # Remove the temporary checkpoint even if training or embedding failed.
        if os.path.exists(ckpt_path):
            os.remove(ckpt_path)


def parse_args(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            the arguments are taken from ``sys.argv[1:]``, which is argparse's
            standard behaviour.

    Returns:
        argparse.Namespace holding every option defined below.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument

    # general run configuration
    add('--seed', type=int, default=42, help='seed.')
    add('--dataset', type=str, default='Cora')
    add('--device', type=int, default=0, help='GPU device.')
    add('--runs', type=int, default=10, help='number of runs.')
    add('--net', type=str, default='GCN')

    # encoder architecture
    add('--num_layers', type=int, default=2)
    add('--hidden', type=int, default=64, help='hidden units.')
    add('--dropout', type=float, default=0.5, help='dropout for neural networks.')
    add('--use_bn', action='store_true')
    add('--K', type=int, default=10, help='propagation steps.')
    add('--alpha', type=float, default=0.1, help='alpha for APPNP/GPRGNN.')
    add('--dprate', type=float, default=0.0, help='dropout for propagation layer.')
    add('--q', type=int, default=0, help='The constant for ChebBase.')
    add('--Init', type=str, choices=['SGC', 'PPR', 'NPPR', 'Random', 'WS', 'Null'],
        default='PPR', help='initialization for GPRGNN.')

    # data split
    add('--train_rate', type=float, default=0.6, help='train set rate.')
    add('--val_rate', type=float, default=0.2, help='val set rate.')

    # unsupervised learning
    add('--de1', default=0.2, type=float)
    add('--de2', default=0.2, type=float)
    add('--df1', default=0.2, type=float)
    add('--df2', default=0.2, type=float)
    add('--tau', default=0.5, type=float)
    add("--proj_hid_dim", type=int, default=128, help="Projection hidden layer dim.")
    add('--residual', action='store_true')

    add("--patience", type=int, default=50, help="Patient epochs to wait before early stopping.")
    add("--unsup_epochs", type=int, default=1000, help="Unsupervised training epochs.")
    add("--lr1", type=float, default=0.01, help="Learning rate of the unsupervised model.")
    add("--lr2", type=float, default=0.01, help="Learning rate of linear evaluator.")
    add("--wd1", type=float, default=0.0, help="Weight decay of the unsupervised model.")
    add("--wd2", type=float, default=0.0, help="Weight decay of linear evaluator.")

    # norm layer
    add('--norm_type', default='none', type=str)
    add('--scale', default=0.5, type=float)
    add('--norm_x', action='store_true')
    add('--plusone', action='store_true')
    add('--layer_norm', action='store_true')

    return parser.parse_args(argv)


if __name__ == '__main__':
    # Script entry point: pre-train a GRACE model once, then score the frozen
    # embeddings with a linear evaluator on several fixed random splits.
    args = parse_args()
    print(args)
    print("---------------------------------------------")

    set_seed(args.seed)
    # 10 fixed seeds for random splits from BernNet
    SEEDS = [1941488137, 4198936517, 983997847, 4023022221, 4019585660,
             2108550661, 1648766618, 629014539, 3212139042, 2424918363]
    # Validate before the expensive pre-training: each run consumes one fixed
    # seed, and the summary statistics need at least one result.
    if not 1 <= args.runs <= len(SEEDS):
        raise ValueError(
            f'--runs must be between 1 and {len(SEEDS)} (one fixed seed per run), '
            f'got {args.runs}'
        )

    # `device`, `model` and `optimizer` must stay module-level names:
    # unsupervised_learning() reads them as globals.
    device = torch.device('cuda:' + str(args.device) if torch.cuda.is_available() else 'cpu')

    dataset = DataLoader(args.dataset)
    data = dataset[0]
    data = data.to(device)

    # Split sizes: training labels per class and total validation labels.
    percls_trn = int(round(args.train_rate * len(data.y) / dataset.num_classes))
    val_lb = int(round(args.val_rate * len(data.y)))

    encoder = get_encoder(dataset=dataset, args=args).to(device)
    if args.net in ['PropChebNetII', 'PropBernNet', 'PropGPRGNN']:
        # For the Prop* encoders the width handed to GRACE is derived from the
        # raw feature count (doubled when --residual is set).
        fc_dim = dataset.num_node_features * 2 if args.residual else dataset.num_node_features
    else:
        fc_dim = args.hidden
    model = GRACE(encoder=encoder, input_dim=fc_dim, num_hidden=args.hidden,
                  num_proj_hidden=args.proj_hid_dim, tau=args.tau,
                  drop_rate=(args.de1, args.de2, args.df1, args.df2), args=args).to(device)
    optimizer = torch.optim.Adam([{'params': model.parameters(), 'weight_decay': args.wd1, 'lr': args.lr1}])

    embeds = unsupervised_learning(data=data, args=args)

    unsup_results = []
    for split_seed in SEEDS[:args.runs]:
        # args.seed is overwritten because args is also passed to the evaluator.
        args.seed = split_seed
        data = random_splits(data, dataset.num_classes, percls_trn, val_lb, args.seed).to(device)
        eval_acc = unsupervised_test_linear(data=data, embeds=embeds, n_classes=dataset.num_classes,
                                            device=device, args=args)
        unsup_results.append(eval_acc)

    test_acc_mean = np.mean(unsup_results) * 100
    values = np.asarray(unsup_results, dtype=object)
    # Half-width of the bootstrapped 95% interval of the mean: the larger
    # distance from the sample mean to either interval bound.
    boot_means = sns.algorithms.bootstrap(values, func=np.mean, n_boot=1000)
    uncertainty = np.max(np.abs(sns.utils.ci(boot_means, 95) - values.mean()))
    print(f'test acc mean = {test_acc_mean:.4f} ± {uncertainty * 100:.4f}')

