import argparse
import os
import numpy as np
import pandas as pd
from datetime import datetime
from tqdm import trange
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn_pandas import DataFrameMapper
import wandb
import torchtuples as tt

# my models
from model.Survival import MTLR, CoxPH, CenQuanRegNN, WeibullAFT, LogLogisticAFT
from model.TwoBranch import MTLR2B, CoxPH2B, WeibullAFT2B, LogLogisticAFT2B
from model.LatDecReps import MTLR_LDR, CoxPH_LDR, WeibullAFT_LDR, LogLogisticAFT_LDR
from model.utils import build_sequential_nn

# baselines
from sksurv.ensemble import RandomSurvivalForest, ComponentwiseGradientBoostingSurvivalAnalysis
from pycox.models import DeepHitSingle, CoxTime, LogisticHazard
from pycox.models.cox_time import MLPVanillaCoxTime
from SODEN.models import ODEFunc
from iwsg.models import DiscNN
from iwsg.wrapper import train_iwsg, make_survival_prediction

from utils import save_params, set_seed, print_performance, pad_tensor, df2np
from utils.util_survival import survival_data_split, xcal_from_hist, make_time_bins, format_pred_sksurv, \
    make_mono_quantiles
from args import generate_parser
from data import make_survival_data
from SurvivalEVAL import SurvivalEvaluator, QuantileRegEvaluator

# Output directory; used below as the prefix of the `fname` handed to the training routines.
folder = 'logs'
# `exist_ok=True` replaces the check-then-create pattern: it is race-free when several
# runs start at the same time, and it fails loudly if `logs` exists but is not a directory.
os.makedirs(folder, exist_ok=True)

# Models for which time bins are created (via `make_time_bins`) before training.
DISCRETE_MODELS = ["MTLR", "MTLR-2B", "MTLR-LDR", "DeepHit", "Nnet-survival", "IWSG"]


def _plain_net_kwargs(args):
    """Return the constructor keywords shared by the single- and two-branch networks."""
    return dict(
        n_features=args.n_features,
        hidden_size=args.neurons,
        norm=args.norm,
        activation=args.activation,
        dropout=args.dropout,
    )


def _ldr_net_kwargs(args):
    """Return the constructor keywords shared by the ``*_LDR`` networks."""
    return dict(
        n_features=args.n_features,
        rep_dims=args.neurons,
        event_dims=args.e_dims,
        censor_dims=args.e_dims,    # tied to e_dims on purpose: easier hyperparameter tuning
        norm=args.norm,
        activation=args.activation,
        dropout=args.dropout,
        ipm=args.ipm,
        alpha=args.alpha,
        beta=args.beta,
    )


def _fit_native(model, args, data_train, data_val, device):
    """Call ``model.fit`` with the training configuration shared by the project's own networks."""
    model.fit(data_train, data_val, device=device, optimizer=args.optimizer, batch_size=args.batch_size,
              epochs=args.n_epochs, lr=args.lr, lr_min=1e-3 * args.lr, weight_decay=args.weight_decay,
              early_stop=args.early_stop, fname=folder + f'/{model.__class__.__name__}_{args.timestamp}',
              verbose=args.verbose)


def _as_tensor(x, device):
    """Convert a numpy array into a float tensor placed on ``device``."""
    return torch.from_numpy(x).float().to(device)


def _fit_pycox(model, args, x_train, y_train, x_val, t_val, e_val):
    """Set up the optimizer of a pycox model and fit it.

    Validation data is handed to ``fit`` only when a validation split exists
    (``x_val`` is not None). Without early stopping no validation split is made,
    and transforming ``None`` labels or reading ``x_val.shape`` would crash.
    """
    model.optimizer.set_lr(args.lr)
    model.optimizer.set('weight_decay', args.weight_decay)
    callbacks = [tt.callbacks.EarlyStopping()] if args.early_stop else None
    fit_kwargs = dict(input=x_train, target=y_train, batch_size=args.batch_size, epochs=args.n_epochs,
                      callbacks=callbacks, verbose=args.verbose)
    if x_val is not None:
        y_val = model.label_transform.transform(t_val, e_val)
        # the whole validation set is evaluated as a single batch
        fit_kwargs.update(val_data=(x_val, y_val), val_batch_size=x_val.shape[0])
    model.fit(**fit_kwargs)


def _sksurv_target(t, e):
    """Pack times and event indicators into the structured array passed to the sksurv models."""
    y = np.empty(dtype=[('cens', bool), ('time', np.float64)], shape=t.shape[0])
    y['cens'] = e
    y['time'] = t
    return y


def main(args=None):
    """Run ``args.n_exp`` train/evaluate repetitions of one survival model and log the metrics.

    For every repetition the data is re-split with seed ``args.seed + i``, the selected
    columns are standardized, the model named by ``args.model`` is trained, and the
    C-index, IBS, MAE (hinge and pseudo-observation), KM-calibration, D-calibration and
    X-calibration scores are computed, logged to wandb and collected. After the last
    repetition the collected scores are passed to ``print_performance``.

    Parameters
    ----------
    args : argparse.Namespace or other
        Run configuration. A ``Namespace`` starts a named wandb run in the
        "SALaD(main)" project; anything else is forwarded unchanged as the wandb
        config. In both cases the configuration is read back from ``wandb.config``.

    Raises
    ------
    AssertionError
        If the data has no ``time`` or ``event`` column.
    ValueError
        If ``args.model`` is not a known model name.
    """
    if isinstance(args, argparse.Namespace):
        wandb.init(
            project="SALaD(main)",    # Survival Analysis via Latent Decomposition
            config=args,
            name=args.model + "_" + args.data
        )
    else:
        wandb.init(config=args)
    for metric in ("C-index", "IBS", "MAE_Hinge", "MAE_PO", "KM-cal", "X-cal"):
        wandb.define_metric(metric, summary="mean")

    args = wandb.config
    data, cols_stdz = make_survival_data(args.data)
    features = data.columns.to_list()
    if 'true_time' in features:
        features.remove('true_time')
    assert "time" in data.columns and "event" in data.columns, "The event time variable and censor indicator " \
                                                               "variable is missing or need to be renamed."
    cols_wo_stdz = list(set(features).symmetric_difference(cols_stdz))  # including time and event
    stdz = [([col], StandardScaler()) for col in cols_stdz]
    wo_stdz = [(col, None) for col in cols_wo_stdz]
    columns_transform = stdz + wo_stdz

    # split ratios for training, validation, and testing;
    # a validation split is only made when early stopping needs it
    if args.early_stop:
        pct_train, pct_val, pct_test = 0.8, 0.1, 0.1
    else:
        pct_train, pct_val, pct_test = 0.9, 0.0, 0.1

    args.n_features = len(features) - 2     # excluding time and event
    args.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    args.device = "cuda:0" if torch.cuda.is_available() else "cpu"
    device = torch.device(args.device)

    path = save_params(args)

    # one entry per successfully finished experiment
    ci = []
    mae_hinge = []
    mae_po = []
    ibs = []
    km_cal = []
    dcal_ps = []
    xcal_stats = []

    pbar_outer = trange(args.n_exp, disable=not args.verbose, desc='Experiment')
    for i in pbar_outer:
        seed_i = args.seed + i
        set_seed(seed_i, device)
        data_train, data_val, data_test = survival_data_split(data, stratify_colname='both', frac_train=pct_train,
                                                              frac_val=pct_val, frac_test=pct_test, random_state=seed_i)
        if args.data == 'synth':
            # remove the true time column for training
            data_train = data_train.drop(columns=['true_time'])
            data_val = data_val.drop(columns=['true_time'])
            # use the true time for evaluation: every test instance becomes uncensored
            data_test = data_test.drop(columns=['time'])
            data_test = data_test.rename(columns={'true_time': 'time'})
            data_test['event'] = np.ones(data_test.shape[0])

        # standardize the data
        # [features] to keep the order, otherwise the feature order will be changed and the result is not reproducible
        mapper_df = DataFrameMapper(columns_transform, df_out=True)

        data_train = mapper_df.fit_transform(data_train).astype('float32')[features]
        data_val = mapper_df.transform(data_val).astype('float32')[features] if not data_val.empty else data_val
        data_test = mapper_df.transform(data_test).astype('float32')[features]
        data_train_val = pd.concat([data_train, data_val], ignore_index=True) if not data_val.empty else data_train

        x_train, t_train, e_train = df2np(data_train)
        x_val, t_val, e_val = df2np(data_val) if not data_val.empty else (None, None, None)
        x_test, t_test, e_test = df2np(data_test)
        x_train_val, t_train_val, e_train_val = df2np(data_train_val)

        # create time bins for discrete survival analysis models
        if args.model in DISCRETE_MODELS:
            discrete_bins_e = make_time_bins(t_train, event=e_train)
            discrete_bins_c = make_time_bins(t_train, event=1 - e_train)
            if args.model in ["DeepHit", "Nnet-survival"]:
                # the first bin of DeepHit must be smaller than the smallest time in the data
                first_cut = float(max(t_train_val.min() - 1e-5, 0))
                discrete_bins_e[0] = first_cut
                discrete_bins_c[0] = first_cut

        if "CoxPH" in args.model:
            if args.model == "CoxPH":
                model = CoxPH(**_plain_net_kwargs(args))
            elif args.model == "CoxPH-2B":
                model = CoxPH2B(**_plain_net_kwargs(args))
            elif args.model == "CoxPH-LDR":
                model = CoxPH_LDR(**_ldr_net_kwargs(args))
            else:
                raise ValueError(f"Unknown model name: {args.model}")
            _fit_native(model, args, data_train, data_val, device)
            surv_test = model.predict_survival(_as_tensor(x_test, device))
            time_coordinates = model.time_bins
        elif "MTLR" in args.model:
            if args.model == "MTLR":
                model = MTLR(time_bins=discrete_bins_e, **_plain_net_kwargs(args))
            elif args.model == "MTLR-2B":
                model = MTLR2B(time_bins_event=discrete_bins_e, time_bins_censor=discrete_bins_c,
                               **_plain_net_kwargs(args))
            elif args.model == "MTLR-LDR":
                model = MTLR_LDR(time_bins_event=discrete_bins_e, time_bins_censor=discrete_bins_c,
                                 **_ldr_net_kwargs(args))
            else:
                raise ValueError(f"Unknown model name: {args.model}")
            _fit_native(model, args, data_train, data_val, device)
            surv_test = model.predict_survival(_as_tensor(x_test, device))
            # prepend time 0 to the time grid
            time_coordinates = pad_tensor(model.time_bins, 0, where='start')
        elif "WeibullAFT" in args.model:
            if args.model == "WeibullAFT":
                model = WeibullAFT(**_plain_net_kwargs(args))
            elif args.model == "WeibullAFT-2B":
                model = WeibullAFT2B(**_plain_net_kwargs(args))
            elif args.model == "WeibullAFT-LDR":
                model = WeibullAFT_LDR(**_ldr_net_kwargs(args))
            else:
                raise ValueError(f"Unknown model name: {args.model}")
            _fit_native(model, args, data_train, data_val, device)
            surv_test = model.predict_survival(_as_tensor(x_test, device))
            time_coordinates = model.t_grids
        elif "LogLogisticAFT" in args.model:
            if args.model == "LogLogisticAFT":
                model = LogLogisticAFT(**_plain_net_kwargs(args))
            elif args.model == "LogLogisticAFT-2B":
                model = LogLogisticAFT2B(**_plain_net_kwargs(args))
            elif args.model == "LogLogisticAFT-LDR":
                model = LogLogisticAFT_LDR(**_ldr_net_kwargs(args))
            else:
                raise ValueError(f"Unknown model name: {args.model}")
            _fit_native(model, args, data_train, data_val, device)
            surv_test = model.predict_survival(_as_tensor(x_test, device))
            time_coordinates = model.t_grids
        elif args.model == "CQRNN":
            model = CenQuanRegNN(
                n_quantiles=args.n_quantiles,
                t_max=1.2 * data_train.time.max(),
                **_plain_net_kwargs(args)
            )
            _fit_native(model, args, data_train, data_val, device)
            quan_test = model.predict_quantiles(_as_tensor(x_test, device))
            quan_levels = model.quan_levels
            # NOTE: the predictions are deliberately not padded with a (quantile 0 -> time 0) point
            quan_levels, quan_test = make_mono_quantiles(quan_levels.cpu().numpy(), quan_test.cpu().numpy(),
                                                         method=args.mono_method, seed=seed_i)
        elif args.model in ["DeepHit", "Nnet-survival"]:
            labtrans = DeepHitSingle.label_transform(discrete_bins_e.numpy())
            net = tt.practical.MLPVanilla(in_features=args.n_features, num_nodes=args.neurons,
                                          out_features=labtrans.out_features, batch_norm=args.norm,
                                          dropout=args.dropout, activation=getattr(nn, args.activation))
            optim = getattr(tt.optim, args.optimizer)
            if args.model == "DeepHit":
                model = DeepHitSingle(net, optim, device=args.device, alpha=0.2, sigma=0.1, duration_index=labtrans.cuts)
            else:   # "Nnet-survival"
                model = LogisticHazard(net, optim, device=args.device, duration_index=labtrans.cuts)
            model.label_transform = labtrans

            y_train = model.label_transform.transform(t_train, e_train)
            _fit_pycox(model, args, x_train, y_train, x_val, t_val, e_val)
            surv_df = model.predict_surv_df(x_test)
            time_coordinates = surv_df.index.values
            surv_test = surv_df.values.T
        elif args.model == "CoxTime":
            labtrans = CoxTime.label_transform()
            labtrans.fit(t_train, e_train)
            net = MLPVanillaCoxTime(in_features=args.n_features, num_nodes=args.neurons, batch_norm=args.norm,
                                    dropout=args.dropout, activation=getattr(nn, args.activation))
            optim = getattr(tt.optim, args.optimizer)
            model = CoxTime(net, optim, device=args.device, labtrans=labtrans)
            model.label_transform = labtrans

            y_train = model.label_transform.fit_transform(t_train, e_train)
            _fit_pycox(model, args, x_train, y_train, x_val, t_val, e_val)
            model.compute_baseline_hazards()
            surv_df = model.predict_surv_df(x_test)
            time_coordinates = surv_df.index.values
            surv_test = surv_df.values.T

            # add the initial time point: survival probability 1 at time 0
            time_coordinates = np.concatenate([np.array([0]), time_coordinates], 0)
            surv_test = np.concatenate([np.ones([surv_test.shape[0], 1]), surv_test], 1)
        elif args.model == "SODEN":
            # Best effort on purpose: a failing SODEN run is reported and skipped so that
            # the remaining experiments still run. The skipped run contributes no metrics.
            try:
                # For reference, the default (much smaller) network structure is shown in
                # https://github.com/georgehc/survival-intro/blob/main/S5.2_SODEN.ipynb
                if args.neurons is not None:
                    nn_list = build_sequential_nn(args.n_features + 2, args.neurons, args.norm, args.activation, None)
                    nn_list.append(nn.Linear(args.neurons[-1], 1))
                    nn_list.append(nn.Softplus())
                else:
                    nn_list = [nn.Linear(args.n_features + 2, 1), nn.Softplus()]
                base_neural_net = nn.Sequential(*nn_list).to(device)
                model = ODEFunc(base_neural_net, num_features=args.n_features)
                model.fit(data_train, data_val, device=device, optimizer=args.optimizer, batch_size=args.batch_size,
                          epochs=args.n_epochs, lr=args.lr, lr_min=1e-3 * args.lr, weight_decay=args.weight_decay,
                          early_stop=args.early_stop, verbose=args.verbose)
                # evaluate the survival curves at the unique training times
                model.time_bins = torch.tensor(data_train['time'], dtype=torch.float).to(device).unique()
                surv_test = model.predict_survival(_as_tensor(x_test, device))
                time_coordinates = model.time_bins
            except Exception as e:
                print(f"Error in SODEN: {e}")
                # go to the next experiment
                continue
        elif args.model == "RSF":
            # no validation set is needed, so train and validation data are used together
            model = RandomSurvivalForest(n_estimators=100, n_jobs=None, random_state=seed_i)
            model.fit(x_train_val, _sksurv_target(t_train_val, e_train_val))
            pred_surv = model.predict_survival_function(x_test)
            surv_test, time_coordinates = format_pred_sksurv(pred_surv)
        elif args.model == "GB":
            # no validation set is needed, so train and validation data are used together
            model = ComponentwiseGradientBoostingSurvivalAnalysis(loss='coxph', n_estimators=100, random_state=seed_i)
            model.fit(x_train_val, _sksurv_target(t_train_val, e_train_val))
            pred_surv = model.predict_survival_function(x_test)
            surv_test, time_coordinates = format_pred_sksurv(pred_surv)
        elif args.model == "IWSG":
            n_bins = len(discrete_bins_e)
            # two identically shaped networks are passed to train_iwsg
            Fmodel = DiscNN(output_size=n_bins, **_plain_net_kwargs(args))
            Gmodel = DiscNN(output_size=n_bins, **_plain_net_kwargs(args))
            # remove the first time point in the bins for training, because no one is before this point.
            Fmodel, Gmodel = train_iwsg(
                data_train, data_val, Fmodel, Gmodel, bins=discrete_bins_e[1:], optimizer=args.optimizer,
                batch_size=args.batch_size, epochs=args.n_epochs, lr=args.lr, lr_min=1e-3 * args.lr,
                weight_decay=args.weight_decay, device=device, early_stop=args.early_stop,
                fname=folder + f'/IWSG_{args.timestamp}', verbose=args.verbose)
            surv_test = make_survival_prediction(_as_tensor(x_test, device), n_bins, Fmodel, device)
            # the surv curves are starting from 0, so we need to add the first bin
            # (remember previously we remove this first time point so we can now replace it with 0)
            discrete_bins_e[0] = 0
            time_coordinates = discrete_bins_e
        else:
            raise ValueError(f"Unknown model name: {args.model}")

        # evaluate the performance
        if "CQRNN" not in args.model:
            evaler = SurvivalEvaluator(surv_test, time_coordinates, t_test, e_test, t_train_val, e_train_val,
                                       predict_time_method="Median", interpolation='Pchip')
        else:
            evaler = QuantileRegEvaluator(quan_test, quan_levels, t_test, e_test, t_train_val, e_train_val,
                                          predict_time_method="Median", interpolation='Pchip')
        c_index = evaler.concordance()[0]
        ibs_score = evaler.integrated_brier_score(num_points=10)
        hinge_abs = evaler.mae(method='Hinge', verbose=False, weighted=False)
        po_abs = evaler.mae(method='Pseudo_obs', verbose=False, weighted=True)
        km_cal_score = evaler.km_calibration()
        dcal_pvalue, dcal_hist = evaler.d_calibration()
        xcal_score = xcal_from_hist(dcal_hist)

        ci.append(c_index)
        ibs.append(ibs_score)
        mae_hinge.append(hinge_abs)
        mae_po.append(po_abs)
        km_cal.append(km_cal_score)
        dcal_ps.append(dcal_pvalue)
        xcal_stats.append(xcal_score)

        wandb.log({'C-index': c_index,
                   'IBS': ibs_score,
                   'MAE_Hinge': hinge_abs,
                   'MAE_PO': po_abs,
                   'KM-cal': km_cal_score,
                   'D-cal': dcal_pvalue,
                   'X-cal': xcal_score,
                   })
    print_performance(
        path=path,
        Cindex=ci,
        IBS=ibs,
        MAE_Hinge=mae_hinge,
        MAE_PO=mae_po,
        KM_cal=km_cal,
        dcal_pvalues=dcal_ps,
        xCal_stats=xcal_stats,
    )


if __name__ == '__main__':
    # Script entry point: parse the command-line configuration, run all experiments,
    # then close the wandb run.
    # To debug autograd issues, enable anomaly detection first:
    # torch.autograd.set_detect_anomaly(True)
    main(generate_parser())
    wandb.finish()
