from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

import numpy as np
from scipy.io import savemat

from DotmapUtils import get_required_argument
from optimizers_dtwil import CEMOptimizer
import gc, torch
from tqdm import trange
import time
import torch
import dtaidistance
from dtaidistance import dtw_ndim
TORCH_DEVICE = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
from multiprocessing import Pool
import threading

class Controller:
    """Abstract controller interface.

    Every method except the constructor raises NotImplementedError and is
    meant to be overridden by a concrete controller.
    """

    # Shared message for all not-yet-implemented hooks.
    _ABSTRACT_MSG = "Must be implemented in subclass."

    def __init__(self, *args, **kwargs):
        """Accepts and ignores any positional or keyword arguments."""

    def train(self, obs_trajs, acs_trajs, rews_trajs):
        """Fits the controller from lists of observation, action and reward trajectories."""
        raise NotImplementedError(self._ABSTRACT_MSG)

    def reset(self):
        """Restores the controller to its initial state."""
        raise NotImplementedError(self._ABSTRACT_MSG)

    def act(self, obs, t, get_pred_cost=False):
        """Chooses an action for observation ``obs`` at timestep ``t``."""
        raise NotImplementedError(self._ABSTRACT_MSG)

    def dump_logs(self, primary_logdir, iter_logdir):
        """Writes logs to the primary and the per-iteration log directories."""
        raise NotImplementedError(self._ABSTRACT_MSG)


def shuffle_rows(arr):
    """Returns a copy of ``arr`` with the entries of every row independently permuted.

    A permutation per row is obtained by arg-sorting uniform random noise of
    the same shape as ``arr`` along the last axis.
    """
    noise = np.random.uniform(size=arr.shape)
    col_order = np.argsort(noise, axis=-1)
    # Column vector of row numbers, broadcast against the per-row column order.
    row_ids = np.arange(arr.shape[0])[:, None]
    return arr[row_ids, col_order]

class MPC(Controller):
    optimizers = {"CEM": CEMOptimizer}

    def __init__(self, params):
        """Creates class instance.

        Arguments:
            params
                .env (gym.env): Environment for which this controller will be used.
                .ac_ub (np.ndarray): (optional) An array of action upper bounds.
                    Defaults to environment action upper bounds.
                .ac_lb (np.ndarray): (optional) An array of action lower bounds.
                    Defaults to environment action lower bounds.
                .constraint_type (str): (optional) How the action bounds are tightened.
                    'box': bounds are additionally clipped to +/- constraint_ub.
                    'l2': bounds are additionally clipped to +/- sqrt(constraint_ub).
                    'H_M', 'HC_O', 'W_M', 'no': only environment and ac_ub/ac_lb bounds apply.
                    Any other value (including a missing one) keeps the raw environment
                    bounds and ignores ac_ub/ac_lb. Also forwarded to the optimizer.
                .constraint_ub: (optional) Constraint magnitude used by 'box' and 'l2'.
                    Also forwarded to the optimizer.
                .criteria (str): (optional) Trajectory-matching criterion. 'dtw' is the
                    only value handled by the cost computation of this class.
                .obs_clip (int): (optional) Number of leading observation dimensions kept
                    when rollouts are compared to the expert trajectory. 0 means all
                    observation dimensions.
                .arc_beta, .arc_step: (optional) Forwarded to the optimizer as `beta`
                    and `arc_step`.
                .update_fns (list): (optional) Zero-argument callables invoked by reset().
                    Defaults to an empty list.
                .per (int): (optional) Determines how often the action sequence will be optimized.
                    Defaults to 1 (reoptimizes at every call to act()).
                .dynamic_alignment_progress (bool): (optional) If truthy, the position in the
                    expert trajectory is advanced from the DTW alignment instead of by one
                    step per optimization.
                .state_normalize (bool): (optional) If truthy, states are min-max normalized
                    with the expert statistics before the DTW distance is computed.
                .prop_cfg
                    .model_init_cfg (DotMap): A DotMap of initialization parameters for the model.
                        .model_constructor (func): A function which constructs an instance of this
                            model, given model_init_cfg.
                        .num_nets (int): Ensemble size; npart must be a multiple of it.
                    .model_train_cfg (dict): (optional) A DotMap of training parameters. train()
                        reads its 'epochs' entry. Defaults to an empty dict.
                    .model_pretrained (bool): (optional) If True, assumes that the model
                        has been trained upon construction.
                    .mode (str): Propagation method. Only 'TSinf' is accepted.
                        See https://arxiv.org/abs/1805.12114 for details.
                    .npart (int): Number of particles used for propagation.
                    .ign_var (bool): (optional) Stored flag; forced to True when mode is 'E'.
                        Defaults to False.
                    .obs_preproc (func): (optional) A function which modifies observations (in a 2D matrix)
                        before they are passed into the model. Defaults to lambda obs: obs.
                        Note: It is called with NumPy arrays in train() and with torch tensors
                        during planning.
                    .obs_postproc (func): (optional) A function which returns vectors calculated from
                        the previous observations and model predictions.
                        Defaults to lambda obs, model_out: model_out.
                        Note: It is called with torch tensors.
                    .obs_postproc2 (func): (optional) A function which takes the vectors returned by
                        obs_postproc and (possibly) modifies it into the predicted observations for the
                        next time step. Defaults to lambda obs: obs.
                        Note: It is called with torch tensors and is also handed to the optimizer.
                    .targ_proc (func): (optional) A function which takes current observations and next
                        observations and returns the array of targets (so that the model learns the mapping
                        obs -> targ_proc(obs, next_obs)). Defaults to lambda obs, next_obs: next_obs.
                        Note: Only needs to process NumPy arrays.
                .opt_cfg
                    .mode (str): Internal optimizer that will be used. Only 'CEM' is accepted.
                    .cfg (DotMap): A map of optimizer initializer parameters.
                    .plan_hor (int): The planning horizon that will be used in optimization.
                    .obs_cost_fn (func): Required cost on observations. It is stored on the
                        controller; the code in this class does not invoke it.
                    .ac_cost_fn (func): Required cost on actions. It is stored on the
                        controller; the code in this class does not invoke it.
                .log_cfg
                    .save_all_models (bool): (optional) If True, saves models at every iteration.
                        Defaults to False (only most recent model is saved).
                        Warning: Can be very memory-intensive.
                    .log_traj_preds (bool): (optional) If True, saves the mean and variance of predicted
                        particle trajectories. Defaults to False.
                    .log_particles (bool) (optional) If True, saves all predicted particles trajectories.
                        Defaults to False. Note: Takes precedence over log_traj_preds.
                        Warning: Can be very memory-intensive
        """
        super().__init__(params)
        env = params.env
        self.dO, self.dU = env.observation_space.shape[0], env.action_space.shape[0]
        self.ac_ub, self.ac_lb = env.action_space.high, env.action_space.low
        self.constraint_ub = params.get("constraint_ub")
        self.constraint_type = params.get("constraint_type")
        self.criteria = params.get("criteria")
        self.obs_clip = params.get('obs_clip')
        self.arc_beta = params.get('arc_beta')
        self.arc_step = params.get('arc_step')
        if self.obs_clip == 0:
            # 0 is the "use every observation dimension" sentinel.
            self.obs_clip = env.observation_space.shape[0]

        if self.constraint_type in ('box', 'l2'):
            # 'l2' bounds each coordinate by the square root of the constraint.
            limit = self.constraint_ub if self.constraint_type == 'box' else self.constraint_ub ** 0.5
            self.ac_ub = np.minimum(np.minimum(self.ac_ub, params.get("ac_ub", self.ac_ub)), limit)
            self.ac_lb = np.maximum(np.maximum(self.ac_lb, params.get("ac_lb", self.ac_lb)), -limit)
        elif self.constraint_type in ('H_M', 'HC_O', 'W_M', 'no'):
            self.ac_ub = np.minimum(self.ac_ub, params.get("ac_ub", self.ac_ub))
            self.ac_lb = np.maximum(self.ac_lb, params.get("ac_lb", self.ac_lb))

        self.update_fns = params.get("update_fns", [])
        self.per = params.get("per", 1)
        # NOTE: the misspelled attribute name is kept so existing readers keep working;
        # `model_init_cfg` is the same object under the intended name.
        self.model_init_cig = params.prop_cfg.get("model_init_cfg", {})
        self.model_init_cfg = self.model_init_cig
        self.model_train_cfg = params.prop_cfg.get("model_train_cfg", {})
        self.prop_mode = get_required_argument(params.prop_cfg, "mode", "Must provide propagation method.")
        self.npart = get_required_argument(params.prop_cfg, "npart", "Must provide number of particles.")
        self.ign_var = params.prop_cfg.get("ign_var", False) or self.prop_mode == "E"

        self.obs_preproc = params.prop_cfg.get("obs_preproc", lambda obs: obs)
        self.obs_postproc = params.prop_cfg.get("obs_postproc", lambda obs, model_out: model_out)
        self.obs_postproc2 = params.prop_cfg.get("obs_postproc2", lambda next_obs: next_obs)
        self.targ_proc = params.prop_cfg.get("targ_proc", lambda obs, next_obs: next_obs)

        self.opt_mode = get_required_argument(params.opt_cfg, "mode", "Must provide optimization method.")
        self.plan_hor = get_required_argument(params.opt_cfg, "plan_hor", "Must provide planning horizon.")
        self.obs_cost_fn = get_required_argument(params.opt_cfg, "obs_cost_fn", "Must provide cost on observations.")
        self.ac_cost_fn = get_required_argument(params.opt_cfg, "ac_cost_fn", "Must provide cost on actions.")

        self.save_all_models = params.log_cfg.get("save_all_models", False)
        self.log_traj_preds = params.log_cfg.get("log_traj_preds", False)
        self.log_particles = params.log_cfg.get("log_particles", False)

        # Perform argument checks
        assert self.opt_mode == 'CEM'
        assert self.prop_mode == 'TSinf', 'only TSinf propagation mode is supported'
        assert self.npart % self.model_init_cig.num_nets == 0, "Number of particles must be a multiple of the ensemble size."

        # Create action sequence optimizer
        opt_cfg = params.opt_cfg.get("cfg", {})
        self.optimizer = CEMOptimizer(
            sol_dim=self.plan_hor * self.dU,
            lower_bound=np.tile(self.ac_lb, [self.plan_hor]),
            upper_bound=np.tile(self.ac_ub, [self.plan_hor]),
            cost_function=self._compile_cost,
            _predict_next_obs=self._predict_next_obs,
            obs_postproc2=self.obs_postproc2,
            constraint_type=self.constraint_type,
            constraint_ub=self.constraint_ub,
            action_dim=self.dU,
            obs_dim=self.dO,
            beta=self.arc_beta,
            arc_step=self.arc_step,
            **opt_cfg
        )

        # Controller state variables
        self.has_been_trained = params.prop_cfg.get("model_pretrained", False)
        self.ac_buf = np.array([]).reshape(0, self.dU)
        # Initial CEM mean is the midpoint of the bounds; initial variance is (range / 4) ** 2.
        self.prev_sol = np.tile((self.ac_lb + self.ac_ub) / 2, [self.plan_hor])
        self.init_var = np.tile(np.square(self.ac_ub - self.ac_lb) / 16, [self.plan_hor])
        # Empty training buffers whose widths are probed by running the processors on dummy rows.
        self.train_in = np.array([]).reshape(0, self.dU + self.obs_preproc(np.zeros([1, self.dO])).shape[-1])
        self.train_targs = np.array([]).reshape(
            0, self.targ_proc(np.zeros([1, self.dO]), np.zeros([1, self.dO])).shape[-1]
        )

        print("Created an MPC controller, prop mode %s, %d particles. " % (self.prop_mode, self.npart) +
              ("Ignoring variance." if self.ign_var else ""))

        if self.save_all_models:
            print("Controller will save all models. (Note: This may be memory-intensive.")
        if self.log_particles:
            print("Controller is logging particle predictions (Note: This may be memory-intensive).")
            self.pred_particles = []
        elif self.log_traj_preds:
            print("Controller is logging trajectory prediction statistics (mean+var).")
            self.pred_means, self.pred_vars = [], []
        else:
            print("Trajectory prediction logging is disabled.")

        # Set up pytorch model
        self.model = get_required_argument(
            params.prop_cfg.model_init_cfg, "model_constructor", "Must provide a model constructor."
        )(params.prop_cfg.model_init_cfg)

        # Expert demonstration data; left unset here and expected to be assigned
        # by the caller before act() plans (act() indexes expert_actions).
        self.expert_traj = None
        self.expert_max = None
        self.expert_min = None
        self.expert_maxmin_gap = None
        self.expert_actions = None

        # Planning state. These are defined up front so act() never reads an
        # attribute that only reset() or a previous act() call would have created.
        self.t = 0
        self.cur_progress = 0
        self.cur_progress_tmp = 0
        self.sy_cur_obs = None
        self.sy_cur_obs_history = None
        self.dynamic_alignment_progress = params.get("dynamic_alignment_progress")
        self.state_normalize = params.get("state_normalize")

    def train(self, obs_trajs, acs_trajs, rews_trajs):
        """Trains the internal model of this controller. Once trained,
        this controller switches from applying random actions to using MPC.

        The new transitions are appended to the stored training set, each
        ensemble member gets its own bootstrap sample (indices drawn with
        replacement), and the ensemble is fitted for
        ``self.model_train_cfg['epochs']`` epochs with mini-batches of 32.

        Arguments:
            obs_trajs: A list of observation matrices, observations in rows.
            acs_trajs: A list of action matrices, actions in rows.
            rews_trajs: A list of reward arrays. Not used by this method.

        Returns: None.
        """
        # Construct new training points and add to training set
        new_train_in, new_train_targs = [], []
        for obs, acs in zip(obs_trajs, acs_trajs):
            new_train_in.append(np.concatenate([self.obs_preproc(obs[:-1]), acs], axis=-1))
            new_train_targs.append(self.targ_proc(obs[:-1], obs[1:]))
        self.train_in = np.concatenate([self.train_in] + new_train_in, axis=0)
        self.train_targs = np.concatenate([self.train_targs] + new_train_targs, axis=0)

        # From now on act() plans with the model instead of sampling random actions.
        self.has_been_trained = True

        # Train the pytorch model
        self.model.fit_input_stats(self.train_in)

        # One row of bootstrap indices per ensemble member: shape [num_nets, num_samples].
        idxs = np.random.randint(self.train_in.shape[0], size=[self.model.num_nets, self.train_in.shape[0]])

        epochs = self.model_train_cfg['epochs']

        # TODO: double-check the batch_size for all env is the same
        batch_size = 32

        # The progress-bar metric is computed on at most `val_size` samples per
        # network, pushed through the model `val_chunk` samples at a time.
        val_size, val_chunk = 5000, 2500

        epoch_range = trange(epochs, unit="epoch(s)", desc="Network training")
        num_batch = int(np.ceil(idxs.shape[-1] / batch_size))

        for _ in epoch_range:

            for batch_num in range(num_batch):
                batch_idxs = idxs[:, batch_num * batch_size: (batch_num + 1) * batch_size]

                # Regularizers: log-variance bound penalty plus the model's weight decay.
                loss = 0.01 * (self.model.max_logvar.sum() - self.model.min_logvar.sum())
                loss += self.model.compute_decays()

                # TODO: move all training data to GPU before hand
                train_in = torch.from_numpy(self.train_in[batch_idxs]).to(TORCH_DEVICE).float()
                train_targ = torch.from_numpy(self.train_targs[batch_idxs]).to(TORCH_DEVICE).float()
                mean, logvar = self.model(train_in, ret_logvar=True)
                inv_var = torch.exp(-logvar)

                train_losses = ((mean - train_targ) ** 2) * inv_var + logvar
                # Only taking mean over the last 2 dimensions
                # The first dimension corresponds to each model in the ensemble
                train_losses = train_losses.mean(-1).mean(-1).sum()

                loss += train_losses

                self.model.optim.zero_grad()
                loss.backward()
                self.model.optim.step()

            idxs = shuffle_rows(idxs)

            gc.collect()
            torch.cuda.empty_cache()

            # Progress metric only. The subset is taken along the sample axis
            # (axis 1); slicing axis 0 would select ensemble members instead and
            # never cap the number of evaluated samples. No gradients are needed.
            val_idxs = idxs[:, :val_size]
            with torch.no_grad():
                val_targ = torch.from_numpy(self.train_targs[val_idxs]).to(TORCH_DEVICE).float()
                chunk_means = []
                for start in range(0, val_idxs.shape[-1], val_chunk):
                    chunk_idxs = val_idxs[:, start:start + val_chunk]
                    val_in = torch.from_numpy(self.train_in[chunk_idxs]).to(TORCH_DEVICE).float()
                    chunk_mean, _ = self.model(val_in)
                    chunk_means.append(chunk_mean)
                mean = torch.cat(chunk_means, dim=1)
                mse_losses = ((mean - val_targ) ** 2).mean(-1).mean(-1)

            # Displayed value is the per-network MSE summed over the ensemble.
            epoch_range.set_postfix({
                "Training loss(es)": mse_losses.detach().cpu().numpy().sum()
            })

    def reset(self):
        """Resets this controller (clears previous solution, calls all update functions).

        Returns: None
        """
        self.prev_sol = np.tile((self.ac_lb + self.ac_ub) / 2, [self.plan_hor])
        self.optimizer.reset()
        self.cur_progress = 0
        self.cur_progress_tmp = 0
        
        for update_fn in self.update_fns:
            update_fn()

    def act(self, obs, t, obs_history, get_pred_cost=False):
        """Returns the action that this controller would take at time t given observation obs.

        Arguments:
            obs: The current observation
            t: The current timestep
            get_pred_cost: If True, returns the predicted cost for the action sequence found by
                the internal optimizer.

        Returns: An action (and possibly the predicted cost)
        """
        if not self.has_been_trained:
            return np.random.uniform(self.ac_lb, self.ac_ub, self.ac_lb.shape)
        if self.ac_buf.shape[0] > 0:
            action, self.ac_buf = self.ac_buf[0], self.ac_buf[1:]
            return action
        self.t = t
        self.sy_cur_obs = obs
        self.sy_cur_obs_history = obs_history
        soln,var = self.optimizer.obtain_solution(self.prev_sol, self.init_var,self.sy_cur_obs,self.expert_actions[self.cur_progress:self.cur_progress+self.plan_hor])
        self.prev_sol = np.concatenate([np.copy(soln)[self.per * self.dU:], np.zeros(self.per * self.dU)])
        self.ac_buf = soln[:self.per * self.dU].reshape(-1, self.dU)


        if self.dynamic_alignment_progress:
            self.cur_progress = self.cur_progress_tmp
        else:
            self.cur_progress = self.cur_progress + 1
        return self.act(obs, t, obs_history)

    def dump_logs(self, primary_logdir, iter_logdir):
        """Saves logs to either a primary log directory or another iteration-specific directory.
        See __init__ documentation to see what is being logged.

        Arguments:
            primary_logdir (str): A directory path. This controller assumes that this directory
                does not change every iteration.
            iter_logdir (str): A directory path. This controller assumes that this directory
                changes every time dump_logs is called.

        Returns: None
        """
        # TODO: implement saving model for pytorch
        # self.model.save(iter_logdir if self.save_all_models else primary_logdir)
        if self.log_particles:
            savemat(os.path.join(iter_logdir, "predictions.mat"), {"predictions": self.pred_particles})
            self.pred_particles = []
        elif self.log_traj_preds:
            savemat(
                os.path.join(iter_logdir, "predictions.mat"),
                {"means": self.pred_means, "vars": self.pred_vars}
            )
            self.pred_means, self.pred_vars = [], []
    
    @torch.no_grad()
    def _compile_cost(self, ac_seqs):
        """Scores candidate action sequences by DTW distance to the expert trajectory.

        Every candidate is rolled out through the learned model with
        ``self.npart`` particles starting from ``self.sy_cur_obs``. Each
        particle rollout is compared, via multi-dimensional DTW, with the
        ``plan_hor``-step window of ``self.expert_traj`` starting at
        ``self.cur_progress`` (padded with the last expert state if the
        window runs past the end). A candidate's cost is the mean distance
        over its particles.

        Side effect: when ``self.dynamic_alignment_progress`` is truthy,
        ``self.cur_progress_tmp`` is overwritten on every call with the
        progress estimate derived from the best candidate of this call.

        Arguments:
            ac_seqs (np.ndarray): Candidate solutions, one per row, each row a
                flattened (plan_hor * dU) action sequence.

        Returns:
            np.ndarray with one cost per candidate.

        Raises:
            NotImplementedError: If ``self.criteria`` is not 'dtw'.
        """
        if self.criteria != 'dtw':
            # Only DTW produces costs; fail clearly instead of hitting an
            # unbound local after the (expensive) rollouts.
            raise NotImplementedError(
                "Unsupported criteria %r; only 'dtw' is implemented." % (self.criteria,)
            )

        nopt = ac_seqs.shape[0]
        plan_hor = self.plan_hor

        # (nopt, plan_hor * dU) -> (plan_hor, nopt * npart, dU): time-major, with
        # every candidate repeated once per particle.
        ac_seqs = torch.from_numpy(ac_seqs).float().to(TORCH_DEVICE)
        ac_seqs = ac_seqs.view(-1, plan_hor, self.dU).transpose(0, 1)
        ac_seqs = ac_seqs[:, :, None].expand(-1, -1, self.npart, -1)
        ac_seqs = ac_seqs.contiguous().view(plan_hor, -1, self.dU)

        # Every particle of every candidate starts from the current observation.
        cur_obs = torch.from_numpy(self.sy_cur_obs).float().to(TORCH_DEVICE)
        cur_obs = cur_obs[None].expand(nopt * self.npart, -1)

        rollout = []
        for t in range(plan_hor):
            next_obs = self._predict_next_obs(cur_obs, ac_seqs[t])
            rollout.append(next_obs)
            cur_obs = self.obs_postproc2(next_obs)

        # (plan_hor, nopt * npart, obs) -> (nopt * npart, plan_hor, obs).
        # torch.stack raises if the horizon is empty; let that error propagate.
        rollout = torch.stack(rollout).transpose(0, 1).to(TORCH_DEVICE)

        # Expert window for the current progress, padded with the final expert state.
        target = self.expert_traj[self.cur_progress:self.cur_progress + plan_hor]
        if target.shape[0] < plan_hor:
            target = torch.cat((target, self.expert_traj[-1].repeat(plan_hor - target.shape[0], 1)))
        target = target.unsqueeze(0).to(TORCH_DEVICE)

        # Series 0 is the expert window, series 1.. are the particle rollouts.
        series = torch.cat((target, rollout), dim=0)
        if self.state_normalize:
            series = (series - self.expert_min) / self.expert_maxmin_gap
        series = series[:, :, :self.obs_clip].cpu().numpy().astype(np.double)

        # The block restricts the computation to expert-vs-rollout pairs.
        costs = dtw_ndim.distance_matrix_fast(
            series, block=((0, 1), (1, series.shape[0])), compact=True, parallel=True
        )
        costs = torch.FloatTensor(costs).view(-1, self.npart)
        # Replace nan with high cost
        costs[torch.isnan(costs)] = 1e6
        mean_costs = costs.mean(dim=1)

        # Determine the alignment progress
        if self.dynamic_alignment_progress:
            best_action_idx = int(torch.argmin(mean_costs))
            progress_update = 0
            for npart_idx in range(self.npart):
                particle = series[best_action_idx * self.npart + npart_idx + 1]
                _, dtw_matrix = dtw_ndim.warping_paths_fast(series[0][:-1], particle[:, :self.obs_clip])
                dtw_path = dtaidistance.dtw.best_path(dtw_matrix)
                # Take the first index of the first path step whose second index
                # is 1 (presumably the expert step aligned with the rollout's
                # second state -- confirm against dtaidistance's path convention).
                for path_step in dtw_path:
                    if path_step[1] == 1:
                        progress_update += path_step[0]
                        break
            # Average the per-particle estimates and round to a whole step.
            self.cur_progress_tmp = self.cur_progress + int(round(progress_update / self.npart))

        return mean_costs.detach().cpu().numpy()

    def _predict_next_obs(self, obs, acs):
        """Samples next-step predictions from the model for a batch of particles.

        The preprocessed observations and the actions are regrouped per
        ensemble member, concatenated, and passed through the model. One
        Gaussian sample is drawn per particle from the returned mean and
        variance, the result is flattened back to one row per particle and
        handed to ``obs_postproc`` together with the original observations.

        Arguments:
            obs: Current observations, one row per particle.
            acs: Actions, one row per particle.

        Returns: Whatever ``self.obs_postproc(obs, predictions)`` returns.
        """
        preprocessed = self.obs_preproc(obs)

        assert self.prop_mode == 'TSinf'

        model_in = torch.cat(
            (self._expand_to_ts_format(preprocessed), self._expand_to_ts_format(acs)),
            dim=-1,
        )

        # The global torch RNG is re-seeded on every call, so the noise drawn
        # below repeats between calls (assuming the model's forward pass
        # consumes no random numbers). This also resets the generator for any
        # other torch code in the process.
        # Original author's note: torch seed can't be fixed if you use different gpu
        torch.manual_seed(0)
        mean, var = self.model(model_in)

        noise = torch.randn_like(mean, device=TORCH_DEVICE)
        sampled = mean + noise * var.sqrt()

        # TS Optimization: Remove additional dimension
        sampled = self._flatten_to_matrix(sampled)

        return self.obs_postproc(obs, sampled)

    def _expand_to_ts_format(self, mat):
        dim = mat.shape[-1]

        # Before, [10, 5] in case of proc_obs
        reshaped = mat.view(-1, self.model.num_nets, self.npart // self.model.num_nets, dim)
        # After, [2, 5, 1, 5]

        transposed = reshaped.transpose(0, 1)
        # After, [5, 2, 1, 5]

        reshaped = transposed.contiguous().view(self.model.num_nets, -1, dim)
        # After. [5, 2, 5]

        return reshaped

    def _flatten_to_matrix(self, ts_fmt_arr):
        dim = ts_fmt_arr.shape[-1]

        reshaped = ts_fmt_arr.view(self.model.num_nets, -1, self.npart // self.model.num_nets, dim)

        transposed = reshaped.transpose(0, 1)

        reshaped = transposed.contiguous().view(-1, dim)

        return reshaped
