import os
import torch
import ecole as ec
import numpy as np
import collections
import random
from agents.agent_model import GNNPolicyItem, GNNPolicyAno, GNNPolicy


class ObservationFunction(ec.observation.NodeBipartite):

    def __init__(self, problem):
        super().__init__()

    def seed(self, seed):
        pass


class Policy():

    def __init__(self, problem):
        self.rng = np.random.RandomState()

        self.device = f"cuda:0"
        self.problem = problem

        if problem == 'item_placement':
            self.policy = GNNPolicy().to(self.device)
            self.policy.load_state_dict(torch.load('agents/model/itemplacement/item_SOTARL.pkl'))  # best model params
            self.policy.eval()

        elif problem == 'setcover':
            self.policy = GNNPolicy().to(self.device)
            self.policy.load_state_dict(torch.load('agents/model/setcover/setcover_SOTARL.pkl'))  # best model params
            self.policy.eval()

        elif problem == 'indset':
            self.policy = GNNPolicy().to(self.device)
            self.policy.load_state_dict(torch.load('agents/model/indset/indset_SOTARL.pkl'))
            self.policy.eval()

        elif problem == 'cauctions':
            self.policy = GNNPolicy().to(self.device)
            self.policy.load_state_dict(torch.load('agents/model/cauctions/cauctions_SOTARL.pkl'))
            self.policy.eval()

        elif problem == 'facilities':
            self.policy = GNNPolicy().to(self.device)
            self.policy.load_state_dict(torch.load('agents/model/facilities/facilities_SOTARL.pkl'))
            self.policy.eval()

    def seed(self, seed):
        self.rng = np.random.RandomState(seed)

    def __call__(self, action_set, observation):
        variable_features = observation.column_features
        variable_features = np.delete(variable_features, 14, axis=1)
        variable_features = np.delete(variable_features, 13, axis=1)

        observation = (torch.from_numpy(observation.row_features.astype(np.float32)).to(self.device),
                        torch.from_numpy(observation.edge_features.indices.astype(np.int64)).to(self.device),
                        torch.from_numpy(observation.edge_features.values.astype(np.float32)).view(-1, 1).to(self.device),
                        torch.from_numpy(variable_features.astype(np.float32)).to(self.device))

        logits = self.policy(*observation)
        action = action_set[logits[action_set.astype(np.int64)].argmax()]
        return action
