#!/usr/bin/env python
# coding: utf-8

# In[ ]:


import numpy as np
import math
import copy
import time
import cvxopt
import matplotlib.patches as patch
import matplotlib.pyplot as plt
#The class of Agent
class Agent(object):
    def __init__(self, state, actions, time_step,neighbors=None):
        # the location of agent
        self.state = np.array(state)
        # the trajectory record for agent
        self.traj = [np.array(state)]       
        # the time frequency for update e.g. 0.05s
        self.time_step = time_step          
    
        # Action Space 
        # the whole feasible actions for agent, i.e, [np.array([0,1]),np.array([0,2])]
        # np.array([0,1])indicates agent move toward direction np.array([0,1])
        self.actions = actions             
        # the number of all actions
        self.n_actions = len(self.actions)
        self.action_indices = np.array(range(self.n_actions))
        # the operated action
        self.next_action_index = 0
        # Neighbors {index_neighbor:weight_neighbor}
        self.neighbors = neighbors

    def motion_model(self, state, action):
        '''
        :param state: The current state at time t.
        :param action: The current action at time t.
        :return: The resulting state x_{t+1}
        '''
        return state+action*self.time_step
    
    def apply_next_action(self):
        """
        Applies the next action to modify the agent state.
        :param t: Time step.
        :return: None
        """
        self.state = self.motion_model(self.state, self.actions[self.next_action_index])

    def Initialize(self,N=16,Surrogate=True):
        """
        Intialize the action_prob_dist
        :param N: the number of agents
        :return: None
        """
        if Surrogate:
            self.action_prob_dist=(1/self.n_actions)*np.ones((N,self.n_actions))
            self.pre_prob_dist=(1/self.n_actions)*np.ones((N,self.n_actions))
        else:
            self.action_prob_dist=np.zeros((N,self.n_actions))
            self.pre_prob_dist=np.zeros((N,self.n_actions))
            
       
        
    def OSG_intialize(self,n_time_step=2000):
        self.J = math.ceil(math.log2(n_time_step)) # number of experts
        self.g = np.sqrt(math.log(self.J) / n_time_step) 
        self.beta = 1 / n_time_step
        self.gamma = [np.sqrt(math.log(self.n_actions * n_time_step) / 2 ** (j - 1)) for j in range(1, self.J + 1)]   # J x 1
        self.expert_weight =np.array([1.0 / self.J for i in range(self.J)])                                  # weights of experts        
        self.action_weight = np.array([[1.0 / self.n_actions for k in range(self.n_actions)] for i in range(self.J)]) # weights of actions for all experts        
        self.action_prob_dist = None
        self.loss =[0.0 for i in range(self.n_actions)]
  
    def get_losses0(self, agents_considered, targets):
        """
        Returns the losses of all possible actions based on the estimation result of just executed actions
        :return: The losses of all possible actions.
        """
        #agents_considered is the list of locations/states of agents considered
    
        losses = np.zeros(self.n_actions)
        # the loss f(A\cup\{a\})-f(A)
        if len(agents_considered) == 0:
            # f(empty set) = 0 before any agent's moving
            # f(a): distance with action a without any other agent's action
            obj_action = []
            for i in range(self.n_actions):
                action = self.actions[i] # self.action_indices is np.array()
                state = self.motion_model(self.state, action)
                obj_action.append(sum([Reward_func(np.linalg.norm(tar.state - state)) for tar in targets]))
            losses =np.array(obj_action)
        else:
            tar_min=[max([Reward_func(np.linalg.norm(tar.state - ag)) for ag in agents_considered]) for tar in targets]
            curr_obj = sum(tar_min)
            length=len(targets)
            for i in range(self.n_actions):
                action = self.actions[i]
                state = self.motion_model(self.state, action)
                # f(A_{i-1} U a)
                temp_obj = 0
                for j in range(length):
                    temp_obj += max([Reward_func(np.linalg.norm(targets[j].state - state)), tar_min[j]])
        

                losses[i] = temp_obj-curr_obj #obj_action[i]
                #if abs(losses[i])<0.0001:
                    #print(losses[i])
                    #print(np.array([1/(np.linalg.norm(targets[j].state - state)+ 0.001) for j in range(length)])<=np.array(tar_min))
        return losses
        
    
    
    def get_losses1(self, agents_considered,index,targets,agents):
        """
        Returns the losses of all possible actions based on the estimation result of just executed actions
        :return: The losses of all possible actions.
        """
        #agents_considered is the list of locations/states of agents considered
    
        losses = np.zeros(self.n_actions)
        # the loss f(A\cup\{a\})-f(A-\{a\})
        if len(agents_considered) == 0:
            # f(empty set) = 0 before any agent's moving
            # f(a): distance with action a without any other agent's action
            obj_action = []
            for i in range(self.n_actions):
                action = self.actions[i] # self.action_indices need the np.array()
                state = self.motion_model(self.state, action)
                obj_action.append(sum([Reward_func(np.linalg.norm(tar.state - state)) for tar in targets]))
            losses =np.array(obj_action)
        else:
            length=len(targets)
            Reduced_Points1=[self.motion_model(agents[k].state,ACTIONS[m]) for k,m in agents_considered if k!=index]
            if len(Reduced_Points1)>0:
                Reduced_min1=[max([Reward_func(np.linalg.norm(tar.state - ag)) for ag in Reduced_Points1]) for tar in targets]
            else:
                Reduced_min1=[0]*length
            for i in range(self.n_actions):
                Reduced_Points2=[self.motion_model(agents[k].state,ACTIONS[m]) for k,m in agents_considered if k==index and m!=i]
                if len(Reduced_Points2)>0:
                    Reduced_min2=[max([Reward_func(np.linalg.norm(tar.state - ag)) for ag in Reduced_Points2]) for tar in targets]
                else:
                    Reduced_min2=[0]*length
                
                #f(A\cup\{a})
                action = self.actions[i]
                state = self.motion_model(self.state, action)
                # f(A_{i-1} U a)
                temp_obj1 = 0
                temp_obj2 = 0
                for j in range(length):
                    temp_obj1 += max([Reward_func(np.linalg.norm(targets[j].state - state)), Reduced_min1[j],Reduced_min2[j]])
                    temp_obj2 +=max([Reduced_min1[j],Reduced_min2[j]])
        
                losses[i] = temp_obj1-temp_obj2
        return losses

    def get_losses2(self, agents_considered,index,targets,agents):
        """
        Returns the losses of all possible actions based on the estimation result of just executed actions
        :return: The losses of all possible actions.
        """
        #agents_considered is the list of locations/states of agents considered
        
        losses = np.zeros(self.n_actions)
        # the loss f(A\cup\{a\})-f(A-\{a\})
        if len(agents_considered) == 0:
            # f(empty set) = 0 before any agent's moving
            # f(a): distance with action a without any other agent's action
            obj_action = []
            for i in range(self.n_actions):
                action = self.actions[i] # self.action_indices need the np.array()
                state = self.motion_model(self.state, action)
                obj_action.append(sum([Reward_func(np.linalg.norm(tar.state - state)) for tar in targets]))
            losses =np.array(obj_action)
        else:
            length=len(targets)
            Reduced_Points=[self.motion_model(agents[k].state,ACTIONS[m]) for k,m in agents_considered if k!=index]
            if len(Reduced_Points)>0:
                Reduced_min=[max([Reward_func(np.linalg.norm(tar.state - ag)) for ag in Reduced_Points]) for tar in targets]
            else:
                Reduced_min=[0]*len(targets)
            Reduced_obj=sum(Reduced_min)
            for i in range(self.n_actions):
                #f(A\cup\{a})
                action = self.actions[i]
                state = self.motion_model(self.state, action)
                temp_obj = 0
                for j in range(length):
                    temp_obj += max([Reward_func(np.linalg.norm(targets[j].state - state)), Reduced_min[j]])
        
               #f(A-\{a\})
    

                losses[i] = temp_obj-Reduced_obj
        return losses



    def update_experts(self):
        """
        Updates the parameters of experts after getting losses (from t to t + 1)
        :param t: The index of time step
        :return: None
        """
        for j in range(self.J):
            Index_Set=[self.gamma[j] * self.loss[i] for i in range(self.n_actions)]
            MAX=max(Index_Set)
            if MAX>700:
                Index_Set=[Index_Set[i]*700/MAX  for i in range(self.n_actions)]
            v = [self.action_weight[j][i] * np.exp(Index_Set[i]) for i in range(self.n_actions)]
            #################
            update1=np.dot(np.array(self.loss), np.array(self.action_weight[j]))
            self.action_weight[j]= [self.beta * np.sum(v) / self.n_actions + (1 - self.beta) * v[i] for i in range(self.n_actions)]
            self.expert_weight[j] = self.expert_weight[j] * np.exp(self.g *update1)
            

        self.action_weight= np.array([self.action_weight[j] / np.linalg.norm(self.action_weight[j], ord=1) for j in range(self.J)],dtype=np.float128)
        self.expert_weight= self.expert_weight/ np.linalg.norm(np.array(self.expert_weight), ord=1)
    
    def get_action_prob_dist(self):
        """
        Returns the output of FSF* (the predicted action probability distribution)
        :param t: The index of time step.
        :return: None.
        """
        q = np.array(self.expert_weight) # J x 1
        p = np.array(self.action_weight) # J x m
        self.action_prob_dist=np.dot(q, p).tolist() # m x 1, note that self.action_prob_dist is a list

