#!/usr/bin/env python
# coding: utf-8

# In[ ]:


def MA_SPL(agents,targets,DR_ratio=1):
    """Run the MA_SPL simulation loop, mutating ``agents`` and ``targets`` in place.

    For each of ``N_TRIAL`` trials (NumPy's global RNG is re-seeded with the
    trial index) and each of ``N_STEPS`` time steps ``t``:

    1. Every target refreshes its distances and rewards with respect to the
       agents, advances its state and appends the state to ``target.traj``.
    2. Every agent ``i``:
       * samples ``next_action_index`` from row ``i`` of its ``pre_prob_dist``
         (normalised to sum to 1);
       * replaces ``action_prob_dist`` by the weighted sum of the
         ``pre_prob_dist`` arrays of the agents listed in ``neighbors``;
       * adds ``10 / sqrt(t + 1)`` times a Monte-Carlo gradient estimate
         (mean of ``SAMPLE_SIZE`` calls to ``get_losses2``) to row ``i``;
       * projects row ``i`` back onto ``{x : 0 <= x <= 1, sum(x) <= 1}``
         by solving a QP, then zeroes any non-positive entries.
    3. Every agent applies the sampled action, appends its state to ``traj``
       and deep-copies ``action_prob_dist`` into ``pre_prob_dist``.

    Args:
        agents: indexable collection of agent objects. The code reads
            ``pre_prob_dist``, ``action_indices`` and ``neighbors`` and calls
            ``get_losses2`` and ``apply_next_action`` on each of them.
        targets: iterable of target objects providing ``update_distance``,
            ``record_reward``, ``update_state``, ``state`` and ``traj``.
        DR_ratio: parameter of the random scale factor ``z`` drawn for every
            gradient sample. It is used as a divisor, so it must be non-zero.

    Returns:
        None. All results are stored on the agent and target objects.
    """
    length=len(agents)
    n_actions=len(ACTIONS)
    # Constraint data G x <= g for the projection QP:
    #   row 0           : sum(x) <= 1
    #   next n_actions  : x_k    <= 1
    #   last n_actions  : -x_k   <= 0
    G=cvxopt.matrix(np.concatenate((np.ones((1,n_actions)),np.eye(n_actions),-np.eye(n_actions)),axis=0))
    g=cvxopt.matrix(np.concatenate((np.ones((1,1)),np.ones((n_actions,1)),np.zeros((n_actions,1))),axis=0))
    # With P = 2I and q = -2y the QP objective equals ||x - y||^2 up to a constant.
    P=cvxopt.matrix(2*np.eye(n_actions))
    # Loop-invariant part of the z-sampling formula.
    exp_neg_ratio=math.exp(-DR_ratio)
    for idx in range(N_TRIAL):
        np.random.seed(idx)
        for t in range(N_STEPS):
            for target in targets:
                # all targets move for one step
                target.update_distance(agents)
                target.record_reward(agents)
                target.update_state()
                target.traj.append(target.state)

            for i in range(length):
                agent=agents[i]
                # sample the next action from the agent's own row
                pro=agent.pre_prob_dist[i]
                agent.next_action_index=np.random.choice(agent.action_indices, 1, p=pro/sum(pro))[0]
                # aggregate the neighbors' previous distributions
                agent.action_prob_dist=sum([weight*agents[id_num].pre_prob_dist for id_num,weight in agent.neighbors.items()])

                # 1. stochastic gradient estimate
                # Mixing ints with the 'empty' sentinel makes np.random.choice
                # return strings, hence the int() conversion below.
                candidates=agent.action_indices.tolist()+['empty']
                gradient=np.zeros((1,n_actions))
                for _ in range(SAMPLE_SIZE):
                    sampled_actions=[]
                    # z = 0 when the uniform draw is 0 and approaches 1 as it approaches 1
                    z=math.log((1-exp_neg_ratio)*np.random.rand()+exp_neg_ratio)/DR_ratio+1
                    for i1 in range(length):
                        # NOTE(review): the original read row ``i`` here for every
                        # ``i1``, so all agents were sampled from agent i's own
                        # row and ``i1`` was only a label. Row ``i1`` is presumably
                        # agent i's estimate of agent i1's distribution - confirm
                        # against the algorithm description.
                        scaled=z*agent.pre_prob_dist[i1]
                        # The row comes out of a numerical QP solve, so the
                        # residual can be marginally negative; clamp it because
                        # np.random.choice rejects negative probabilities.
                        probs=scaled.tolist()+[max(0.0,1-sum(scaled))]
                        new_action=np.random.choice(candidates, 1, p=probs)[0]
                        if new_action!='empty':
                            sampled_actions.append((i1,int(new_action)))
                    gradient+=agent.get_losses2(sampled_actions,i,targets,agents).reshape(1,n_actions)/SAMPLE_SIZE

                # 2. gradient step with diminishing step size, then projection
                agent.action_prob_dist[i]+=10*gradient[0]/math.sqrt(t+1)
                q=cvxopt.matrix(-2*agent.action_prob_dist[i])
                sol=cvxopt.solvers.qp(P,q,G,g)
                point=np.array(sol['x']).reshape(1,n_actions)
                agent.action_prob_dist[i]=point
                # remove small negative entries left by the solver
                agent.action_prob_dist[i]=(agent.action_prob_dist[i]>0)*agent.action_prob_dist[i]

            for i in range(length):
                # apply the sampled action and roll the distributions forward
                agents[i].apply_next_action()
                agents[i].traj.append(agents[i].state)
                agents[i].pre_prob_dist=copy.deepcopy(agents[i].action_prob_dist)

