#!/usr/bin/env python
# coding: utf-8

# In[ ]:


def OSG(agents, tagets, *, n_trials=None, n_steps=None, horizon=None):
    """Run the agent/target simulation loop for several trials.

    For every trial the NumPy global RNG is re-seeded with the trial index,
    then each time step performs four phases in order:

    1. every agent refreshes its action distribution and samples
       ``next_action_index`` from it;
    2. every target updates its distance, records its reward, advances its
       state and appends that state to ``target.traj``;
    3. every agent computes ``loss`` from the predicted next states of its
       own neighbors (itself excluded) and updates its experts;
    4. every agent applies the sampled action and appends its state to
       ``agent.traj``.

    Nothing is returned; results are left on the agent and target objects.
    NOTE(review): agent/target state and ``traj`` are not reset between
    trials here -- confirm that callers expect trajectories to accumulate.

    Args:
        agents: Indexable collection of agent objects. Each must provide
            ``get_action_prob_dist()``, ``action_indices``,
            ``action_prob_dist``, ``neighbors``, ``motion_model(state,
            action)``, ``state``, ``actions``, ``get_losses(considered,
            targets)``, ``update_experts()``, ``apply_next_action()`` and
            ``traj``. Entries of ``neighbors`` are used to index ``agents``.
        tagets: Iterable of target objects (the misspelled name is kept for
            backward compatibility). Each must provide
            ``update_distance(agents)``, ``record_reward(agents)``,
            ``update_state(horizon, agents, t)``, ``state`` and ``traj``.
        n_trials: Number of trials; defaults to the module-level ``N_TRIAL``.
        n_steps: Steps per trial; defaults to the module-level ``N_STEPS``.
        horizon: Value forwarded to ``target.update_state``; defaults to the
            module-level ``HORIZON``.
    """
    # The body previously read an undefined/global ``targets`` and ignored
    # the argument; bind the argument to the intended name instead.
    targets = tagets

    # Fall back to the module-level configuration when no override is given.
    if n_trials is None:
        n_trials = N_TRIAL
    if n_steps is None:
        n_steps = N_STEPS
    if horizon is None:
        horizon = HORIZON

    for idx in range(n_trials):
        # Re-seed per trial so each trial's action samples are reproducible.
        np.random.seed(idx)

        for t in range(n_steps):
            # Phase 1: sample the next action of every agent.
            for agent in agents:
                agent.get_action_prob_dist()
                agent.next_action_index = np.random.choice(
                    agent.action_indices, 1, p=agent.action_prob_dist
                )[0]

            # Phase 2: all targets move for one step.
            for target in targets:
                target.update_distance(agents)
                target.record_reward(agents)
                target.update_state(horizon, agents, t)
                target.traj.append(target.state)

            # Phase 3: build each agent's loss from ITS OWN neighbors.
            # (The loop variable leaked from phase 1 used to be read here,
            # so every agent was evaluated with the last agent's neighbors.)
            for i, agent in enumerate(agents):
                agents_considered = []
                for neigh in agent.neighbors:
                    if int(neigh) != i:
                        other = agents[neigh]
                        # Predicted next state of the neighbor under the
                        # action it has just sampled.
                        agents_considered.append(
                            other.motion_model(
                                other.state,
                                other.actions[other.next_action_index],
                            )
                        )
                agent.loss = agent.get_losses(agents_considered, targets)
                agent.update_experts()

            # Phase 4: apply the sampled actions only after all losses have
            # been computed from the pre-move states.
            for agent in agents:
                agent.apply_next_action()
                agent.traj.append(agent.state)

