#!/usr/bin/env python
# coding: utf-8

# In[1]:


import math
import random
import numpy as np
import time
from IPython.display import display, clear_output
import matplotlib.image as mpimg
from gridworld import *
from agent import *
from utils import *
from magent import *


# In[2]:


# Environment configuration; each value is forwarded unchanged to GridWorld below.
env_name = 'mdps/5rooms.mdp'  # handed to GridWorld as `path`
stochastic = False
useNegativeRewards = False
random_reset = True

# Shared environment instance used by every experiment in this script.
env = GridWorld(
    path=env_name,
    useNegativeRewards=useNegativeRewards,
    stochastic=stochastic,
    random_reset=random_reset,
)

# Called with no policy and plot_policy=False.
# NOTE(review): presumably this draws only the grid layout -- confirm in utils.plotPolicy.
plotPolicy(env, policy=None, plot_policy=False)


# In[3]:


# Shared experiment settings used by every cell below.
N_AGENT = 3       # number of agents trained per configuration (loop count)
GAMMA = 0.99      # passed as `gamma` to the agent constructors
TOLERANCE = 1e-5  # passed as `tolerance` to policy iteration / value iteration
# Misspelled legacy name kept as an alias: the rest of this script still refers to it.
TOLERACNE = TOLERANCE
VERBOSE = False   # passed as `verbose` to policy_iteration
N_ITER = 50       # passed as `n_iterations` to policy_iteration


# In[4]:


# Train N_AGENT separate MaxEntropyPolicy agents on the shared environment
# and keep them all for the evaluation that follows.
agent_mops = []
for _ in range(N_AGENT):
    agent_mop = MaxEntropyPolicy(env, gamma=GAMMA)

    # The return value is stored in `converged` but not inspected by this script.
    converged = agent_mop.policy_iteration(
        n_iterations=N_ITER, tolerance=TOLERACNE, verbose=VERBOSE
    )

    agent_mops.append(agent_mop)


# In[6]:


import numpy as np

# For every trained MaxEntropyPolicy agent, run max_entropy_value_iteration on
# its policy (`agent.pi`) and keep the value found at the environment's start state.
ents = []
for agent in agent_mops:
    # The second returned element (max_delta) is not used afterwards.
    mV, max_delta = max_entropy_value_iteration(
        env, agent.pi, tolerance=TOLERACNE
    )
    ents.append(mV[env.getStartState()])
ents = np.array(ents)

# Summary statistics across the agents
print("Final Accumulated Entropy Statistics:")
print("\nMaxEntropy Policy:")
print(f"Mean: {np.mean(ents):.4f}")
print(f"Std: {np.std(ents):.4f}")


# In[7]:


# Mixture experiment: 4 components, alpha = 0.2.
N_COMPONENT = 4
ALPHA = 0.2

# Train N_AGENT separate ValueMixtureEntropyWeightedAgent instances.
agent_mps = []
for _ in range(N_AGENT):
    agent_mp = ValueMixtureEntropyWeightedAgent(
        env, n_components=N_COMPONENT, gamma=GAMMA, alpha=ALPHA
    )

    # The return value is stored in `converged` but not inspected by this script.
    converged = agent_mp.policy_iteration(
        n_iterations=N_ITER, tolerance=TOLERACNE, verbose=VERBOSE
    )

    agent_mps.append(agent_mp)


# In[11]:


import numpy as np

# For every trained mixture agent, run max_entropy_value_iteration on its full
# mixture policy and keep the value found at the environment's start state.
ents = []
for agent in agent_mps:
    # The second returned element (max_delta) is not used afterwards.
    mV, max_delta = max_entropy_value_iteration(
        env, agent.get_full_mixture_policy(), tolerance=TOLERACNE
    )
    ents.append(mV[env.getStartState()])
ents = np.array(ents)

# Summary statistics across the agents. The header names the mixture agent and
# its configuration (previously it was mislabelled "MaxEntropy Policy").
# Note: np.std defaults to ddof=0, i.e. the population standard deviation.
print("Final Accumulated Entropy Statistics:")
print(f"\nValueMixtureEntropyWeighted Policy (n_components={N_COMPONENT}, alpha={ALPHA}):")
print(f"Mean: {np.mean(ents):.4f}")
print(f"Std: {np.std(ents):.4f}")


# In[10]:


# Last recorded 'component_entropy' history entry of each mixture agent.
ents = [agent.history['component_entropy'][-1] for agent in agent_mps]

print(f"Mean: {np.mean(ents):.4f}")
print(f"Std: {np.std(ents):.4f}")


# In[12]:


# Mixture experiment: 3 components, alpha = 0.2.
N_COMPONENT = 3
ALPHA = 0.2

# Train N_AGENT separate ValueMixtureEntropyWeightedAgent instances.
agent_mps = []
for _ in range(N_AGENT):
    agent_mp = ValueMixtureEntropyWeightedAgent(
        env, n_components=N_COMPONENT, gamma=GAMMA, alpha=ALPHA
    )

    # The return value is stored in `converged` but not inspected by this script.
    converged = agent_mp.policy_iteration(
        n_iterations=N_ITER, tolerance=TOLERACNE, verbose=VERBOSE
    )

    agent_mps.append(agent_mp)


# In[13]:


import numpy as np

# For every trained mixture agent, run max_entropy_value_iteration on its full
# mixture policy and keep the value found at the environment's start state.
ents = []
for agent in agent_mps:
    # The second returned element (max_delta) is not used afterwards.
    mV, max_delta = max_entropy_value_iteration(
        env, agent.get_full_mixture_policy(), tolerance=TOLERACNE
    )
    ents.append(mV[env.getStartState()])
ents = np.array(ents)

# Summary statistics across the agents. The header names the mixture agent and
# its configuration (previously it was mislabelled "MaxEntropy Policy").
# Note: np.std defaults to ddof=0, i.e. the population standard deviation.
print("Final Accumulated Entropy Statistics:")
print(f"\nValueMixtureEntropyWeighted Policy (n_components={N_COMPONENT}, alpha={ALPHA}):")
print(f"Mean: {np.mean(ents):.4f}")
print(f"Std: {np.std(ents):.4f}")


# In[14]:


# Mixture experiment: 2 components, alpha = 0.2.
N_COMPONENT = 2
ALPHA = 0.2

# Train N_AGENT separate ValueMixtureEntropyWeightedAgent instances.
agent_mps = []
for _ in range(N_AGENT):
    agent_mp = ValueMixtureEntropyWeightedAgent(
        env, n_components=N_COMPONENT, gamma=GAMMA, alpha=ALPHA
    )

    # The return value is stored in `converged` but not inspected by this script.
    converged = agent_mp.policy_iteration(
        n_iterations=N_ITER, tolerance=TOLERACNE, verbose=VERBOSE
    )

    agent_mps.append(agent_mp)


# In[15]:


import numpy as np

# For every trained mixture agent, run max_entropy_value_iteration on its full
# mixture policy and keep the value found at the environment's start state.
ents = []
for agent in agent_mps:
    # The second returned element (max_delta) is not used afterwards.
    mV, max_delta = max_entropy_value_iteration(
        env, agent.get_full_mixture_policy(), tolerance=TOLERACNE
    )
    ents.append(mV[env.getStartState()])
ents = np.array(ents)

# Summary statistics across the agents. The header names the mixture agent and
# its configuration (previously it was mislabelled "MaxEntropy Policy").
# Note: np.std defaults to ddof=0, i.e. the population standard deviation.
print("Final Accumulated Entropy Statistics:")
print(f"\nValueMixtureEntropyWeighted Policy (n_components={N_COMPONENT}, alpha={ALPHA}):")
print(f"Mean: {np.mean(ents):.4f}")
print(f"Std: {np.std(ents):.4f}")


# In[16]:


# Mixture experiment: 2 components, alpha = 0.
N_COMPONENT = 2
ALPHA = 0

# Train N_AGENT separate ValueMixtureEntropyWeightedAgent instances.
agent_mps = []
for _ in range(N_AGENT):
    agent_mp = ValueMixtureEntropyWeightedAgent(
        env, n_components=N_COMPONENT, gamma=GAMMA, alpha=ALPHA
    )

    # The return value is stored in `converged` but not inspected by this script.
    converged = agent_mp.policy_iteration(
        n_iterations=N_ITER, tolerance=TOLERACNE, verbose=VERBOSE
    )

    agent_mps.append(agent_mp)


# In[17]:


import numpy as np

# For every trained mixture agent, run max_entropy_value_iteration on its full
# mixture policy and keep the value found at the environment's start state.
ents = []
for agent in agent_mps:
    # The second returned element (max_delta) is not used afterwards.
    mV, max_delta = max_entropy_value_iteration(
        env, agent.get_full_mixture_policy(), tolerance=TOLERACNE
    )
    ents.append(mV[env.getStartState()])
ents = np.array(ents)

# Summary statistics across the agents. The header names the mixture agent and
# its configuration (previously it was mislabelled "MaxEntropy Policy").
# Note: np.std defaults to ddof=0, i.e. the population standard deviation.
print("Final Accumulated Entropy Statistics:")
print(f"\nValueMixtureEntropyWeighted Policy (n_components={N_COMPONENT}, alpha={ALPHA}):")
print(f"Mean: {np.mean(ents):.4f}")
print(f"Std: {np.std(ents):.4f}")


# In[18]:


# Single-run experiment: 4 components, alpha = 0.2.
N_COMPONENT = 4
ALPHA = 0.2

agent_mp = ValueMixtureEntropyWeightedAgent(
    env, n_components=N_COMPONENT, gamma=GAMMA, alpha=ALPHA
)

# The return value is stored in `converged` but not inspected by this script.
converged = agent_mp.policy_iteration(
    n_iterations=N_ITER, tolerance=TOLERACNE, verbose=VERBOSE
)

# Plot the agent's recorded 'component_entropy' history.
# NOTE(review): `plt` is not imported explicitly in this file; presumably it
# comes in through one of the star imports -- confirm.
plt.plot(agent_mp.history['component_entropy'])


# In[19]:


# Single-run experiment: 4 components, alpha = 0.9.
N_COMPONENT = 4
ALPHA = 0.9

agent_mp = ValueMixtureEntropyWeightedAgent(
    env, n_components=N_COMPONENT, gamma=GAMMA, alpha=ALPHA
)

# The return value is stored in `converged` but not inspected by this script.
converged = agent_mp.policy_iteration(
    n_iterations=N_ITER, tolerance=TOLERACNE, verbose=VERBOSE
)

# Plot the agent's recorded 'component_entropy' history.
# NOTE(review): `plt` is not imported explicitly in this file; presumably it
# comes in through one of the star imports -- confirm.
plt.plot(agent_mp.history['component_entropy'])


# In[20]:


# Last recorded 'component_entropy' entry for the alpha = 0.9 run. As a bare
# expression it is only displayed when run as a notebook cell.
agent_mp.history['component_entropy'][-1]


# In[21]:


# Single-run experiment: 2 components, alpha = 0.2.
N_COMPONENT = 2
ALPHA = 0.2

agent_mp = ValueMixtureEntropyWeightedAgent(
    env, n_components=N_COMPONENT, gamma=GAMMA, alpha=ALPHA
)

# The return value is stored in `converged` but not inspected by this script.
converged = agent_mp.policy_iteration(
    n_iterations=N_ITER, tolerance=TOLERACNE, verbose=VERBOSE
)

# Plot the agent's recorded 'component_entropy' history.
# NOTE(review): `plt` is not imported explicitly in this file; presumably it
# comes in through one of the star imports -- confirm.
plt.plot(agent_mp.history['component_entropy'])


# In[22]:


# Single-run experiment: 2 components, alpha = 0.5.
N_COMPONENT = 2
ALPHA = 0.5

agent_mp = ValueMixtureEntropyWeightedAgent(
    env, n_components=N_COMPONENT, gamma=GAMMA, alpha=ALPHA
)

# The return value is stored in `converged` but not inspected by this script.
converged = agent_mp.policy_iteration(
    n_iterations=N_ITER, tolerance=TOLERACNE, verbose=VERBOSE
)

# Plot the agent's recorded 'component_entropy' history.
# NOTE(review): `plt` is not imported explicitly in this file; presumably it
# comes in through one of the star imports -- confirm.
plt.plot(agent_mp.history['component_entropy'])


# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:




