import torch.nn.functional as F
import torch.optim as optim
from k_level_policy_gradients.src.algorithms.value.dqn_continuous import DQNContinuous
from k_level_policy_gradients.src.policy.cem_policy import CEMPolicy
from k_level_policy_gradients.src.networks.q_network import CEMNEtwork
from k_level_policy_gradients.src.utils.replay_memory import ReplayMemoryObs


def setup_dqn_continuous_agent(mdp_info, idx_agent, agent_params, **kwargs):
    """
    Instantiates a multiplayer DQNContinuous agent.

    Builds the CEM policy, the critic approximator parameters and the replay
    memory for agent ``idx_agent`` and wires them into a ``DQNContinuous``.

    Args:
        mdp_info: environment description. This function reads its
            ``action_space`` and ``observation_space`` (both indexed by
            agent), ``state_space`` and ``n_agents`` attributes.
        idx_agent: index of the agent to build inside the per-agent spaces
            of ``mdp_info``.
        agent_params: mapping with the agent configuration. Required keys:
            ``batch_size``, ``max_replay_size``, ``target_update_frequency``,
            ``tau``, ``warmup_replay_size``, ``target_update_mode``
            (``"soft"`` or ``"hard"``), ``n_features``, ``lr``,
            ``grad_norm_clip``, ``obs_last_action``, ``share_agent_params``
            and ``use_cuda``. Optional key: ``use_mixer`` (defaults to
            ``False``). ``n_features`` may be a Python expression given as a
            string (it is evaluated) or an already-parsed value.
        **kwargs: must contain ``primary_agent`` when ``share_agent_params``
            is truthy and ``idx_agent`` is not 0; otherwise unused.

    Returns:
        The constructed ``DQNContinuous`` agent.

    Raises:
        ValueError: if ``target_update_mode`` is neither ``"soft"`` nor
            ``"hard"``.
        KeyError: if a required key is missing from ``agent_params``, or if
            ``primary_agent`` is required but absent from ``kwargs``.
    """
    batch_size = agent_params["batch_size"]
    max_replay_size = agent_params["max_replay_size"]
    target_update_frequency = agent_params["target_update_frequency"]
    tau = agent_params["tau"]
    warmup_replay_size = agent_params["warmup_replay_size"]
    target_update_mode = agent_params["target_update_mode"]
    # Explicit check instead of `assert`: assertions are stripped when Python
    # runs with -O, which would let an invalid mode slip through.
    if target_update_mode not in ("soft", "hard"):
        raise ValueError(
            "target_update_mode must be 'soft' or 'hard', "
            f"got {target_update_mode!r}"
        )

    n_features = agent_params["n_features"]
    if isinstance(n_features, str):
        # SECURITY NOTE(review): eval() executes arbitrary code contained in
        # the configuration string. Kept for backward compatibility with
        # configs that use expressions; only load configs from trusted
        # sources, or migrate to ast.literal_eval if plain literals suffice.
        n_features = eval(n_features)
    # Non-string values (e.g. an int or list already parsed by the config
    # loader) are used as-is instead of crashing inside eval().

    lr = float(agent_params["lr"])  # tolerate values such as the string "1e-4"
    grad_norm_clip = agent_params["grad_norm_clip"]
    obs_last_action = agent_params["obs_last_action"]
    share_agent_params = agent_params["share_agent_params"]
    use_mixer = agent_params.get("use_mixer", False)
    use_cuda = agent_params["use_cuda"]

    pi = CEMPolicy(mdp_info.action_space[idx_agent])

    action_dim = mdp_info.action_space[idx_agent].shape[0]

    # Critic input size: observation, optionally extended by further terms.
    input_dim = mdp_info.observation_space[idx_agent].shape[0]
    if obs_last_action:
        # presumably the previous action is appended to the observation
        input_dim += action_dim
    if share_agent_params:
        # presumably a one-hot agent id for the shared network -- TODO confirm
        input_dim += mdp_info.n_agents
    input_dim += action_dim  # critic takes state and action
    input_shape = (input_dim,)

    approximator_params = dict(
        input_shape=input_shape,
        output_shape=(1,),
        network=CEMNEtwork,
        optimizer={"class": optim.Adam, "params": {"lr": lr}},
        loss=F.smooth_l1_loss,
        n_features=n_features,
        use_cuda=use_cuda,
    )

    # NOTE(review): the third argument is the full critic input size, i.e. it
    # includes the action term added above -- confirm this is the size the
    # replay memory expects.
    replay_memory = ReplayMemoryObs(
        max_replay_size,
        mdp_info.state_space.shape[0],
        input_shape[0],
        action_dim,
        discrete_actions=False,
    )

    # Only non-primary agents receive a reference to the primary agent when
    # parameters are shared; agent 0 (or any agent without sharing) gets None.
    if share_agent_params and idx_agent != 0:
        primary_agent = kwargs["primary_agent"]
    else:
        primary_agent = None

    agent = DQNContinuous(
        mdp_info=mdp_info,
        idx_agent=idx_agent,
        policy=pi,
        batch_size=batch_size,
        replay_memory=replay_memory,
        target_update_frequency=target_update_frequency,
        tau=tau,
        warmup_replay_size=warmup_replay_size,
        target_update_mode=target_update_mode,
        approximator_params=approximator_params,
        grad_norm_clip=grad_norm_clip,
        obs_last_action=obs_last_action,
        primary_agent=primary_agent,
        use_mixer=use_mixer,
        use_cuda=use_cuda,
    )

    return agent
