import torch
import torch._dynamo
torch._dynamo.config.suppress_errors = True
from easydict import EasyDict
from grl.neural_network.encoders import register_encoder
import torch.nn as nn
class manipulator_insert (nn.Module):
    def __init__(self):
        super(manipulator_insert, self).__init__()
        self.arm_pos = nn.Sequential(
            nn.Linear(16,32),
            nn.ReLU(),
            nn.Linear(32, 32),
            nn.LayerNorm(32),
        )
        self.arm_vel = nn.Sequential(
            nn.Linear(8, 16),
            nn.ReLU(),
            nn.Linear(16, 16),
            nn.LayerNorm(16)
        )
        self.touch = nn.Sequential(
            nn.Linear(5, 10),
            nn.ReLU(),
            nn.Linear(10, 10),
            nn.LayerNorm(10)
        )
        self.hand_pos = nn.Sequential(
            nn.Linear(4, 8),
            nn.ReLU(),
            nn.Linear(8, 8),
            nn.LayerNorm(8)
        )
        self.object_pos = nn.Sequential(
            nn.Linear(4, 8),
            nn.ReLU(),
            nn.Linear(8, 8),
            nn.LayerNorm(8)
        )
        self.object_vel = nn.Sequential(
            nn.Linear(3, 6),
            nn.ReLU(),
            nn.Linear(6, 6),
            nn.LayerNorm(6)
        )
        self.target_pos = nn.Sequential(
            nn.Linear(4, 8),
            nn.ReLU(),
            nn.Linear(8, 8),
            nn.LayerNorm(8)
        )
        self.fish_swim = nn.Sequential(
            nn.Linear(26, 52),
            nn.ReLU(),
            nn.Linear(52, 52),
            nn.LayerNorm(52)
        )
    def forward(self, x: dict) -> torch.Tensor:
        shape=x["arm_pos"].shape
        arm_pos=self.arm_pos(x["arm_pos"].reshape(shape[0],16))
        arm_vel=self.arm_vel(x["arm_vel"])
        touch=self.touch(x["touch"])
        hand_pos=self.hand_pos(x["hand_pos"])
        object_pos=self.object_pos(x["object_pos"])
        object_vel=self.object_vel(x["object_vel"])
        target_pos=self.target_pos(x["target_pos"])
        combined_output = torch.cat([arm_pos,arm_vel, touch,hand_pos,object_pos,object_vel,target_pos], dim=-1)
        return combined_output
register_encoder(manipulator_insert,"manipulator_insert") 
    

path=""
domain_name="manipulator"
task_name="insert_ball"
env_id=f"{domain_name}-{task_name}"
algorithm="QGPO"
action_size = 5
state_size = 44
project_name =  f"{env_id}-{algorithm}"
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
t_embedding_dim = 32
t_encoder = dict(
    type="GaussianFourierProjectionTimeEncoder",
    args=dict(
        embed_dim=t_embedding_dim,
        scale=30.0,
    ),
)
solver_type = "DPMSolver"
action_augment_num = 16
config = EasyDict(
    train=dict(
        project=project_name,
        simulator=dict(
            type="DeepMindControlEnvSimulator",
            args=dict(
                domain_name=domain_name,
                task_name=task_name,
            ),
        ),
        dataset=dict(
            type="QGPODMcontrolTensorDictDataset",
            args=dict(
                path=path,
                action_augment_num=action_augment_num,
                dcit_calulate=True
            ),
        ),
        model=dict(
            QGPOPolicy=dict(
                device=device,
                critic=dict(
                    device=device,
                    q_alpha=1.0,
                    DoubleQNetwork=dict(
                        backbone=dict(
                            type="ConcatenateMLP",
                            args=dict(
                                hidden_sizes=[action_size + state_size, 256, 256],
                                output_size=1,
                                activation="relu",
                            ),
                        ),
                        state_encoder=dict(
                            type="manipulator_insert",
                            args=dict(
                            ),
                        ),
                    ),
                ),
                diffusion_model=dict(
                    device=device,
                    x_size=action_size,
                    alpha=1.0,
                    solver=(
                        dict(
                            type="DPMSolver",
                            args=dict(
                                order=2,
                                device=device,
                                steps=17,
                            ),
                        )
                        if solver_type == "DPMSolver"
                        else (
                            dict(
                                type="ODESolver",
                                args=dict(
                                    library="torchdyn",
                                ),
                            )
                            if solver_type == "ODESolver"
                            else dict(
                                type="SDESolver",
                                args=dict(
                                    library="torchsde",
                                ),
                            )
                        )
                    ),
                    path=dict(
                        type="linear_vp_sde",
                        beta_0=0.1,
                        beta_1=20.0,
                    ),
                    reverse_path=dict(
                        type="linear_vp_sde",
                        beta_0=0.1,
                        beta_1=20.0,
                    ),
                    model=dict(
                        type="noise_function",
                        args=dict(
                            t_encoder=t_encoder,
                            condition_encoder=dict(
                                type="manipulator_insert",
                                args=dict(
                                            ),
                            ),
                            backbone=dict(
                                type="TemporalSpatialResidualNet",
                                args=dict(
                                    hidden_sizes=[512, 256, 128],
                                    output_dim=action_size,
                                    t_dim=t_embedding_dim,
                                    condition_dim=state_size,
                                    condition_hidden_dim=32,
                                    t_condition_hidden_dim=128,
                                ),
                            ),
                        ),
                    ),
                    energy_guidance=dict(
                        t_encoder=t_encoder,
                        condition_encoder=dict(
                                type="manipulator_insert",
                                args=dict(
                                            ),
                            ),
                        backbone=dict(
                            type="ConcatenateMLP",
                            args=dict(
                                hidden_sizes=[
                                    action_size + state_size + t_embedding_dim,
                                    256,
                                    256,
                                ],
                                output_size=1,
                                activation="silu",
                            ),
                        ),
                    ),
                ),
            )
        ),
        parameter=dict(
            behaviour_policy=dict(
                batch_size=4096,
                learning_rate=1e-4,
                iterations=600000,
            ),
            action_augment_num=action_augment_num,
            fake_data_t_span=None if solver_type == "DPMSolver" else 32,
            energy_guided_policy=dict(
                batch_size=256,
            ),
            critic=dict(
                stop_training_iterations=500000,
                learning_rate=3e-4,
                discount_factor=0.99,
                update_momentum=0.005,
            ),
            energy_guidance=dict(
                iterations=600000,
                learning_rate=3e-4,
            ),
            evaluation=dict(
                evaluation_interval=10000,
                guidance_scale=[0.0, 1.0, 2.0, 3.0, 5.0, 8.0, 10.0],
            ),
            checkpoint_path=f"./{env_id}-{algorithm}",
        ),
    ),
    deploy=dict(
        device=device,
        env=dict(
            env_id="Walker2d-v2",
            seed=0,
        ),
        num_deploy_steps=1000,
        t_span=None if solver_type == "DPMSolver" else 32,
    ),
)

import gym

from grl.algorithms.qgpo import QGPOAlgorithm
from grl.utils.log import log


def qgpo_pipeline(config):

    qgpo = QGPOAlgorithm(config)

    # ---------------------------------------
    # Customized train code ↓
    # ---------------------------------------
    qgpo.train()
    # ---------------------------------------
    # Customized train code ↑
    # ---------------------------------------

    # ---------------------------------------
    # Customized deploy code ↓
    # ---------------------------------------
    agent = qgpo.deploy()
    env = gym.make(config.deploy.env.env_id)
    observation = env.reset()
    for _ in range(config.deploy.num_deploy_steps):
        env.render()
        observation, reward, done, _ = env.step(agent.act(observation))
    # ---------------------------------------
    # Customized deploy code ↑
    # ---------------------------------------


if __name__ == "__main__":
    log.info("config: \n{}".format(config))
    qgpo_pipeline(config)

