from cgi import test
import math
import pkgutil
import sys
from abc import ABC, abstractmethod
from multiprocessing.connection import Client
from pathlib import Path
from pprint import pprint
from turtle import done, position

import numpy as np
import pybullet
import pybullet_utils.bullet_client as bc
from scipy.ndimage import rotate as rotate_image
from scipy.ndimage.morphology import distance_transform_edt
from skimage.draw import line
from skimage.morphology import binary_dilation, dilation
from skimage.morphology.selem import disk
from gym import spaces
import gym

class VectorEnv(gym.Env):
    WALL_HEIGHT = 0.1
    #CUBE_WIDTH = 0.044
    CUBE_WIDTH = 0.06
    #CUBE_WIDTH = 0.1
    IDENTITY_QUATERNION = (0, 0, 0, 1)
    REMOVED_BODY_Z = -1000  # Hide removed bodies 1000 m below
    CUBE_COLOR = (237.0 / 255, 201.0 / 255, 72.0 / 255, 1)  # Yellow
    DEBUG_LINE_COLORS = [
        (78.0 / 255, 121.0 / 255, 167.0 / 255),  # Blue
        (89.0 / 255, 169.0 / 255, 79.0 / 255),  # Green
        (176.0 / 255, 122.0 / 255, 161.0 / 255),  # Purple
        (242.0 / 255, 142.0 / 255, 43.0 / 255),  # Orange
    ]

    def __init__(
        # This comment is here to make code folding work
            self, robot_config=None, room_length=1.0, room_width=0.5, num_cubes=2, env_name='small_empty',
            use_robot_map=True, use_distance_to_receptacle_map=False, distance_to_receptacle_map_scale=0.25,
            use_shortest_path_to_receptacle_map=True, use_shortest_path_map=True, shortest_path_map_scale=0.25,
            use_intention_map=False, intention_map_encoding='ramp',
            intention_map_scale=1.0, intention_map_line_thickness=2,
            use_history_map=False,
            use_intention_channels=False, intention_channel_encoding='spatial', intention_channel_nonspatial_scale=0.025,
            use_shortest_path_partial_rewards=True, success_reward=1.0, partial_rewards_scale=2.0,
            lifting_pointless_drop_penalty=0.25, obstacle_collision_penalty=0.25, robot_collision_penalty=1.0,
            use_shortest_path_movement=True, use_partial_observations=True,
            inactivity_cutoff_per_robot=200,
            random_seed=None, use_egl_renderer=False,
            show_gui=False, show_debug_annotations=False, show_occupancy_maps=False,
            real=False, real_robot_indices=None, real_cube_indices=None, real_debug=False,
            obs_radius = 0.1, termination_step = 10000, target_pos = None, target_width = 0.3,
            step_penalty = 0, target_reward = 100, lift_cube_reward = 0
        ):

        ################################################################################
        # Arguments

        # Room configuration
        self.robot_config = robot_config
        self.room_length = room_length
        self.room_width = room_width
        self.num_cubes = num_cubes
        self.env_name = env_name

        # Misc
        self.use_egl_renderer = use_egl_renderer
        self.random_seed = random_seed
        self.inactivity_cutoff_per_robot = inactivity_cutoff_per_robot

        # Debugging
        self.show_gui = show_gui
        self.show_debug_annotations = show_debug_annotations

        self.obs_radius = obs_radius
        self.radiusIds = []
        self.termination_step = termination_step

        self.target_pos = target_pos
        self.target_width = target_width

        self.step_penalty = step_penalty
        self.target_reward = target_reward
        self.lift_cube_reward = lift_cube_reward

        # self.n_agent = n_agent

        # self.action_space = spaces.Discrete(5)

        #pprint(self.__dict__)

        ################################################################################
        # Set up pybullet

        if self.show_gui:
            self.p = bc.BulletClient(connection_mode=pybullet.GUI)
            self.p.configureDebugVisualizer(pybullet.COV_ENABLE_GUI, 0)
        else:
            self.p = bc.BulletClient(connection_mode=pybullet.DIRECT)
            if self.use_egl_renderer:
                assert sys.platform == 'linux'  # Linux only
                self.plugin_id = self.p.loadPlugin(pkgutil.get_loader('eglRenderer').get_filename(), "_eglRendererPlugin")

        self.p.resetDebugVisualizerCamera(
            0.47 + (5.25 - 0.47) / (10 - 0.7) * (self.room_length - 0.7), 0, -70,
            (0, -(0.07 + (1.5 - 0.07) / (10 - 0.7) * (self.room_width - 0.7)), 0))

        # Used to determine whether robot poses are out of date
        self.step_simulation_count = 0

        ################################################################################
        # Robots and room configuration

        # Random placement of robots, cubes, and obstacles
        self.room_random_state = np.random.RandomState(self.random_seed)
        self.robot_spawn_bounds = None
        self.cube_spawn_bounds = None

        # Robots
        if self.robot_config is None:
            self.robot_config = [{'lifting_robot': 2}]
            #self.robot_config = [{'throwing_robot': 2}]
            #self.robot_config = [{'pushing_robot': 2}]
        self.num_robots = sum(sum(g.values()) for g in self.robot_config)
        self.robot_group_types = [next(iter(g.keys())) for g in self.robot_config]
        self.robot_ids = None
        self.robots = None
        self.robot_groups = None
        self.robot_random_state = np.random.RandomState(self.random_seed + 1 if self.random_seed is not None else None)  # Add randomness to throwing
        self.n_agent = self.num_robots

        # Room
        self.obstacle_ids = None
        self.cube_ids = None
        self.receptacle_id = None
        if not any('rescue_robot' in g for g in self.robot_config):
            #self.receptacle_position = (self.room_length / 2 - self.target_width / 2, self.room_width / 2 - self.target_width / 2, 0)
            #print("self.receptacle_position", self.receptacle_position)
            if self.target_pos:
                self.receptacle_position = (self.target_pos[0], self.target_pos[1], 0)
            else:
                #x, y =  self._get_random_position(self.target_width / 2)
                #x, y =  self._get_diagonal_random_target_position(self.target_width / 2)
                x, y =  self._get_random_target_position(self.target_width / 2)
                self.receptacle_position = (x, y, 0)
            #print("self.receptacle_position", self.receptacle_position)

        # Collections for keeping track of environment state
        self.obstacle_collision_body_b_ids_set = None  # For collision detection
        self.robot_collision_body_b_ids_set = None  # For collision detection
        self.available_cube_ids_set = None  # Excludes removed cubes, and cubes that are being lifted, thrown, or rescued
        self.removed_cube_ids_set = None  # Cubes that have been removed

        ################################################################################
        # Misc
        self.action_space = spaces.Box(low=-1, high=1, shape=(self.n_agent, 2), dtype=np.float32)
        self.observation_space = spaces.Box(low=0, high=1, shape=(3 * (self.n_agent + self.num_cubes) + 2,), dtype=np.float32)
        #self.observation_space = spaces.Box(low=0, high=1, shape=(2,), dtype=np.float32)

        # End an episode after too many steps of inactivity
        self.inactivity_cutoff = self.num_robots * self.inactivity_cutoff_per_robot

        # Stats
        self.steps = None
        self.simulation_steps = None
        self.inactivity_steps = None


        # Stats
        self.simulation_steps = None

    def reset(self):
        # Reset pybullet
        self.p.resetSimulation()
        self.p.setRealTimeSimulation(0)
        self.p.setGravity(0, 0, -9.8)

        if not self.target_pos:
            #x, y =  self._get_random_position(self.target_width / 2)

            x, y =  self._get_random_target_position(self.target_width / 2)

            #x, y =  self._get_diagonal_random_target_position(self.target_width / 2)

            self.receptacle_position = (x, y, 0)

        # Create env
        self._create_env()

        self.drawRadius()

        self._reset_poses()

        # Stats
        # Stats
        self.steps = 0
        self.simulation_steps = 0
        self.inactivity_steps = 0

        return self.get_macro_obs()

    def store_new_action(self, action):
        #print("store_new_action", action)
        for i in range(self.num_robots):
            if self.robots[i].is_idle():
                self.robots[i].store_new_action([action[i * 2] * self.room_length / 2, action[i * 2 + 1] * self.room_width / 2])
                self.robots[i].mac_action = [action[i * 2], action[i * 2 + 1]]

    def step(self, action):
        ################################################################################
        # Setup before action execution

        self.store_new_action(action)

        # Store initial cube positions for pushing partial rewards
        if any(isinstance(robot, PushingRobot) for robot in self.robots):
            initial_cube_positions = {}
            for cube_id in self.available_cube_ids_set:
                initial_cube_positions[cube_id] = self.get_cube_position(cube_id)

        ################################################################################
        # Execute actions
        self._execute_actions()
        ################################################################################
        # Process cubes after action execution

        for robot in self.robots:
            if isinstance(robot, LiftingRobot) and robot.lift_state == 'lifting' and robot.controller.state != 'idle':
                if robot.try_drop_cube():
                    robot.drop_reward += 1
                    robot.controller.state = 'idle'

        n_cube_in_receptacle = 0
        for cube_id in self.available_cube_ids_set.copy():
            cube_position = self.get_cube_position(cube_id)

            # Reset out-of-bounds cubes
            if (cube_position[2] > VectorEnv.WALL_HEIGHT + 0.49 * VectorEnv.CUBE_WIDTH or  # On top of obstacle
                    cube_position[2] < 0.4 * VectorEnv.CUBE_WIDTH):  # Inside obstacle (0.4 since dropped cubes can temporarily go into the ground)
                pos_x, pos_y, heading = self._get_random_cube_pose()
                self.reset_cube_pose(cube_id, pos_x, pos_y, heading)
                continue

            if self.receptacle_id is not None:
                closest_robot = self.robots[np.argmin([distance(robot.get_position(), cube_position) for robot in self.robots])]

                # Process final cube position for pushing partial rewards
                if isinstance(closest_robot, PushingRobot):
                    closest_robot.process_cube_position(cube_id, initial_cube_positions)

                # Process cubes that are in the receptacle (cubes were pushed in)
                if self.cube_position_in_receptacle(cube_position):
                    closest_robot.process_cube_success()
                    self.remove_cube(cube_id)
                    self.available_cube_ids_set.remove(cube_id)
                    n_cube_in_receptacle += 1

        # Increment counters
        self.simulation_steps += 1
        if sum(robot.cubes for robot in self.robots) > 0:
            self.inactivity_steps = 0
        else:
            self.inactivity_steps += 1

        done, reward = self.if_done(n_cube_in_receptacle)

        cur_mac = self._collectCurMacroActions()
        mac_done = self._computeMacroActionDone()
        info = {'cur_mac': cur_mac, 'mac_done': mac_done}

        return self.get_macro_obs(), reward, done, info

    def get_state(self):
        state = []
        for robot1 in self.robots:
            obs = []
            position1, heading1 = robot1.get_position(), robot1.get_heading()
            for robot2 in self.robots:
                position2, heading2 = robot2.get_position(), robot2.get_heading()
                if distance(position1, position2) <= self.obs_radius:
                    obs += [position2[0], position2[1], heading2]
                else:
                    obs += [-1.1, -1.1, 0]

                for cube in self.cube_ids:
                    cube_position, cube_hearding = self.get_cube_pose(cube)
                    cube_hearding = orientation_to_heading(cube_hearding)
                    if cube_position[2] < -100:
                        obs += [-1.1, -1.1, 0]
                    else:
                        obs += [cube_position[0] / self.room_length * 2, cube_position[1] / self.room_width * 2, cube_hearding / math.pi / 2]

            if distance(position1, self.receptacle_position) <= self.obs_radius:
                obs += [self.receptacle_position[0], self.receptacle_position[1]] 
            else:
                obs += [-1, -1]
            state.append(np.array(obs))
        return state

    def get_macro_obs(self):
        state = []
        for robot1 in self.robots:
            obs = []
            if robot1.is_idle():
                position1, heading1 = robot1.get_position(), robot1.get_heading()
                for robot2 in self.robots:
                    position2, heading2 = robot2.get_position(), robot2.get_heading()
                    if distance(position1, position2) <= self.obs_radius:
                        obs += [position2[0] / self.room_length * 2, position2[1] / self.room_width * 2, heading2 / math.pi / 2]
                    else:
                        obs += [-1.1, -1.1, 0]
                
                for cube in self.cube_ids:
                    cube_position, cube_hearding = self.get_cube_pose(cube)
                    cube_hearding = orientation_to_heading(cube_hearding)
                    if cube_position[2] < -100 or distance(position1, cube_position) > self.obs_radius:
                        obs += [-1.1, -1.1, 0]
                    else:
                        obs += [cube_position[0] / self.room_length * 2, cube_position[1] / self.room_width * 2, cube_hearding / math.pi / 2]

                if distance(position1, self.receptacle_position) <= self.obs_radius:
                    obs += [self.receptacle_position[0]  / self.room_length * 2, self.receptacle_position[1]  / self.room_width * 2] 
                else:
                    obs += [-1, -1]
                robot1.obs = obs
            else:
                obs = robot1.obs
            state.append(np.array(obs))
        return state

    def drawRadius(self):
        self.p.removeAllUserDebugItems()
        colors = [[1, 0, 0], [0, 1, 0]]
        for robot, color, radiusIds in zip(self.robots, colors, self.radiusIds):
            x, y, _ = robot.get_position()
            t = 0
            pre_pos1 = [np.cos(t) * self.obs_radius + x, np.sin(t) * self.obs_radius + y, 0.01]
            for i in range(21):
                target_pos1 = [np.cos(t) * self.obs_radius + x , np.sin(t)  * self.obs_radius + y, 0.01]
                radiusIds.append(self.p.addUserDebugLine(pre_pos1, target_pos1, color, lineWidth = 3))
                pre_pos1 = target_pos1
                t += math.pi / 10

    def updateRadius(self):
        colors = [[1, 0, 0], [0, 1, 0]]
        for robot, color, radiusIds in zip(self.robots, colors, self.radiusIds):
            x, y, _ = robot.get_position()
            t = 0
            pre_pos1 = [np.cos(t) * self.obs_radius + x, np.sin(t) * self.obs_radius + y, 0.01]
            for i in range(21):
                target_pos1 = [np.cos(t) * self.obs_radius + x , np.sin(t)  * self.obs_radius + y, 0.01]
                self.p.addUserDebugLine(pre_pos1, target_pos1, color, lineWidth = 3, replaceItemUniqueId=radiusIds[i])
                pre_pos1 = target_pos1
                t += math.pi / 10

    def if_done(self, n_cube_in_receptacle):
        #done = False
        reward = self.step_penalty + n_cube_in_receptacle * self.target_reward

        for robot in self.robots:
            reward += robot.reward * self.lift_cube_reward + robot.drop_reward * self.target_reward

        # if len(self.removed_cube_ids_set) == self.num_cubes or self.simulation_steps >= self.termination_step:
        #     done = True

        #done = len(self.removed_cube_ids_set) == self.num_cubes or self.inactivity_steps >= self.inactivity_cutoff or self.simulation_steps >= self.termination_step

        done = len(self.removed_cube_ids_set) == self.num_cubes or self.simulation_steps >= self.termination_step


        return done, [reward] * self.num_robots


    def robot_in_receptacle(self, robot):
        rx, ry, _ = robot.get_position()
        tx, ty, _ = self.receptacle_position
        x_min = tx - self.target_width / 2
        x_max = tx + self.target_width / 2
        y_min = ty - self.target_width / 2
        y_max = ty + self.target_width / 2
        return (rx >= x_min and rx <= x_max and ry >= y_min and ry <= y_max)

    def close(self):
        if not self.show_gui and self.use_egl_renderer:
            self.p.unloadPlugin(self.plugin_id)
        self.p.disconnect()

    def step_simulation(self):
        self.p.stepSimulation()
        import time; time.sleep(1.0 / 60)
        self.step_simulation_count += 1
        self.updateRadius()
        #self.drawRadius()

    def get_cube_pose(self, cube_id):
        return self.p.getBasePositionAndOrientation(cube_id)

    def get_cube_position(self, cube_id):
        position, _ = self.get_cube_pose(cube_id)
        return position

    def reset_cube_pose(self, cube_id, pos_x, pos_y, heading):
        position = (pos_x, pos_y, VectorEnv.CUBE_WIDTH / 2)
        self.p.resetBasePositionAndOrientation(cube_id, position, heading_to_orientation(heading))

    def remove_cube(self, cube_id):
        self.p.resetBasePositionAndOrientation(cube_id, (0, 0, VectorEnv.REMOVED_BODY_Z), VectorEnv.IDENTITY_QUATERNION)
        self.removed_cube_ids_set.add(cube_id)

    def cube_position_in_receptacle(self, cube_position):
        assert self.receptacle_id is not None

        half_width = (self.target_width - VectorEnv.CUBE_WIDTH) / 2
        #if (self.receptacle_position[0] - half_width < cube_position[0] < self.receptacle_position[0] + half_width and
        #        self.receptacle_position[1] - half_width < cube_position[1] < self.receptacle_position[1] + half_width):
        if cube_position[0] > self.receptacle_position[0] - half_width and cube_position[1] > self.receptacle_position[1] - half_width:
            # Note: Assumes receptacle is in top right corner
            return True
        return False

    def get_robot_group_types(self):
        return self.robot_group_types

    def _create_env(self):

        # Create floor
        floor_thickness = 10
        wall_thickness = 1.4
        room_length_with_walls = self.room_length + 2 * wall_thickness
        room_width_with_walls = self.room_width + 2 * wall_thickness
        floor_half_extents = (room_length_with_walls / 2, room_width_with_walls / 2, floor_thickness / 2)
        floor_collision_shape_id = self.p.createCollisionShape(pybullet.GEOM_BOX, halfExtents=floor_half_extents)
        floor_visual_shape_id = self.p.createVisualShape(pybullet.GEOM_BOX, halfExtents=floor_half_extents)
        self.p.createMultiBody(0, floor_collision_shape_id, floor_visual_shape_id, (0, 0, -floor_thickness / 2))

        # Create obstacles (including walls)
        obstacle_color = (0.9, 0.9, 0.9, 1)
        rounded_corner_path = str(Path(__file__).parent / 'assets' / 'rounded_corner.obj')
        self.obstacle_ids = []
        for obstacle in self._get_obstacles(wall_thickness):
            if obstacle['type'] == 'corner':
                obstacle_collision_shape_id = self.p.createCollisionShape(pybullet.GEOM_MESH, fileName=rounded_corner_path)
                obstacle_visual_shape_id = self.p.createVisualShape(pybullet.GEOM_MESH, fileName=rounded_corner_path, rgbaColor=obstacle_color)
            else:
                half_height = VectorEnv.CUBE_WIDTH / 2 if 'low' in obstacle else VectorEnv.WALL_HEIGHT / 2
                obstacle_half_extents = (obstacle['x_len'] / 2, obstacle['y_len'] / 2, half_height)
                obstacle_collision_shape_id = self.p.createCollisionShape(pybullet.GEOM_BOX, halfExtents=obstacle_half_extents)
                obstacle_visual_shape_id = self.p.createVisualShape(pybullet.GEOM_BOX, halfExtents=obstacle_half_extents, rgbaColor=obstacle_color)

            obstacle_id = self.p.createMultiBody(
                0, obstacle_collision_shape_id, obstacle_visual_shape_id,
                (obstacle['position'][0], obstacle['position'][1], VectorEnv.WALL_HEIGHT / 2), heading_to_orientation(obstacle['heading']))
            self.obstacle_ids.append(obstacle_id)

        # Create target receptacle
        if not any('rescue_robot' in g for g in self.robot_config):
            receptacle_color = (1, 87.0 / 255, 89.0 / 255, 1)  # Red
            receptacle_collision_shape_id = self.p.createCollisionShape(pybullet.GEOM_BOX, halfExtents=(0, 0, 0))
            receptacle_visual_shape_id = self.p.createVisualShape(
                #pybullet.GEOM_BOX, halfExtents=(self.target_width / 2, self.target_width / 2, 0),  # Gets rendered incorrectly in EGL renderer if height is 0
                pybullet.GEOM_BOX, halfExtents=(self.target_width / 2, self.target_width / 2, 0.0001),
                rgbaColor=receptacle_color, visualFramePosition=(0, 0, 0.0001))
            self.receptacle_id = self.p.createMultiBody(0, receptacle_collision_shape_id, receptacle_visual_shape_id, self.receptacle_position)

        # Create robots
        self.robot_collision_body_b_ids_set = set()
        self.robot_ids = []
        self.robots = []  # Flat list
        self.robot_groups = [[] for _ in range(len(self.robot_config))]  # Grouped list
        for robot_group_index, g in enumerate(self.robot_config):
            robot_type, count = next(iter(g.items()))
            for _ in range(count):
                robot = Robot.get_robot(robot_type, self, robot_group_index)
                self.robots.append(robot)
                self.robot_groups[robot_group_index].append(robot)
                self.robot_ids.append(robot.id)

        # Create cubes
        cube_half_extents = (VectorEnv.CUBE_WIDTH / 2, VectorEnv.CUBE_WIDTH / 2, VectorEnv.CUBE_WIDTH / 2)
        cube_collision_shape_id = self.p.createCollisionShape(pybullet.GEOM_BOX, halfExtents=cube_half_extents)
        cube_visual_shape_id = self.p.createVisualShape(pybullet.GEOM_BOX, halfExtents=cube_half_extents, rgbaColor=VectorEnv.CUBE_COLOR)
        #cube_mass = 0.024  # 24 g
        cube_mass = 0.24 * 2
        self.cube_ids = []
        for _ in range(self.num_cubes):
            cube_id = self.p.createMultiBody(cube_mass, cube_collision_shape_id, cube_visual_shape_id)
            #print(self.p.getDynamicsInfo(cube_id,-1))
            self.p.changeDynamics(cube_id,-1,lateralFriction=1)
            self.cube_ids.append(cube_id)

        # Initialize collections
        self.obstacle_collision_body_b_ids_set = set(self.obstacle_ids)
        self.robot_collision_body_b_ids_set.update(self.robot_ids)
        self.available_cube_ids_set = set(self.cube_ids)
        self.removed_cube_ids_set = set()
        self.radiusIds = [[] for _ in range(len(self.robots))]

    def _get_obstacles(self, wall_thickness):

        def add_divider(x_offset=0):
            divider_width = 0.05
            opening_width = 0.16
            obstacles.append({'type': 'divider', 'position': (x_offset, 0), 'heading': 0, 'x_len': divider_width, 'y_len': self.room_width - 2 * opening_width})
            self.robot_spawn_bounds = (x_offset + divider_width / 2, None, None, None)
            self.cube_spawn_bounds = (None, x_offset - divider_width / 2, None, None)

        def add_tunnels(tunnel_length, x_offset=0, y_offset=0):
            tunnel_width = 0.18
            tunnel_x = (self.room_length + tunnel_width) / 6 + x_offset
            outer_divider_len = self.room_length / 2 - tunnel_x - tunnel_width / 2
            divider_x = self.room_length / 2 - outer_divider_len / 2
            middle_divider_len = 2 * (tunnel_x - tunnel_width / 2)
            obstacles.append({'type': 'divider', 'position': (-divider_x, y_offset), 'heading': 0, 'x_len': outer_divider_len, 'y_len': tunnel_length})
            obstacles.append({'type': 'divider', 'position': (0, y_offset), 'heading': 0, 'x_len': middle_divider_len, 'y_len': tunnel_length})
            obstacles.append({'type': 'divider', 'position': (divider_x, y_offset), 'heading': 0, 'x_len': outer_divider_len, 'y_len': tunnel_length})
            self.robot_spawn_bounds = (None, None, y_offset + tunnel_length / 2, None)
            self.cube_spawn_bounds = (None, None, None, y_offset - tunnel_length / 2)

        def add_rooms(x_offset=0, y_offset=0):
            divider_width = 0.05
            opening_width = 0.18
            divider_len = self.room_width / 2 - opening_width - divider_width / 2
            top_divider_len = divider_len - y_offset
            bot_divider_len = divider_len + y_offset
            top_divider_y = self.room_width / 2 - opening_width - top_divider_len / 2
            bot_divider_y = -self.room_width / 2 + opening_width + bot_divider_len / 2
            obstacles.append({'type': 'divider', 'position': (0, y_offset), 'heading': 0, 'x_len': self.room_length - 2 * opening_width, 'y_len': divider_width})
            obstacles.append({'type': 'divider', 'position': (x_offset, top_divider_y), 'heading': 0, 'x_len': divider_width, 'y_len': top_divider_len, 'snap_y': y_offset + divider_width / 2})
            obstacles.append({'type': 'divider', 'position': (x_offset, bot_divider_y), 'heading': 0, 'x_len': divider_width, 'y_len': bot_divider_len, 'snap_y': y_offset - divider_width / 2})

        # Walls
        obstacles = []
        for x, y, length, width in [
                (-self.room_length / 2 - wall_thickness / 2, 0, wall_thickness, self.room_width),
                (self.room_length / 2 + wall_thickness / 2, 0, wall_thickness, self.room_width),
                (0, -self.room_width / 2 - wall_thickness / 2, self.room_length + 2 * wall_thickness, wall_thickness),
                (0, self.room_width / 2 + wall_thickness / 2, self.room_length + 2 * wall_thickness, wall_thickness),
            ]:
            obstacles.append({'type': 'wall', 'position': (x, y), 'heading': 0, 'x_len': length, 'y_len': width})

        # Other obstacles
        if self.env_name == 'small_empty':
            pass

        elif self.env_name == 'small_divider_norand':
            add_divider()

        elif self.env_name == 'small_divider':
            add_divider(x_offset=self.room_random_state.uniform(-0.1, 0.1))

        elif self.env_name == 'large_empty':
            pass

        elif self.env_name == 'large_doors_norand':
            add_tunnels(0.05)

        elif self.env_name == 'large_doors':
            add_tunnels(0.05, x_offset=self.room_random_state.uniform(-0.05, 0.05), y_offset=self.room_random_state.uniform(-0.1, 0.1))

        elif self.env_name == 'large_tunnels_norand':
            add_tunnels(0.25)

        elif self.env_name == 'large_tunnels':
            add_tunnels(0.25, x_offset=self.room_random_state.uniform(-0.05, 0.05), y_offset=self.room_random_state.uniform(-0.05, 0.05))

        elif self.env_name == 'large_rooms_norand':
            add_rooms()

        elif self.env_name == 'large_rooms':
            add_rooms(x_offset=self.room_random_state.uniform(-0.05, 0.05), y_offset=self.room_random_state.uniform(-0.05, 0.05))

        else:
            raise Exception(self.env_name)

        ################################################################################
        # Rounded corners

        rounded_corner_width = 0.1006834873
        # Room corners
        for i, (x, y) in enumerate([
                (-self.room_length / 2, self.room_width / 2),
                (self.room_length / 2, self.room_width / 2),
                (self.room_length / 2, -self.room_width / 2),
                (-self.room_length / 2, -self.room_width / 2),
            ]):
            if any('rescue_robot' in g for g in self.robot_config) or distance((x, y), self.receptacle_position) > (1 + 1e-6) * (self.target_width / 2) * math.sqrt(2):
                heading = -math.radians(i * 90)
                offset = rounded_corner_width / math.sqrt(2)
                adjusted_position = (x + offset * math.cos(heading - math.radians(45)), y + offset * math.sin(heading - math.radians(45)))
                obstacles.append({'type': 'corner', 'position': adjusted_position, 'heading': heading})

        # Corners between walls and dividers
        new_obstacles = []
        for obstacle in obstacles:
            if obstacle['type'] == 'divider':
                position, length, width = obstacle['position'], obstacle['x_len'], obstacle['y_len']
                x, y = position
                corner_positions = None
                if math.isclose(x - length / 2, -self.room_length / 2):
                    corner_positions = [(-self.room_length / 2, y - width / 2), (-self.room_length / 2, y + width / 2)]
                    corner_headings = [0, 90]
                elif math.isclose(x + length / 2, self.room_length / 2):
                    corner_positions = [(self.room_length / 2, y - width / 2), (self.room_length / 2, y + width / 2)]
                    corner_headings = [-90, 180]
                elif math.isclose(y - width / 2, -self.room_width / 2):
                    corner_positions = [(x - length / 2, -self.room_width / 2), (x + length / 2, -self.room_width / 2)]
                    corner_headings = [180, 90]
                elif math.isclose(y + width / 2, self.room_width / 2):
                    corner_positions = [(x - length / 2, self.room_width / 2), (x + length / 2, self.room_width / 2)]
                    corner_headings = [-90, 0]
                elif 'snap_y' in obstacle:
                    snap_y = obstacle['snap_y']
                    corner_positions = [(x - length / 2, snap_y), (x + length / 2, snap_y)]
                    corner_headings = [-90, 0] if snap_y > y else [180, 90]
                if corner_positions is not None:
                    for position, heading in zip(corner_positions, corner_headings):
                        heading = math.radians(heading)
                        offset = rounded_corner_width / math.sqrt(2)
                        adjusted_position = (
                            position[0] + offset * math.cos(heading - math.radians(45)),
                            position[1] + offset * math.sin(heading - math.radians(45))
                        )
                        obstacles.append({'type': 'corner', 'position': adjusted_position, 'heading': heading})
        obstacles.extend(new_obstacles)

        return obstacles

    def _reset_poses(self):
        # Reset robot poses
        for robot in self.robots:
            pos_x, pos_y, heading = self._get_random_robot_pose(padding=robot.RADIUS, bounds=self.robot_spawn_bounds)
            robot.reset_pose(pos_x, pos_y, heading)

        # Reset cube poses
        for cube_id in self.cube_ids:
            pos_x, pos_y, heading = self._get_random_cube_pose()
            self.reset_cube_pose(cube_id, pos_x, pos_y, heading)

            #testcube
            #self.reset_cube_pose(cube_id, 0.25, -0.25, heading)
            #elf.reset_cube_pose(cube_id, 0, 0, 0)
            

        # Check if any robots need another pose reset
        done = False
        while not done:
            done = True
            self.step_simulation()
            for robot in self.robots:
                reset_robot_pose = False

                # Check if robot is stacked on top of a cube
                if robot.get_position(set_z_to_zero=False)[2] > 0.001:  # 1 mm
                    reset_robot_pose = True

                # Check if robot is inside an obstacle or another robot
                for contact_point in self.p.getContactPoints(robot.id):
                    if contact_point[2] in self.obstacle_collision_body_b_ids_set or contact_point[2] in self.robot_collision_body_b_ids_set:
                        reset_robot_pose = True
                        break

                if reset_robot_pose:
                    done = False
                    pos_x, pos_y, heading = self._get_random_robot_pose(padding=robot.RADIUS, bounds=self.robot_spawn_bounds)
                    robot.reset_pose(pos_x, pos_y, heading)

    def _get_random_cube_pose(self):
        done = False
        while not done:
            pos_x, pos_y = self._get_random_position(padding=VectorEnv.CUBE_WIDTH / 2, bounds=self.cube_spawn_bounds)

            # Only spawn cubes outside of the receptacle
            if self.receptacle_id is None or not self.cube_position_in_receptacle((pos_x, pos_y)):
                done = True
        heading = self.room_random_state.uniform(-math.pi, math.pi)
        return pos_x, pos_y, heading

    def _get_random_robot_pose(self, padding=0, bounds=None):
        position_x, position_y = self._get_random_position(padding=padding, bounds=bounds) 
        heading = self.room_random_state.uniform(-math.pi, math.pi)
        return position_x, position_y, heading
        #return position_x * 0.2, position_y * 0.2, heading

    def _get_random_position(self, padding=0, bounds=None):
        low_x = -self.room_length / 2 + padding
        high_x = self.room_length / 2 - padding
        low_y = -self.room_width / 2 + padding
        high_y = self.room_width / 2 - padding
        if bounds is not None:
            x_min, x_max, y_min, y_max = bounds
            if x_min is not None:
                low_x = x_min + padding
            if x_max is not None:
                high_x = x_max - padding
            if y_min is not None:
                low_y = y_min + padding
            if y_max is not None:
                high_y = y_max - padding
        position_x, position_y = self.room_random_state.uniform((low_x, low_y), (high_x, high_y))
        return position_x, position_y

    def _get_random_target_position(self, padding=0, bounds=None):
        low_x = -self.room_length / 2 + padding
        high_x = self.room_length / 2 - padding
        low_y = -self.room_width / 2 + padding
        high_y = self.room_width / 2 - padding
        if bounds is not None:
            x_min, x_max, y_min, y_max = bounds
            if x_min is not None:
                low_x = x_min + padding
            if x_max is not None:
                high_x = x_max - padding
            if y_min is not None:
                low_y = y_min + padding
            if y_max is not None:
                high_y = y_max - padding

        x = [low_x, high_x]
        y = [low_y, high_y]

        position_x = x[np.random.randint(0,2)]
        position_y = y[np.random.randint(0,2)]
        
        return position_x, position_y

    def _get_diagonal_random_target_position(self, padding=0, bounds=None):
        #print("_get_diagonal_random_target_position")
        low_x = -self.room_length / 2 + padding
        high_x = self.room_length / 2 - padding
        low_y = -self.room_width / 2 + padding
        high_y = self.room_width / 2 - padding
        if bounds is not None:
            x_min, x_max, y_min, y_max = bounds
            if x_min is not None:
                low_x = x_min + padding
            if x_max is not None:
                high_x = x_max - padding
            if y_min is not None:
                low_y = y_min + padding
            if y_max is not None:
                high_y = y_max - padding

        x = [low_x, high_x]
        y = [low_y, high_y]

        pos = [[high_x, high_y], [low_x, low_y]]

        position = pos[np.random.randint(0,2)]
        
        return position[0], position[1]


    def _execute_actions(self):
        for robot in self.robots:
            robot.step()
        self.step_simulation()

    @property
    def state_size(self):
        #return self.get_state()[0].shape[0] * self.n_agent
        return self.get_macro_obs[0].shape[0] * self.n_agent
        

    @property
    def obs_size(self):
        return [self.observation_space.shape[0]] * self.n_agent

    @property
    def n_action(self):
        return [self.action_space.shape[1]] * self.action_space.shape[0]

    @property
    def action_spaces(self):
        return [[[self.action_space.low[0][0], self.action_space.high[0][0]]] * self.action_space.shape[1]] * self.action_space.shape[0]

    def get_avail_actions(self):
        return [self.get_avail_agent_actions(i) for i in range(self.n_agent)]

    def get_avail_agent_actions(self, nth):
        return [1] * self.n_agent

    def action_space_sample(self, i):
        return [np.random.uniform(self.action_spaces[i][0][0], self.action_spaces[i][0][1]), np.random.uniform(self.action_spaces[i][1][0], self.action_spaces[i][1][1])]

    def _collectCurMacroActions(self):
        # loop each agent
        cur_mac = []
        for robot in self.robots:
            cur_mac.append(robot.mac_action)
        return cur_mac

    def _computeMacroActionDone(self):
        # loop each agent
        mac_done = []
        for robot in self.robots:
            mac_done.append(True if robot.is_idle() else False)
        return mac_done

    def macro_action_sample(self):
        mac_actions = []
        for i in range(self.n_agent):
            mac_actions += self.action_space_sample(i)
        return mac_actions

    def build_agents(self):
        raise

    def build_macro_actions(self):
        raise

class Robot(ABC):
    HALF_WIDTH = 0.03
    BACKPACK_OFFSET = -0.0135
    BASE_LENGTH = 0.065  # Does not include the hooks
    TOP_LENGTH = 0.057  # Leaves 1 mm gap for lifted cube
    END_EFFECTOR_LOCATION = BACKPACK_OFFSET + BASE_LENGTH
    RADIUS = math.sqrt(HALF_WIDTH**2 + END_EFFECTOR_LOCATION**2)
    HEIGHT = 0.07
    NUM_OUTPUT_CHANNELS = 1
    COLOR = (0.3529, 0.3529, 0.3529, 1)  # Gray
    CONSTRAINT_MAX_FORCE = 10

    @abstractmethod  # Should not be instantiated directly
    def __init__(self, env, group_index, obs_radius=0.2, real=False, real_robot_index=None):
        self.env = env
        self.group_index = group_index
        self.real = real
        self.obs_radius = obs_radius
        self.id = self._create_multi_body()
        self.cid = self.env.p.createConstraint(self.id, -1, -1, -1, pybullet.JOINT_FIXED, None, (0, 0, 0), (0, 0, 0))
        self._last_step_simulation_count = -1  # Used to determine whether pose is out of date
        self._position_raw = None  # Most current position, not to be directly accessed (use self.get_position())
        self._position = None  # Most current position (with z set to 0), not to be directly accessed (use self.get_position())
        self._heading = None  # Most current heading, not to be directly accessed (use self.get_heading())
        self.state = None
        

        # Movement
        self.action = None
        self.target_end_effector_position = None
        self.waypoint_positions = None
        self.waypoint_headings = None
        self.controller = RobotController(self)

        # Collision detection
        self.collision_body_a_ids_set = set([self.id])

        # State representation
        #self.mapper = Mapper(self.env, self)

        # Step variables and stats
        self.awaiting_new_action = False  # Only one robot at a time can be awaiting new action
        self.cubes = 0
        self.distance = 0
        self.prev_waypoint_position = None  # For tracking distance traveled over the step
        self.collided_with_obstacle = False
        self.collided_with_robot = False


    def store_new_action(self, action):
        # Action is specified as an index specifying an end effector action, along with (row, col) of the selected pixel location
        #self.action = tuple(np.unravel_index(action, (self.NUM_OUTPUT_CHANNELS, Mapper.LOCAL_MAP_PIXEL_WIDTH, Mapper.LOCAL_MAP_PIXEL_WIDTH)))  # Immutable tuple
        self.action = action

        # Get current robot pose
        current_position, current_heading = self.get_position(), self.get_heading()

        ################################################################################
        # Step variables and stats

        self.target_position = (action[0], action[1], 0)
        self.waypoint_positions = [current_position, self.target_position]

        self.waypoint_headings = [current_heading]
        for i in range(1, len(self.waypoint_positions)):
            dx = self.waypoint_positions[i][0] - self.waypoint_positions[i - 1][0]
            dy = self.waypoint_positions[i][1] - self.waypoint_positions[i - 1][1]
            self.waypoint_headings.append(restrict_heading_range(math.atan2(dy, dx)))

        # Reset controller
        self.controller.reset()
        self.controller.new_action()

        # Reset step variables and stats
        self.awaiting_new_action = False
        self.cubes = 0
        self.reward = None
        self.cubes_with_reward = 0
        self.distance = 0
        self.prev_waypoint_position = current_position
        self.collided_with_obstacle = False
        self.collided_with_robot = False

    def step(self):
        self.controller.step()

    def process_cube_success(self):
        self.cubes += 1

    def reset(self):
        self.action = None
        self.mac_action = None
        self.target_end_effector_position = None
        self.waypoint_positions = None
        self.waypoint_headings = None
        self.controller.reset()

    def is_idle(self):
        return self.controller.state == 'idle'

    def get_position(self, set_z_to_zero=True):
        # Returned position is immutable tuple
        if self._last_step_simulation_count < self.env.step_simulation_count:
            self._update_pose()
        if not set_z_to_zero:
            return self._position_raw
        return self._position

    def get_heading(self):
        if self._last_step_simulation_count < self.env.step_simulation_count:
            self._update_pose()
        return self._heading

    def reset_pose(self, position_x, position_y, heading):
        # Reset robot pose
        position = (position_x, position_y, 0)
        orientation = heading_to_orientation(heading)
        self.env.p.resetBasePositionAndOrientation(self.id, position, orientation)
        self.env.p.changeConstraint(self.cid, jointChildPivot=position, jointChildFrameOrientation=orientation, maxForce=Robot.CONSTRAINT_MAX_FORCE)
        self._last_step_simulation_count = -1

    def check_for_collisions(self):
        for body_a_id in self.collision_body_a_ids_set:
            for contact_point in self.env.p.getContactPoints(body_a_id):
                body_b_id = contact_point[2]
                if body_b_id in self.collision_body_a_ids_set:
                    continue
                if body_b_id in self.env.obstacle_collision_body_b_ids_set:
                    self.collided_with_obstacle = True
                if body_b_id in self.env.robot_collision_body_b_ids_set:
                    self.collided_with_robot = True
                if self.collided_with_obstacle or self.collided_with_robot:
                    break

    def update_distance(self):
        current_position = self.get_position()
        self.distance += distance(self.prev_waypoint_position, current_position)
        if self.env.show_debug_annotations:
            self.env.p.addUserDebugLine(
                (self.prev_waypoint_position[0], self.prev_waypoint_position[1], 0.001),
                (current_position[0], current_position[1], 0.001),
                VectorEnv.DEBUG_LINE_COLORS[self.group_index]
            )
        self.prev_waypoint_position = current_position

    def _update_pose(self):
        position, orientation = self.env.p.getBasePositionAndOrientation(self.id)
        self._position_raw = position
        self._position = (position[0], position[1], 0)  # Use immutable tuples to represent positions
        self._heading = orientation_to_heading(orientation)
        self._last_step_simulation_count = self.env.step_simulation_count

    def _create_multi_body(self):
        base_height = 0.035
        mass = 0.180
        shape_types = [pybullet.GEOM_CYLINDER, pybullet.GEOM_BOX, pybullet.GEOM_BOX]
        radii = [Robot.HALF_WIDTH, None, None]
        half_extents = [
            None,
            (self.BASE_LENGTH / 2, Robot.HALF_WIDTH, base_height / 2),
            (Robot.TOP_LENGTH / 2, Robot.HALF_WIDTH, Robot.HEIGHT / 2),
        ]
        lengths = [Robot.HEIGHT, None, None]
        rgba_colors = [self.COLOR, None, None]  # pybullet seems to ignore all colors after the first
        frame_positions = [
            (Robot.BACKPACK_OFFSET, 0, Robot.HEIGHT / 2),
            (Robot.BACKPACK_OFFSET + self.BASE_LENGTH / 2, 0, base_height / 2),
            (Robot.BACKPACK_OFFSET + Robot.TOP_LENGTH / 2, 0, Robot.HEIGHT / 2),
        ]
        collision_shape_id = self.env.p.createCollisionShapeArray(
            shapeTypes=shape_types, radii=radii, halfExtents=half_extents, lengths=lengths, collisionFramePositions=frame_positions)
        visual_shape_id = self.env.p.createVisualShapeArray(
            shapeTypes=shape_types, radii=radii, halfExtents=half_extents, lengths=lengths, rgbaColors=rgba_colors, visualFramePositions=frame_positions)
        return self.env.p.createMultiBody(mass, collision_shape_id, visual_shape_id)

    @staticmethod
    def get_robot_cls(robot_type):
        if robot_type == 'pushing_robot':
            return PushingRobot
        if robot_type == 'lifting_robot':
            return LiftingRobot
        if robot_type == 'throwing_robot':
            return ThrowingRobot
        if robot_type == 'rescue_robot':
            return RescueRobot
        raise Exception(robot_type)

    @staticmethod
    def get_robot(robot_type, *args, real=False, real_robot_index=None):
        return Robot.get_robot_cls(robot_type)(*args, real=real, real_robot_index=real_robot_index)

class PushingRobot(Robot):
    BASE_LENGTH = Robot.BASE_LENGTH + 0.005  # 5 mm blade
    END_EFFECTOR_LOCATION = Robot.BACKPACK_OFFSET + BASE_LENGTH
    RADIUS = math.sqrt(Robot.HALF_WIDTH**2 + END_EFFECTOR_LOCATION**2)
    COLOR = (0.1765, 0.1765, 0.1765, 1)  # Dark gray

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.cube_dist_closer = 0

    def store_new_action(self, action):
        super().store_new_action(action)
        self.cube_dist_closer = 0

    def process_cube_success(self):
        super().process_cube_success()
        self.cubes_with_reward += 1

    def process_cube_position(self, cube_id, initial_cube_positions):
        if cube_id not in initial_cube_positions:
            return
        cube_position = self.env.get_cube_position(cube_id)
        dist_closer = self.mapper.distance_to_receptacle(initial_cube_positions[cube_id]) - self.mapper.distance_to_receptacle(cube_position)
        self.cube_dist_closer += dist_closer


class RobotWithHooks(Robot):
    NUM_OUTPUT_CHANNELS = 2
    END_EFFECTOR_DIST_THRESHOLD = VectorEnv.CUBE_WIDTH
    END_EFFECTOR_THICKNESS = 0.008  # 8 mm
    END_EFFECTOR_GAP_SIZE = 0.001  # 1 mm gap makes p.stepSimulation faster by avoiding unnecessary collisions

    @abstractmethod  # Should not be instantiated directly
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.potential_cube_id = None
        self.end_effector_id = self._create_end_effector_shape()  # For collision detection (real robot detaches shape while aligning with cube)
        self.end_effector_cid = None
        self.attach_end_effector_shape()

    def reset_pose(self, *args):
        super().reset_pose(*args)
        if self.end_effector_cid is not None:
            # Reset pose of end effector shape
            self.detach_end_effector_shape()
            self.attach_end_effector_shape()

    def ray_test_cube(self):
        # World coordinates
        target_position, target_heading = self.waypoint_positions[-1], self.waypoint_headings[-1]
        ray_from = (
            target_position[0] + math.cos(target_heading) * self.END_EFFECTOR_LOCATION,
            target_position[1] + math.sin(target_heading) * self.END_EFFECTOR_LOCATION,
            VectorEnv.CUBE_WIDTH / 2
        )
        ray_to = (
            target_position[0] + math.cos(target_heading) * self.END_EFFECTOR_LOCATION + math.cos(target_heading) * RobotWithHooks.END_EFFECTOR_DIST_THRESHOLD,
            target_position[1] + math.sin(target_heading) * self.END_EFFECTOR_LOCATION + math.sin(target_heading) * RobotWithHooks.END_EFFECTOR_DIST_THRESHOLD,
            VectorEnv.CUBE_WIDTH / 2
        )
        self.env.p.addUserDebugLine(ray_from, ray_to, (0, 0, 1))
        body_id = self.env.p.rayTestBatch([ray_from], [ray_to])[0][0]
        if body_id in self.env.available_cube_ids_set:
            return body_id
        return None

    def attach_end_effector_shape(self):
        if self.end_effector_cid is not None:
            return

        # Move to front of robot
        box_width = RobotWithHooks.END_EFFECTOR_THICKNESS - RobotWithHooks.END_EFFECTOR_GAP_SIZE
        x_offset = Robot.BACKPACK_OFFSET + self.BASE_LENGTH + RobotWithHooks.END_EFFECTOR_GAP_SIZE + box_width / 2
        height = RobotWithHooks.END_EFFECTOR_GAP_SIZE + box_width / 2
        current_position, current_heading = self.get_position(), self.get_heading()
        parent_frame_position_world = (
            current_position[0] + x_offset * math.cos(current_heading),
            current_position[1] + x_offset * math.sin(current_heading),
            height
        )
        self.env.p.resetBasePositionAndOrientation(self.end_effector_id, parent_frame_position_world, heading_to_orientation(current_heading))

        # Create constraint
        parent_frame_position = (x_offset, 0, height)
        self.end_effector_cid = self.env.p.createConstraint(self.id, -1, self.end_effector_id, -1, pybullet.JOINT_FIXED, None, parent_frame_position, (0, 0, 0))

        # Collision detection
        self.collision_body_a_ids_set.add(self.end_effector_id)
        self.env.robot_collision_body_b_ids_set.add(self.end_effector_id)

    def detach_end_effector_shape(self):
        self.env.p.removeConstraint(self.end_effector_cid)
        self.end_effector_cid = None
        self.env.p.resetBasePositionAndOrientation(self.end_effector_id, (0, 0, VectorEnv.REMOVED_BODY_Z), VectorEnv.IDENTITY_QUATERNION)
        self.collision_body_a_ids_set.remove(self.end_effector_id)
        self.env.robot_collision_body_b_ids_set.remove(self.end_effector_id)

    def _create_end_effector_shape(self):
        # Add a box to approximate the triangle in front of the robot
        box_width = RobotWithHooks.END_EFFECTOR_THICKNESS - RobotWithHooks.END_EFFECTOR_GAP_SIZE
        half_extents = (box_width / 2, 0.018 / 2, box_width / 2)
        collision_shape_id = self.env.p.createCollisionShape(pybullet.GEOM_BOX, halfExtents=half_extents)
        visual_shape_id = self.env.p.createVisualShape(pybullet.GEOM_BOX, halfExtents=half_extents, rgbaColor=(0, 0, 0, 0))  # Make invisible by setting alpha to 0
        return self.env.p.createMultiBody(0.001, collision_shape_id, visual_shape_id)

class LiftingRobot(RobotWithHooks):
    LIFTED_CUBE_HEIGHT = 0.04
    LIFTED_CUBE_OFFSET = -0.007  # Cube moves backwards towards robot when lifted

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.lift_state = 'ready'
        self.lift_cid = None
        self.cube_id = None

        # Partial rewards
        self.initial_cube_position = None
        self.cube_dist_closer = 0

        # Stats
        self.pointless_cube_drop = False

    def store_new_action(self, action):
        super().store_new_action(action)
        self.potential_cube_id = self.ray_test_cube() if self.lift_state == 'ready' else None
        self.cube_dist_closer = 0
        self.pointless_cube_drop = False

    def process_cube_success(self, lifted=False):  # pylint: disable=arguments-differ
        super().process_cube_success()
        if lifted:
            self.cubes_with_reward += 1

    def reset_pose(self, *args):
        super().reset_pose(*args)
        if self.lift_state == 'lifting':
            self._reset_lifted_cube_pose()

    def process_lifted_cube_position(self, cube_position=None):
        if cube_position is None:
            cube_position = self.env.get_cube_position(self.cube_id)
        # delete cube
        # dist_closer = self.mapper.distance_to_receptacle(self.initial_cube_position) - self.mapper.distance_to_receptacle(cube_position)
        # self.cube_dist_closer += dist_closer
        # self.initial_cube_position = self.env.get_cube_position(self.cube_id)

    def lift_cube(self, cube_id):
        # Update variables and environment state
        self.cube_id = cube_id
        self.lift_state = 'lifting'
        self.env.available_cube_ids_set.remove(self.cube_id)
        self.collision_body_a_ids_set.add(self.cube_id)
        self.env.robot_collision_body_b_ids_set.add(self.cube_id)

        # Store initial cube position for partial rewards
        self.initial_cube_position = self.env.get_cube_position(self.cube_id)

        # Move cube to lifted position
        self._reset_lifted_cube_pose()

        # Create constraint
        offset = LiftingRobot.END_EFFECTOR_LOCATION + LiftingRobot.LIFTED_CUBE_OFFSET + VectorEnv.CUBE_WIDTH / 2
        parent_frame_position = (offset, 0, LiftingRobot.LIFTED_CUBE_HEIGHT + VectorEnv.CUBE_WIDTH / 2)
        self.lift_cid = self.env.p.createConstraint(self.id, -1, self.cube_id, -1, pybullet.JOINT_FIXED, None, parent_frame_position, (0, 0, 0))

    def try_drop_cube(self):
        # Process final cube position for partial rewards
        cube_position = self.env.get_cube_position(self.cube_id)
        self.process_lifted_cube_position(cube_position)

        if self.env.cube_position_in_receptacle(cube_position):
            # Remove constraint
            self.env.p.removeConstraint(self.lift_cid)
            self.lift_cid = None

            # Move cube forward beyond end effector
            current_position, current_heading = self.get_position(), self.get_heading()
            offset = LiftingRobot.END_EFFECTOR_LOCATION + RobotWithHooks.END_EFFECTOR_THICKNESS + VectorEnv.CUBE_WIDTH / 2
            cube_position = (
                current_position[0] + offset * math.cos(current_heading),
                current_position[1] + offset * math.sin(current_heading),
                Robot.HEIGHT + VectorEnv.CUBE_WIDTH / 2
            )
            self.env.p.resetBasePositionAndOrientation(self.cube_id, cube_position, heading_to_orientation(current_heading))

            # Update variables and environment state
            if self.env.cube_position_in_receptacle(cube_position):
                self.process_cube_success(lifted=True)
                self.env.remove_cube(self.cube_id)
            else:
                self.env.available_cube_ids_set.add(self.cube_id)
                self.pointless_cube_drop = True
            self.lift_state = 'ready'
            self.collision_body_a_ids_set.remove(self.cube_id)
            self.env.robot_collision_body_b_ids_set.remove(self.cube_id)
            self.initial_cube_position = None
            self.cube_id = None
            return True
        else:
            return False

    def drop_cube(self):
        # Process final cube position for partial rewards
        cube_position = self.env.get_cube_position(self.cube_id)
        self.process_lifted_cube_position(cube_position)

        # Remove constraint
        self.env.p.removeConstraint(self.lift_cid)
        self.lift_cid = None

        # Move cube forward beyond end effector
        current_position, current_heading = self.get_position(), self.get_heading()
        offset = LiftingRobot.END_EFFECTOR_LOCATION + RobotWithHooks.END_EFFECTOR_THICKNESS + VectorEnv.CUBE_WIDTH / 2
        cube_position = (
            current_position[0] + offset * math.cos(current_heading),
            current_position[1] + offset * math.sin(current_heading),
            Robot.HEIGHT + VectorEnv.CUBE_WIDTH / 2
        )

        self.env.p.resetBasePositionAndOrientation(self.cube_id, cube_position, heading_to_orientation(current_heading))

        # Update variables and environment state
        if self.env.cube_position_in_receptacle(cube_position):
            self.process_cube_success(lifted=True)
            self.env.remove_cube(self.cube_id)
        else:
            self.env.available_cube_ids_set.add(self.cube_id)
            self.pointless_cube_drop = True
        self.lift_state = 'ready'
        self.collision_body_a_ids_set.remove(self.cube_id)
        self.env.robot_collision_body_b_ids_set.remove(self.cube_id)
        self.initial_cube_position = None
        self.cube_id = None

    def _reset_lifted_cube_pose(self):
        current_position, current_heading = self.get_position(), self.get_heading()
        offset = LiftingRobot.END_EFFECTOR_LOCATION + LiftingRobot.LIFTED_CUBE_OFFSET + VectorEnv.CUBE_WIDTH / 2
        cube_position = (
            current_position[0] + offset * math.cos(current_heading),
            current_position[1] + offset * math.sin(current_heading),
            LiftingRobot.LIFTED_CUBE_HEIGHT + VectorEnv.CUBE_WIDTH / 2
        )
        self.env.p.resetBasePositionAndOrientation(self.cube_id, cube_position, heading_to_orientation(current_heading))

class ThrowingRobot(RobotWithHooks):
    BASE_LENGTH = Robot.BASE_LENGTH + 0.006  # 6 mm offset
    END_EFFECTOR_LOCATION = Robot.BACKPACK_OFFSET + BASE_LENGTH
    RADIUS = math.sqrt(Robot.HALF_WIDTH**2 + END_EFFECTOR_LOCATION**2)
    COLOR = (0.5294, 0.5294, 0.5294, 1)  # Light gray

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.cube_id = None
        self.initial_cube_position = None
        self.cube_dist_closer = 0

    def store_new_action(self, action):
        super().store_new_action(action)
        self.potential_cube_id = self.ray_test_cube()
        self.cube_dist_closer = 0

    def process_cube_success(self, thrown=False):  # pylint: disable=arguments-differ
        super().process_cube_success()
        if thrown:
            self.cubes_with_reward += 1

    def prepare_throw_cube(self, cube_id):
        # Update variables and environment state
        self.cube_id = cube_id
        self.env.available_cube_ids_set.remove(self.cube_id)

        # Store initial cube position for partial rewards
        self.initial_cube_position = self.env.get_cube_position(self.cube_id)

    def throw_cube(self):
        assert not self.real
        # Move cube over back of robot
        current_position, current_heading = self.get_position(), self.get_heading()
        back_position = (
            current_position[0] + Robot.BACKPACK_OFFSET * math.cos(current_heading),
            current_position[1] + Robot.BACKPACK_OFFSET * math.sin(current_heading),
            Robot.HEIGHT + VectorEnv.CUBE_WIDTH  # Half cube width above robot
        )
        self.env.p.resetBasePositionAndOrientation(self.cube_id, back_position, heading_to_orientation(current_heading))

        # Apply force and torque
        force_x = self.env.robot_random_state.normal(5.5, 0.75) * 40
        force_y = self.env.robot_random_state.normal(1.5, 0.75) * (-1 if self.env.robot_random_state.rand() < 0.5 else 1) * 40
        self.env.p.applyExternalForce(self.cube_id, -1, (-force_x, -force_y, 0), (0, 0, 0), flags=pybullet.LINK_FRAME)
        self.env.p.applyExternalTorque(self.cube_id, -1, (0, -0.03, 0), flags=pybullet.WORLD_FRAME)  # See https://github.com/bulletphysics/bullet3/issues/1949

    def finish_throw_cube(self):
        # Process final cube position for partial rewards
        cube_position = self.env.get_cube_position(self.cube_id)

        # Update variables and environment state
        if self.env.cube_position_in_receptacle(cube_position):
            self.process_cube_success(thrown=True)
            self.env.remove_cube(self.cube_id)
        else:
            self.env.available_cube_ids_set.add(self.cube_id)
        self.cube_id = None

class RescueRobot(RobotWithHooks):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.cube_id = None

    def store_new_action(self, action):
        super().store_new_action(action)
        self.potential_cube_id = self.ray_test_cube()

    def process_cube_success(self):
        super().process_cube_success()
        self.cubes_with_reward += 1

    def prepare_rescue_cube(self, cube_id):
        self.cube_id = cube_id
        self.env.available_cube_ids_set.remove(cube_id)

    def rescue_cube(self):
        # Update variables and environment state
        self.process_cube_success()
        self.env.remove_cube(self.cube_id)
        self.cube_id = None

class RobotController:
    DRIVE_STEP_SIZE = 0.01  # 5 mm results in exactly 1 mm per simulation step
    TURN_STEP_SIZE = math.radians(15)  # 15 deg results in exactly 3 deg per simulation step

    def __init__(self, robot):
        self.robot = robot
        self.state = 'idle'
        self.waypoint_index = None  # Index of waypoint we are currently headed towards
        self.prev_position = None  # Position before call to p.stepSimulation()
        self.prev_heading = None
        self.sim_steps = 0
        self.consecutive_turning_sim_steps = None  # Used to detect if robot is stuck and oscillating
        self.manipulation_sim_step_target = 0
        self.manipulation_sim_steps = 0

    def reset(self):
        self.state = 'idle'
        self.waypoint_index = 1
        self.prev_position = None
        self.prev_heading = None
        self.sim_steps = 0
        self.consecutive_turning_sim_steps = 0
        self.robot.reward = 0
        self.robot.drop_reward = 0

    def new_action(self):
        self.state = 'moving'

    def step(self):
        # States: idle, moving, manipulating

        assert not self.state == 'idle'
        self.sim_steps += 1
        self.robot.reward = 0
        self.robot.drop_reward = 0

        if self.state == 'moving':
            current_position, current_heading = self.robot.get_position(), self.robot.get_heading()

            # First check change after sim step
            if self.prev_position is not None:

                # Detect if robot is still moving
                driving = distance(self.prev_position, current_position) > 0.0005  # 0.5 mm
                turning = abs(heading_difference(self.prev_heading, current_heading)) > math.radians(1)  # 1 deg
                self.consecutive_turning_sim_steps = (self.consecutive_turning_sim_steps + 1) if turning else 0
                stuck_oscillating = self.consecutive_turning_sim_steps > 100  # About 60 sim steps is sufficient for turning 180 deg
                not_moving = (not driving and not turning) or stuck_oscillating

                # Check for collisions
                if distance(self.robot.waypoint_positions[0], current_position) > RobotController.DRIVE_STEP_SIZE or not_moving:
                    self.robot.check_for_collisions()

                # Check if step limit exceeded (expect this won't ever happen, but just in case)
                step_limit_exceeded = self.sim_steps > 3200

                if self.robot.collided_with_obstacle or self.robot.collided_with_robot or step_limit_exceeded:
                    self.robot.update_distance()
                    self.state = 'idle'

                if self.state == 'moving' and not_moving:
                    # Reached current waypoint, move on to next waypoint
                    self.robot.update_distance()
                    if self.waypoint_index == len(self.robot.waypoint_positions) - 1:
                        self._done_moving()
                    else:
                        self.waypoint_index += 1

            # If still moving, set constraint for new pose
            if self.state == 'moving':
                new_position, new_heading = current_position, current_heading

                # Determine whether to turn or drive
                heading_diff = heading_difference(current_heading, self.robot.waypoint_headings[self.waypoint_index])
                if abs(heading_diff) > RobotController.TURN_STEP_SIZE:
                    new_heading += math.copysign(1, heading_diff) * RobotController.TURN_STEP_SIZE
                else:
                    curr_waypoint_position = self.robot.waypoint_positions[self.waypoint_index]
                    dx = curr_waypoint_position[0] - current_position[0]
                    dy = curr_waypoint_position[1] - current_position[1]
                    if distance(current_position, curr_waypoint_position) < RobotController.DRIVE_STEP_SIZE:
                        new_position = curr_waypoint_position
                    else:
                        move_sign = 1
                        new_heading = math.atan2(move_sign * dy, move_sign * dx)
                        new_position = (
                            new_position[0] + move_sign * RobotController.DRIVE_STEP_SIZE * math.cos(new_heading),
                            new_position[1] + move_sign * RobotController.DRIVE_STEP_SIZE * math.sin(new_heading),
                            new_position[2]
                        )

                # Set constraint
                self.robot.env.p.changeConstraint(
                    self.robot.cid, jointChildPivot=new_position, jointChildFrameOrientation=heading_to_orientation(new_heading), maxForce=Robot.CONSTRAINT_MAX_FORCE)

            self.prev_position, self.prev_heading = current_position, current_heading

        elif self.state == 'manipulating':
            self.manipulation_sim_steps += 1
            if self.manipulation_sim_steps >= self.manipulation_sim_step_target:
                self.manipulation_sim_step_target = 0
                self.manipulation_sim_steps = 0
                if isinstance(self.robot, ThrowingRobot):
                    self.robot.finish_throw_cube()
                self.state = 'idle'

    def get_intention_path(self):
        return [self.robot.get_position()] + self.robot.waypoint_positions[self.waypoint_index:-1] + [self.robot.target_end_effector_position]

    def get_history_path(self):
        return self.robot.waypoint_positions[:self.waypoint_index] + [self.robot.get_position()]

    def _done_moving(self):
        self.state = 'idle'
        if isinstance(self.robot, LiftingRobot) and self.robot.lift_state == 'lifting':
            if self.robot.try_drop_cube():
                self.robot.drop_reward += 1
        elif isinstance(self.robot, RobotWithHooks):
            if self.robot.potential_cube_id is not None and distance(self.robot.get_position(), self.robot.waypoint_positions[-1]) < RobotController.DRIVE_STEP_SIZE:
                cube_id = self.robot.ray_test_cube()
                if cube_id is not None:
                    if isinstance(self.robot, LiftingRobot):
                        self.robot.lift_cube(cube_id)
                        self.robot.reward += 1
                    elif isinstance(self.robot, ThrowingRobot):
                        self.robot.prepare_throw_cube(cube_id)
                        self.robot.throw_cube()
                        self.state = 'manipulating'
                        self.manipulation_sim_step_target = 100
                    elif isinstance(self.robot, RescueRobot):
                        self.robot.prepare_rescue_cube(cube_id)
                        self.robot.rescue_cube()

def distance(p1, p2):
    return math.sqrt((p2[0] - p1[0])**2 + (p2[1] - p1[1])**2)

def orientation_to_heading(o):
    # Note: Only works for z-axis rotations
    return 2 * math.acos(math.copysign(1, o[2]) * o[3])

def heading_to_orientation(h):
    return pybullet.getQuaternionFromEuler((0, 0, h))

def restrict_heading_range(h):
    return (h + math.pi) % (2 * math.pi) - math.pi

def heading_difference(h1, h2):
    return restrict_heading_range(h2 - h1)

def dot(a, b):
    return a[0] * b[0] + a[1] * b[1]
