import numpy as np
from typing import Tuple
from typing import Optional
from collections import namedtuple

import gym
from gym import spaces
from gym.utils import seeding
gym.logger.set_level(40)  # noqa

from gym_hybrid.agents import BaseAgent
from gym_hybrid.agents import MovingAgent
from gym_hybrid.agents import SlidingAgent
from gym_hybrid.agents import HardMoveAgent

from shapely.geometry import Point, Polygon, LineString


# Discrete action ids understood by MovingBaseEnv.step / SlidingBaseEnv.step.
ACCELERATE = 0  # speed up; the action parameter is clamped to [0, 1] in step()
TURN = 1  # rotate; the action parameter is clamped to [-1, 1] in step()
BREAK = 2  # (sic) brake action: step() calls agent.break_() and ignores the parameter


# Circular regions of the field, each described by its centre (x, y) and radius.
Target = namedtuple('Target', ['x', 'y', 'radius'])
Barrier = namedtuple('Barrier', ['x', 'y', 'radius'])



def compute_barrier_polygon(barrier):
    """
    Convert circular barriers into shapely polygons.

    Args:
        barrier: Sequence of objects exposing ``x``, ``y`` and ``radius``.

    Returns:
        A list with one polygon per barrier, in the same order, each built
        from the buffered centre point of the barrier.
    """
    return [
        Polygon(Point(circle.x, circle.y).buffer(circle.radius))
        for circle in barrier
    ]
    

def compute_field_polygon(field_size):
    """
    Build the square playing field as a shapely polygon.

    Args:
        field_size: Half of the side length; the square spans
            ``[-field_size, field_size]`` on both axes.

    Returns:
        The polygon whose corners are listed starting at
        ``(-field_size, -field_size)`` and continuing through
        ``(field_size, -field_size)``, ``(field_size, field_size)`` and
        ``(-field_size, field_size)``.
    """
    lo, hi = -field_size, field_size
    corners = [(lo, lo), (hi, lo), (hi, hi), (lo, hi)]
    return Polygon(corners)
    


def is_point_on_polygon_edge(point, polygon):
    '''
    Check whether the point lies on any edge of the polygon's exterior ring.

    Each pair of consecutive exterior coordinates is treated as one edge; the
    point counts as "on" an edge when its distance to it is below 1e-9.
    '''
    ring = polygon.exterior.coords
    return any(
        point.distance(LineString([ring[k], ring[k + 1]])) < 1e-9
        for k in range(len(ring) - 1)
    )


def is_point_on_circle_edge(point, circle):
    '''
    Tell whether the point sits on the circle's exterior ring, i.e. its
    distance to ``circle.exterior`` is below the 1e-9 tolerance.
    '''
    gap_to_boundary = point.distance(circle.exterior)
    return gap_to_boundary < 1e-9


def _nearest_boundary_hit(ray, origin, shape, edge_test, current_min):
    """Return the smaller of ``current_min`` and the closest hit of ``ray`` on ``shape``."""
    hit = ray.intersection(shape)
    if hit.is_empty:
        return current_min

    kind = hit.geom_type
    if kind == 'Point':
        candidates = [hit]
    elif kind == 'MultiPoint':
        candidates = list(hit.geoms)
    elif kind == 'LineString':
        # The overlap is a segment: only its vertices that lie on the shape's
        # boundary are real hits (the others are the ray start or ray end).
        candidates = [p for p in map(Point, hit.coords) if edge_test(p, shape)]
    else:
        # Other geometry types are ignored, exactly as before.
        candidates = []

    for candidate in candidates:
        current_min = min(current_min, origin.distance(candidate))
    return current_min


def lidar_detection_distance(agent, barriers, field, ray_length=2, ray_num=8):
    """
    Simulate a ring of lidar rays around the agent.

    Rays are cast from the agent position at evenly spaced angles, starting
    at the agent heading ``agent.theta``. For every ray the reading is the
    minimum of the nearest barrier hit and the nearest field-boundary hit.

    Args:
        agent: Object exposing ``x``, ``y`` and ``theta``.
        barriers: Iterable of shapely polygons describing the barriers.
        field: Shapely polygon describing the field.
        ray_length: Length of each ray; also the reading when nothing is hit.
        ray_num: Number of rays spread over the full circle.

    Returns:
        A list of ``ray_num`` readings. A reading is ``-1`` when the agent
        centre is inside a barrier or is not contained in the field.
    """
    agent_center = Point(agent.x, agent.y)

    # Containment does not depend on the ray direction, so test it once.
    # Hit distances are never negative, so one containing barrier forces the
    # barrier reading of every ray to -1.
    inside_barrier = any(obstacle.contains(agent_center) for obstacle in barriers)
    inside_field = field.contains(agent_center)

    min_dis = []
    for i in range(ray_num):
        # Ray direction, measured from the agent's heading.
        angle = (agent.theta + (2 * np.pi / ray_num) * i) % (2 * np.pi)
        end_x = agent.x + ray_length * np.cos(angle)
        end_y = agent.y + ray_length * np.sin(angle)
        ray = LineString([agent_center, (end_x, end_y)])

        if inside_barrier:
            obs_min_distance = -1
        else:
            obs_min_distance = ray_length
            for obstacle in barriers:
                obs_min_distance = _nearest_boundary_hit(
                    ray, agent_center, obstacle, is_point_on_circle_edge, obs_min_distance
                )

        if inside_field:
            wal_min_distance = _nearest_boundary_hit(
                ray, agent_center, field, is_point_on_polygon_edge, ray_length
            )
        else:
            wal_min_distance = -1

        min_dis.append(min(obs_min_distance, wal_min_distance))

    return min_dis

                        
class Action:
    """
    Lightweight holder for an environment action: a discrete id plus the
    continuous parameters that accompany it.
    """
    def __init__(self, id_: int, parameters: list):
        """
        Store the action id and its parameters.

        Args:
            id_: The id of the selected action.
            parameters: The parameters of an action.
        """
        self.parameters = parameters
        self.id = id_

    @property
    def parameter(self) -> float:
        """
        Parameter that belongs to the selected action.

        Returns:
            ``parameters[id]`` when exactly two parameters were supplied,
            otherwise the first parameter.
        """
        index = self.id if len(self.parameters) == 2 else 0
        return self.parameters[index]


class MovingBaseEnv(gym.Env):
    """
    Gym environment parent class.

    The agent has to reach a circular target and stop inside it while random
    circular barriers are scattered over the field. ``step`` reports, next to
    the reward, a cost proportional to the number of barriers the agent
    currently overlaps.
    """
    # Cost charged for every barrier the agent overlaps after a step.
    cost_per_violation = 2

    def __init__(
            self,
            seed: Optional[int] = None,
            max_turn: float = np.pi/2,
            max_acceleration: float = 0.5,
            delta_t: float = 0.005,
            max_step: int = 200,
            penalty: float = 0.000,
            break_value: float = 0.1,
    ):
        """Initialization of the gym environment.

        Args:
            seed (int): Seed used to get reproducible results.
            max_turn (float): Maximum turn during one step (in radian).
            max_acceleration (float): Maximum acceleration during one step.
            delta_t (float): Time duration of one step.
            max_step (int): Maximum number of steps in one episode.
            penalty (float): Score penalty given at the agent every step.
            break_value (float): Break value when performing break action.
        """
        # Agent Parameters
        self.max_turn = max_turn
        self.max_acceleration = max_acceleration
        self.break_value = break_value

        # Environment Parameters
        self.delta_t = delta_t
        self.max_step = max_step
        self.field_size = 1.0
        self.target_radius = 0.1
        self.penalty = penalty

        # Barrier Parameters
        self.barrier_radius = 0.07
        self.barrier_num = 8

        # Initialization
        self.seed(seed)
        self.target = None
        self.viewer = None
        self.current_step = None
        self.agent = BaseAgent(break_value=break_value, delta_t=delta_t)

        parameters_min = np.array([0, -1])
        parameters_max = np.array([1, +1])

        self.action_space = spaces.Tuple((spaces.Discrete(3),
                                          spaces.Box(parameters_min, parameters_max)))
        # 10 base features + 8 lidar readings. Box takes (low, high); the
        # bounds used to be passed the other way round. They remain nominal:
        # lidar readings, for instance, can be as large as the ray length (2).
        self.observation_space = spaces.Box(-np.ones(10 + 8), np.ones(10 + 8))

    def seed(self, seed: Optional[int] = None) -> list:
        """Seed the environment's random generator and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)  # noqa
        return [seed]

    def _count_barrier_hits(self, x: float, y: float) -> int:
        """Count the barriers whose centre is closer than ``barrier_radius`` to (x, y)."""
        return sum(
            1 for barrier in self.barrier_circles
            if self.get_distance(x, y, barrier.x, barrier.y) < self.barrier_radius
        )

    def reset(self) -> np.ndarray:
        """
        Start a new episode with a fresh target, barriers and agent pose.

        Returns:
            The initial observation (see ``get_state``).
        """
        self.current_step = 0

        limit = self.field_size - self.target_radius
        low = [-limit, -limit, self.target_radius]
        high = [limit, limit, self.target_radius]
        self.target = Target(*self.np_random.uniform(low, high))

        # Barriers: rejection-sample centres until none overlaps the target.
        limit_barrier = self.field_size - self.barrier_radius
        low = [-limit_barrier, -limit_barrier, self.barrier_radius]
        high = [limit_barrier, limit_barrier, self.barrier_radius]
        self.barrier_circles = []
        while len(self.barrier_circles) < self.barrier_num:
            candidate = Barrier(*self.np_random.uniform(low, high))
            gap = self.get_distance(self.target.x, self.target.y, candidate.x, candidate.y)
            if gap >= self.target_radius + self.barrier_radius:
                self.barrier_circles.append(candidate)

        # Agent pose (x, y, theta): resample until it starts outside every barrier.
        low = [-self.field_size, -self.field_size, 0]
        high = [self.field_size, self.field_size, 2 * np.pi]
        while True:
            agent = self.np_random.uniform(low, high)
            if self._count_barrier_hits(agent[0], agent[1]) == 0:
                break

        self.circle_barrier_polygons = compute_barrier_polygon(self.barrier_circles)
        self.field_polygons = compute_field_polygon(self.field_size)

        self.agent.reset(*agent)

        return self.get_state()

    def step(self, raw_action: Tuple[int, list]) -> Tuple[np.ndarray, float, int, bool, dict]:
        """
        Apply one action and advance the episode by one step.

        Args:
            raw_action: Pair ``(action_id, parameters)``.

        Returns:
            ``(state, reward, cost, done, info)``. Note the extra ``cost``
            entry compared with the classic 4-tuple gym API.
        """
        action = Action(*raw_action)
        last_distance = self.distance
        self.current_step += 1

        if action.id == TURN:
            rotation = self.max_turn * max(min(action.parameter, 1), -1)
            self.agent.turn(rotation)
        elif action.id == ACCELERATE:
            acceleration = self.max_acceleration * max(min(action.parameter, 1), 0)
            self.agent.accelerate(acceleration)
        elif action.id == BREAK:
            self.agent.break_()

        out_of_field = abs(self.agent.x) > self.field_size or abs(self.agent.y) > self.field_size
        if self.distance < self.target_radius and self.agent.speed == 0:
            # Success: stopped inside the target.
            reward = self.get_reward(last_distance, True)
            done = True
        elif out_of_field or self.current_step > self.max_step:
            reward = -1
            done = True
        else:
            reward = self.get_reward(last_distance)
            done = False

        cost = self._count_barrier_hits(self.agent.x, self.agent.y) * self.cost_per_violation

        return self.get_state(), reward, cost, done, {}

    def get_state(self) -> np.ndarray:
        """
        Build the observation vector.

        Returns:
            Array of 10 base features (agent x, y, speed, cos/sin of heading,
            target x, y, distance to target, inside-target flag, episode
            progress) followed by the lidar readings.
        """
        state_primary_env = [
            self.agent.x,
            self.agent.y,
            self.agent.speed,
            np.cos(self.agent.theta),
            np.sin(self.agent.theta),
            self.target.x,
            self.target.y,
            self.distance,
            0 if self.distance > self.target_radius else 1,
            self.current_step / self.max_step
        ]

        state_lidar = lidar_detection_distance(self.agent, self.circle_barrier_polygons, self.field_polygons)

        return np.array(state_primary_env + state_lidar)

    def get_reward(self, last_distance: float, goal: bool = False) -> float:
        """Progress towards the target minus the step penalty, plus 1 on success."""
        return last_distance - self.distance - self.penalty + (1 if goal else 0)

    @property
    def distance(self) -> float:
        """Current distance between the agent and the target centre."""
        return self.get_distance(self.agent.x, self.agent.y, self.target.x, self.target.y)

    @staticmethod
    def get_distance(x1: float, y1: float, x2: float, y2: float) -> float:
        """Euclidean distance between (x1, y1) and (x2, y2)."""
        return np.sqrt(((x1 - x2) ** 2) + ((y1 - y2) ** 2))

    def render(self, mode='human'):
        """
        Draw the agent, its heading arrow, the target, the barriers and the
        lidar rays.

        Args:
            mode: ``'rgb_array'`` makes the viewer return the frame as an array.
        """
        screen_width = 400
        screen_height = 400
        unit_x = screen_width / 2
        unit_y = screen_height / 2
        agent_radius = 0.05
        ray_length = 2
        ray_num = 8

        def to_screen(x, y):
            # Field coordinates in [-1, 1] -> pixel coordinates.
            return unit_x * (1 + x), unit_y * (1 + y)

        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height)

            agent = rendering.make_circle(unit_x * agent_radius)
            self.agent_trans = rendering.Transform(translation=to_screen(self.agent.x, self.agent.y))  # noqa
            agent.add_attr(self.agent_trans)
            agent.set_color(0.1, 0.3, 0.9)
            self.viewer.add_geom(agent)

            t, r, m = 0.1 * unit_x, 0.04 * unit_y, 0.06 * unit_x
            arrow = rendering.FilledPolygon([(t, 0), (m, r), (m, -r)])
            self.arrow_trans = rendering.Transform(rotation=self.agent.theta)  # noqa
            arrow.add_attr(self.arrow_trans)
            arrow.add_attr(self.agent_trans)
            arrow.set_color(0, 0, 0)
            self.viewer.add_geom(arrow)

            target = rendering.make_circle(unit_x * self.target_radius)
            self.target_trans = rendering.Transform(translation=to_screen(self.target.x, self.target.y))  # noqa
            target.add_attr(self.target_trans)
            target.set_color(1, 0.5, 0.5)
            self.viewer.add_geom(target)

            self.barrier_trans = []  # noqa
            for circle in self.barrier_circles:
                barrier = rendering.make_circle(unit_x * self.barrier_radius)
                barrier_trans = rendering.Transform(translation=to_screen(circle.x, circle.y))
                barrier.add_attr(barrier_trans)
                barrier.set_color(0.8, 0.6, 0)
                self.viewer.add_geom(barrier)
                self.barrier_trans.append(barrier_trans)

            # Ray end points are filled in below on every call.
            self.rays = []  # noqa
            for _ in range(ray_num):
                ray = rendering.Line((0.0, 0.0), (0.0, 0.0))
                ray.set_color(1, 0, 0)
                self.viewer.add_geom(ray)
                self.rays.append(ray)

        self.arrow_trans.set_rotation(self.agent.theta)
        self.agent_trans.set_translation(*to_screen(self.agent.x, self.agent.y))

        # reset() moves the target and barriers while the viewer is kept, so
        # their geoms must be repositioned on every frame as well.
        self.target_trans.set_translation(*to_screen(self.target.x, self.target.y))
        for barrier_trans, circle in zip(self.barrier_trans, self.barrier_circles):
            barrier_trans.set_translation(*to_screen(circle.x, circle.y))

        start_x, start_y = to_screen(self.agent.x, self.agent.y)
        for i, ray in enumerate(self.rays):
            ray_angle = (self.agent.theta + (np.pi/4)*i) % (2 * np.pi)
            end_x = start_x + unit_x * ray_length * np.cos(ray_angle)
            end_y = start_y + unit_y * ray_length * np.sin(ray_angle)
            ray.start = (start_x, start_y)
            ray.end = (end_x, end_y)

        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    def close(self):
        """Close the viewer, if one was opened by ``render``."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None
            
class SlidingBaseEnv(gym.Env):
    """
    Gym environment parent class.

    The agent has to reach a circular target and stop inside it while random
    circular barriers are scattered over the field. ``step`` reports, next to
    the reward, a cost equal to the number of barriers the agent currently
    overlaps.
    """
    # Cost charged for every barrier the agent overlaps after a step.
    cost_per_violation = 1

    def __init__(
            self,
            seed: Optional[int] = None,
            max_turn: float = np.pi/2,
            max_acceleration: float = 0.5,
            delta_t: float = 0.005,
            max_step: int = 200,
            penalty: float = 0.000,
            break_value: float = 0.1,
    ):
        """Initialization of the gym environment.

        Args:
            seed (int): Seed used to get reproducible results.
            max_turn (float): Maximum turn during one step (in radian).
            max_acceleration (float): Maximum acceleration during one step.
            delta_t (float): Time duration of one step.
            max_step (int): Maximum number of steps in one episode.
            penalty (float): Score penalty given at the agent every step.
            break_value (float): Break value when performing break action.
        """
        # Agent Parameters
        self.max_turn = max_turn
        self.max_acceleration = max_acceleration
        self.break_value = break_value

        # Environment Parameters
        self.delta_t = delta_t
        self.max_step = max_step
        self.field_size = 1.0
        self.target_radius = 0.1
        self.penalty = penalty

        # Barrier Parameters
        self.barrier_radius = 0.07
        self.barrier_num = 8

        # Initialization
        self.seed(seed)
        self.target = None
        self.viewer = None
        self.current_step = None
        self.agent = BaseAgent(break_value=break_value, delta_t=delta_t)

        parameters_min = np.array([0, -1])
        parameters_max = np.array([1, +1])

        self.action_space = spaces.Tuple((spaces.Discrete(3),
                                          spaces.Box(parameters_min, parameters_max)))
        # 10 base features + 8 lidar readings. Box takes (low, high); the
        # bounds used to be passed the other way round. They remain nominal:
        # lidar readings, for instance, can be as large as the ray length (2).
        self.observation_space = spaces.Box(-np.ones(10 + 8), np.ones(10 + 8))

    def seed(self, seed: Optional[int] = None) -> list:
        """Seed the environment's random generator and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)  # noqa
        return [seed]

    def _count_barrier_hits(self, x: float, y: float) -> int:
        """Count the barriers whose centre is closer than ``barrier_radius`` to (x, y)."""
        return sum(
            1 for barrier in self.barrier_circles
            if self.get_distance(x, y, barrier.x, barrier.y) < self.barrier_radius
        )

    def reset(self) -> np.ndarray:
        """
        Start a new episode with a fresh target, barriers and agent pose.

        Returns:
            The initial observation (see ``get_state``).
        """
        self.current_step = 0

        limit = self.field_size - self.target_radius
        low = [-limit, -limit, self.target_radius]
        high = [limit, limit, self.target_radius]
        self.target = Target(*self.np_random.uniform(low, high))

        # Barriers: rejection-sample centres until none overlaps the target.
        limit_barrier = self.field_size - self.barrier_radius
        low = [-limit_barrier, -limit_barrier, self.barrier_radius]
        high = [limit_barrier, limit_barrier, self.barrier_radius]
        self.barrier_circles = []
        while len(self.barrier_circles) < self.barrier_num:
            candidate = Barrier(*self.np_random.uniform(low, high))
            gap = self.get_distance(self.target.x, self.target.y, candidate.x, candidate.y)
            if gap >= self.target_radius + self.barrier_radius:
                self.barrier_circles.append(candidate)

        # Agent pose (x, y, theta): resample until it starts outside every barrier.
        low = [-self.field_size, -self.field_size, 0]
        high = [self.field_size, self.field_size, 2 * np.pi]
        while True:
            agent = self.np_random.uniform(low, high)
            if self._count_barrier_hits(agent[0], agent[1]) == 0:
                break

        self.circle_barrier_polygons = compute_barrier_polygon(self.barrier_circles)
        self.field_polygons = compute_field_polygon(self.field_size)

        self.agent.reset(*agent)

        return self.get_state()

    def step(self, raw_action: Tuple[int, list]) -> Tuple[np.ndarray, float, int, bool, dict]:
        """
        Apply one action and advance the episode by one step.

        Args:
            raw_action: Pair ``(action_id, parameters)``.

        Returns:
            ``(state, reward, cost, done, info)``. Note the extra ``cost``
            entry compared with the classic 4-tuple gym API.
        """
        action = Action(*raw_action)
        last_distance = self.distance
        self.current_step += 1

        if action.id == TURN:
            rotation = self.max_turn * max(min(action.parameter, 1), -1)
            self.agent.turn(rotation)
        elif action.id == ACCELERATE:
            acceleration = self.max_acceleration * max(min(action.parameter, 1), 0)
            self.agent.accelerate(acceleration)
        elif action.id == BREAK:
            self.agent.break_()

        out_of_field = abs(self.agent.x) > self.field_size or abs(self.agent.y) > self.field_size
        if self.distance < self.target_radius and self.agent.speed == 0:
            # Success: stopped inside the target.
            reward = self.get_reward(last_distance, True)
            done = True
        elif out_of_field or self.current_step > self.max_step:
            reward = -1
            done = True
        else:
            reward = self.get_reward(last_distance)
            done = False

        cost = self._count_barrier_hits(self.agent.x, self.agent.y) * self.cost_per_violation

        return self.get_state(), reward, cost, done, {}

    def get_state(self) -> np.ndarray:
        """
        Build the observation vector.

        Returns:
            Array of 10 base features (agent x, y, speed, cos/sin of heading,
            target x, y, distance to target, inside-target flag, episode
            progress) followed by the lidar readings.
        """
        state_primary_env = [
            self.agent.x,
            self.agent.y,
            self.agent.speed,
            np.cos(self.agent.theta),
            np.sin(self.agent.theta),
            self.target.x,
            self.target.y,
            self.distance,
            0 if self.distance > self.target_radius else 1,
            self.current_step / self.max_step
        ]

        state_lidar = lidar_detection_distance(self.agent, self.circle_barrier_polygons, self.field_polygons)

        return np.array(state_primary_env + state_lidar)

    def get_reward(self, last_distance: float, goal: bool = False) -> float:
        """Progress towards the target minus the step penalty, plus 1 on success."""
        return last_distance - self.distance - self.penalty + (1 if goal else 0)

    @property
    def distance(self) -> float:
        """Current distance between the agent and the target centre."""
        return self.get_distance(self.agent.x, self.agent.y, self.target.x, self.target.y)

    @staticmethod
    def get_distance(x1: float, y1: float, x2: float, y2: float) -> float:
        """Euclidean distance between (x1, y1) and (x2, y2)."""
        return np.sqrt(((x1 - x2) ** 2) + ((y1 - y2) ** 2))

    def render(self, mode='human'):
        """
        Draw the agent, its heading arrow, the target, the barriers and the
        lidar rays.

        Args:
            mode: ``'rgb_array'`` makes the viewer return the frame as an array.
        """
        screen_width = 400
        screen_height = 400
        unit_x = screen_width / 2
        unit_y = screen_height / 2
        agent_radius = 0.05
        ray_length = 2
        ray_num = 8

        def to_screen(x, y):
            # Field coordinates in [-1, 1] -> pixel coordinates.
            return unit_x * (1 + x), unit_y * (1 + y)

        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height)

            agent = rendering.make_circle(unit_x * agent_radius)
            self.agent_trans = rendering.Transform(translation=to_screen(self.agent.x, self.agent.y))  # noqa
            agent.add_attr(self.agent_trans)
            agent.set_color(0.1, 0.3, 0.9)
            self.viewer.add_geom(agent)

            t, r, m = 0.1 * unit_x, 0.04 * unit_y, 0.06 * unit_x
            arrow = rendering.FilledPolygon([(t, 0), (m, r), (m, -r)])
            self.arrow_trans = rendering.Transform(rotation=self.agent.theta)  # noqa
            arrow.add_attr(self.arrow_trans)
            arrow.add_attr(self.agent_trans)
            arrow.set_color(0, 0, 0)
            self.viewer.add_geom(arrow)

            target = rendering.make_circle(unit_x * self.target_radius)
            self.target_trans = rendering.Transform(translation=to_screen(self.target.x, self.target.y))  # noqa
            target.add_attr(self.target_trans)
            target.set_color(1, 0.5, 0.5)
            self.viewer.add_geom(target)

            self.barrier_trans = []  # noqa
            for circle in self.barrier_circles:
                barrier = rendering.make_circle(unit_x * self.barrier_radius)
                barrier_trans = rendering.Transform(translation=to_screen(circle.x, circle.y))
                barrier.add_attr(barrier_trans)
                barrier.set_color(0.8, 0.6, 0)
                self.viewer.add_geom(barrier)
                self.barrier_trans.append(barrier_trans)

            # Ray end points are filled in below on every call.
            self.rays = []  # noqa
            for _ in range(ray_num):
                ray = rendering.Line((0.0, 0.0), (0.0, 0.0))
                ray.set_color(1, 0, 0)
                self.viewer.add_geom(ray)
                self.rays.append(ray)

        self.arrow_trans.set_rotation(self.agent.theta)
        self.agent_trans.set_translation(*to_screen(self.agent.x, self.agent.y))

        # reset() moves the target and barriers while the viewer is kept, so
        # their geoms must be repositioned on every frame as well.
        self.target_trans.set_translation(*to_screen(self.target.x, self.target.y))
        for barrier_trans, circle in zip(self.barrier_trans, self.barrier_circles):
            barrier_trans.set_translation(*to_screen(circle.x, circle.y))

        start_x, start_y = to_screen(self.agent.x, self.agent.y)
        for i, ray in enumerate(self.rays):
            ray_angle = (self.agent.theta + (np.pi/4)*i) % (2 * np.pi)
            end_x = start_x + unit_x * ray_length * np.cos(ray_angle)
            end_y = start_y + unit_y * ray_length * np.sin(ray_angle)
            ray.start = (start_x, start_y)
            ray.end = (end_x, end_y)

        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    def close(self):
        """Close the viewer, if one was opened by ``render``."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None


class MovingEnv(MovingBaseEnv):
    """
    MovingBaseEnv whose agent is a ``MovingAgent``.
    """
    def __init__(
            self,
            seed: Optional[int] = None,
            max_turn: float = np.pi/2,
            max_acceleration: float = 0.5,
            delta_t: float = 0.005,
            max_step: int = 200,
            penalty: float = 0.000,
            break_value: float = 0.1,
    ):
        """Forward every setting to the base environment, then swap in the agent.

        Args:
            seed (int): Seed used to get reproducible results.
            max_turn (float): Maximum turn during one step (in radian).
            max_acceleration (float): Maximum acceleration during one step.
            delta_t (float): Time duration of one step.
            max_step (int): Maximum number of steps in one episode.
            penalty (float): Score penalty given at the agent every step.
            break_value (float): Break value when performing break action.
        """
        super().__init__(
            seed=seed,
            max_turn=max_turn,
            max_acceleration=max_acceleration,
            delta_t=delta_t,
            max_step=max_step,
            penalty=penalty,
            break_value=break_value,
        )

        # Replace the BaseAgent created by the parent constructor.
        self.agent = MovingAgent(
            break_value=break_value,
            delta_t=delta_t,
        )
        

class SlidingEnv(SlidingBaseEnv):
    """
    SlidingBaseEnv whose agent is a ``SlidingAgent``.
    """
    def __init__(
            self,
            seed: Optional[int] = None,
            max_turn: float = np.pi/2,
            max_acceleration: float = 0.5,
            delta_t: float = 0.005,
            max_step: int = 200,
            penalty: float = 0.000,
            break_value: float = 0.1
    ):
        """Forward every setting to the base environment, then swap in the agent.

        Args:
            seed (int): Seed used to get reproducible results.
            max_turn (float): Maximum turn during one step (in radian).
            max_acceleration (float): Maximum acceleration during one step.
            delta_t (float): Time duration of one step.
            max_step (int): Maximum number of steps in one episode.
            penalty (float): Score penalty given at the agent every step.
            break_value (float): Break value when performing break action.
        """
        super().__init__(
            seed=seed,
            max_turn=max_turn,
            max_acceleration=max_acceleration,
            delta_t=delta_t,
            max_step=max_step,
            penalty=penalty,
            break_value=break_value
        )

        # Replace the BaseAgent created by the parent constructor.
        self.agent = SlidingAgent(
            break_value=break_value,
            delta_t=delta_t
        )
        

class HardMoveEnv(gym.Env):
    """
    HardMove environment. Please refer to https://arxiv.org/abs/2109.05490 for details.

    ``step`` reports, next to the reward, a cost proportional to the number
    of barriers the agent currently overlaps.
    """
    # Cost charged for every barrier the agent overlaps after a step.
    cost_per_violation = 10

    def __init__(
        self,
        num_actuators: int = 4,
        seed: Optional[int] = None,
        max_turn: float = np.pi / 2,
        max_acceleration: float = 0.5,
        delta_t: float = 0.005,
        max_step: int = 25,
        penalty: float = 0.000,
        break_value: float = 0.1,
    ):
        """Initialization of the gym environment.

        Args:
            num_actuators (int): Number of actuators; the discrete action
                space has ``2 ** num_actuators`` choices and the parameter
                box has ``num_actuators`` dimensions.
            seed (int): Seed used to get reproducible results.
            max_turn (float): Maximum turn during one step (in radian).
            max_acceleration (float): Maximum acceleration during one step.
            delta_t (float): Time duration of one step.
            max_step (int): Maximum number of steps in one episode.
            penalty (float): Score penalty given at the agent every step.
            break_value (float): Break value when performing break action.
        """
        # Agent Parameters
        self.num_actuators = num_actuators
        self.max_turn = max_turn
        self.max_acceleration = max_acceleration
        self.break_value = break_value

        # Environment Parameters
        self.delta_t = delta_t
        self.max_step = max_step
        self.field_size = 1.0
        self.target_radius = 0.1
        self.penalty = penalty

        # Barrier Parameters
        self.barrier_radius = 0.1
        self.barrier_num = 8

        # Initialization
        self.seed(seed)
        self.target = None
        self.viewer = None
        self.current_step = None
        self.agent = HardMoveAgent(break_value=break_value, delta_t=delta_t, num_actuators=self.num_actuators)

        parameters_min = np.array([-1 for _ in range(self.num_actuators)])
        parameters_max = np.array([+1 for _ in range(self.num_actuators)])

        self.action_space = spaces.Tuple(
            (spaces.Discrete(int(2 ** self.num_actuators)), spaces.Box(parameters_min, parameters_max))
        )
        # 10 base features + 8 lidar readings. Box takes (low, high); the
        # bounds used to be passed the other way round. They remain nominal:
        # lidar readings, for instance, can be as large as the ray length (2).
        self.observation_space = spaces.Box(-np.ones(10 + 8), np.ones(10 + 8))

    def seed(self, seed: Optional[int] = None) -> list:
        """Seed the environment's random generator and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)  # noqa
        return [seed]

    def _count_barrier_hits(self, x: float, y: float) -> int:
        """Count the barriers whose centre is closer than ``barrier_radius`` to (x, y)."""
        return sum(
            1 for barrier in self.barrier_circles
            if np.sqrt(((x - barrier.x) ** 2) + ((y - barrier.y) ** 2)) < self.barrier_radius
        )

    def reset(self) -> np.ndarray:
        """
        Start a new episode with a fresh target, barriers and agent pose.

        Returns:
            The initial observation (see ``get_state``).
        """
        self.current_step = 0

        limit = self.field_size - self.target_radius
        low = [-limit, -limit, self.target_radius]
        high = [limit, limit, self.target_radius]
        self.target = Target(*self.np_random.uniform(low, high))

        # Barriers: rejection-sample centres until none overlaps the target.
        limit_barrier = self.field_size - self.barrier_radius
        low = [-limit_barrier, -limit_barrier, self.barrier_radius]
        high = [limit_barrier, limit_barrier, self.barrier_radius]
        self.barrier_circles = []
        while len(self.barrier_circles) < self.barrier_num:
            candidate = Barrier(*self.np_random.uniform(low, high))
            gap = np.sqrt(((self.target.x - candidate.x) ** 2) + ((self.target.y - candidate.y) ** 2))
            if gap >= self.target_radius + self.barrier_radius:
                self.barrier_circles.append(candidate)

        # Agent pose (x, y, theta): resample until it starts outside every barrier.
        low = [-self.field_size, -self.field_size, 0]
        high = [self.field_size, self.field_size, 2 * np.pi]
        while True:
            agent = self.np_random.uniform(low, high)
            if self._count_barrier_hits(agent[0], agent[1]) == 0:
                break

        self.circle_barrier_polygons = compute_barrier_polygon(self.barrier_circles)
        self.field_polygons = compute_field_polygon(self.field_size)

        self.agent.reset(*agent)

        return self.get_state()

    def step(self, raw_action: Tuple[int, list]) -> Tuple[np.ndarray, float, int, bool, dict]:
        """
        Apply one action and advance the episode by one step.

        Args:
            raw_action: Pair ``(move_direction_meta, move_distances)``.

        Returns:
            ``(state, reward, cost, done, info)``. Note the extra ``cost``
            entry compared with the classic 4-tuple gym API.
        """
        move_direction_meta = raw_action[0]  # discrete choice in {0, ..., 2**n - 1}
        # Parameter vector; action_space declares shape (num_actuators,).
        # NOTE(review): the previous comment claimed shape (2**n,) -- confirm
        # against HardMoveAgent.move.
        move_distances = raw_action[1]
        last_distance = self.distance
        self.current_step += 1

        self.agent.move(move_direction_meta, move_distances)

        out_of_field = abs(self.agent.x) > self.field_size or abs(self.agent.y) > self.field_size
        if self.distance < self.target_radius:
            # Success: reaching the target is enough here, no stop required.
            reward = self.get_reward(last_distance, True)
            done = True
        elif out_of_field or self.current_step > self.max_step:
            reward = -1
            done = True
        else:
            reward = self.get_reward(last_distance)
            done = False

        cost = self._count_barrier_hits(self.agent.x, self.agent.y) * self.cost_per_violation

        return self.get_state(), reward, cost, done, {}

    def get_state(self) -> np.ndarray:
        """
        Build the observation vector.

        Returns:
            Array of 10 base features (agent x, y, speed, cos/sin of heading,
            target x, y, distance to target, inside-target flag, episode
            progress) followed by the lidar readings.
        """
        state = [
            self.agent.x, self.agent.y, self.agent.speed,
            np.cos(self.agent.theta),
            np.sin(self.agent.theta), self.target.x, self.target.y, self.distance,
            0 if self.distance > self.target_radius else 1, self.current_step / self.max_step
        ]

        state_lidar = lidar_detection_distance(self.agent, self.circle_barrier_polygons, self.field_polygons)

        return np.array(state + state_lidar)

    def get_reward(self, last_distance: float, goal: bool = False) -> float:
        """Progress towards the target minus the step penalty, plus 1 on success."""
        return last_distance - self.distance - self.penalty + (1 if goal else 0)

    @property
    def distance(self) -> float:
        """Current distance between the agent and the target centre."""
        return self.get_distance(self.agent.x, self.agent.y, self.target.x, self.target.y)

    @staticmethod
    def get_distance(x1: float, y1: float, x2: float, y2: float) -> float:
        """Euclidean distance between (x1, y1) and (x2, y2) as a plain Python scalar."""
        return np.sqrt(((x1 - x2) ** 2) + ((y1 - y2) ** 2)).item()

    # NOTE: render() is not implemented for this environment; a viewer-based
    # renderer was started here but had been left commented out.

    def close(self):
        """Close the viewer, if one exists."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None
