from math import *
import numpy as np
import torch
import random
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
# Seed the global NumPy and PyTorch random generators with a fixed value at
# import time. The stdlib `random` module imported above is not seeded here.
np.random.seed(0)
torch.manual_seed(0)



class dynamics():
    """Planar point environment with a goal and a rectangular obstacle.

    The state is an ``(x, y)`` position inside a ``x_lim`` by ``y_lim`` arena.
    Each call to :meth:`step` moves the point a fixed distance ``interval``
    in the direction of the supplied 2-D action.

    Args:
        goal: Indexable ``(x, y)`` position of the goal.
        obstacle: Indexable ``(x, y)`` position of the lower-left corner of
            the obstacle rectangle.
    """

    # Half-width of the axis-aligned box around the goal that ends an episode.
    GOAL_DONE_HALF_WIDTH = 0.5
    # Euclidean radius around the goal that pays the goal reward (also the
    # radius of the goal marker in draw_trajectory).
    GOAL_REWARD_RADIUS = 0.15
    # (width, height) of the obstacle rectangle that ends an episode.
    OBSTACLE_DONE_SIZE = (3.0, 1.0)
    # (width, height) of the obstacle rectangle used for the reward penalty,
    # the cost feature and the drawing.
    OBSTACLE_COST_SIZE = (5.0, 0.2)
    # NOTE(review): termination and reward/cost use different goal and
    # obstacle extents. The values are kept exactly as found -- confirm
    # whether the mismatch is intended.

    def __init__(self, goal, obstacle):
        self.x_lim = 7.0
        self.y_lim = 12.0
        self.x_initial = 3.5
        self.y_initial = 1.0
        self.state = np.array([self.x_initial, self.y_initial])
        self.dim_states = 2
        self.dim_actions = 2
        self.interval = 0.1  # distance travelled per step
        self.goal = goal
        self.obstacle = obstacle

    def reset(self, state="NULL"):
        """Set the current state and return it.

        Args:
            state: Position to reset to. When omitted (the ``"NULL"``
                sentinel) or ``None``, a position is drawn uniformly from
                ``[0, x_lim) x [0, y_lim)``. Any other value is stored as
                given, without copying.

        Returns:
            The new current state.
        """
        # Compare against the sentinel only for real strings: comparing a
        # NumPy array with "NULL" can yield an element-wise array whose truth
        # value is ambiguous and raises ValueError.
        use_random = state is None or (isinstance(state, str) and state == "NULL")
        if use_random:
            x = np.random.uniform(0, self.x_lim)
            y = np.random.uniform(0, self.y_lim)
            self.state = np.array([x, y])
        else:
            self.state = state
        return self.state

    def _inside_obstacle(self, size):
        """Return whether the state lies in the obstacle rectangle of ``size``.

        ``size`` is ``(width, height)``, measured from ``self.obstacle``;
        the rectangle's edges are inclusive.
        """
        width, height = size
        x, y = self.state[0], self.state[1]
        return (self.obstacle[0] <= x <= self.obstacle[0] + width
                and self.obstacle[1] <= y <= self.obstacle[1] + height)

    def is_done(self):
        """Return 1 if the episode has terminated, otherwise 0.

        The episode ends when the state is inside the goal box, on or outside
        the arena boundary, or inside the terminating obstacle rectangle.
        """
        x, y = self.state[0], self.state[1]
        half_width = self.GOAL_DONE_HALF_WIDTH
        at_goal = (abs(x - self.goal[0]) <= half_width
                   and abs(y - self.goal[1]) <= half_width)
        out_of_bounds = (x <= 0.0 or x >= self.x_lim
                         or y <= 0.0 or y >= self.y_lim)
        if at_goal or out_of_bounds or self._inside_obstacle(self.OBSTACLE_DONE_SIZE):
            return 1
        return 0

    def reward(self):
        """Return the reward for the current state.

        Returns:
            ``100.0`` within ``GOAL_REWARD_RADIUS`` of the goal, ``-10.0``
            inside the obstacle cost rectangle, ``0.0`` otherwise. The goal
            check takes precedence.
        """
        dx = self.state[0] - self.goal[0]
        dy = self.state[1] - self.goal[1]
        if sqrt(dx**2 + dy**2) <= self.GOAL_REWARD_RADIUS:
            return 100.0
        if self._inside_obstacle(self.OBSTACLE_COST_SIZE):
            return -10.0
        return 0.0

    def cost_feature(self):
        """Return a length-1 array: ``[10.0]`` inside the obstacle cost rectangle, else ``[0.0]``."""
        if self._inside_obstacle(self.OBSTACLE_COST_SIZE):
            return np.array([10.0])
        return np.array([0.0])

    def step(self, action):
        """Move ``interval`` along the direction of ``action``.

        Args:
            action: Indexable with two components ``(dx, dy)``. Only its
                direction matters; it is normalised before use. A zero-length
                action leaves the position unchanged.

        Returns:
            Tuple ``(state, reward, done, cost_feature)`` evaluated at the
            new state.
        """
        norm = sqrt(action[0]**2 + action[1]**2)
        if norm == 0.0:
            # A zero vector has no direction; stay in place instead of
            # dividing by zero.
            x, y = self.state[0], self.state[1]
        else:
            x = self.interval * (action[0] / norm) + self.state[0]
            y = self.interval * (action[1] / norm) + self.state[1]
        self.state = np.array([x, y])
        return self.state, self.reward(), self.is_done(), self.cost_feature()

    def draw_trajectory(self, trajectory_set):
        """Plot trajectories together with the goal and the obstacle.

        Args:
            trajectory_set: Sequence of trajectories; each trajectory is a
                sequence of points indexable as ``point[0]`` (x) and
                ``point[1]`` (y).

        Prints the length of every trajectory and shows the figure with
        ``plt.show()``.
        """
        fig, ax = plt.subplots()
        ax.axis('scaled')
        ax.set_xticks(np.linspace(0, int(self.x_lim), int(self.x_lim + 1)))
        ax.set_yticks(np.linspace(0, int(self.y_lim), int(self.y_lim + 1)))
        ax.axis([0, int(self.x_lim), 0, int(self.y_lim)])
        for trajectory in trajectory_set:
            xs = [point[0] for point in trajectory]
            ys = [point[1] for point in trajectory]
            ax.plot(xs, ys, color='tab:blue', linewidth=2)
            print('the length of the trajectory', len(trajectory))
        # Hide tick marks and labels; only the geometry is of interest.
        ax.tick_params(left=False, right=False, labelleft=False,
                       labelbottom=False, bottom=False)
        goal = plt.Circle((self.goal[0], self.goal[1]),
                          self.GOAL_REWARD_RADIUS, color='g')
        obstacle_width, obstacle_height = self.OBSTACLE_COST_SIZE
        obstacle = plt.Rectangle((self.obstacle[0], self.obstacle[1]),
                                 obstacle_width, obstacle_height,
                                 edgecolor='black', facecolor='black')
        ax.add_patch(goal)
        ax.add_patch(obstacle)
        plt.show()






