from math import *
import numpy as np
import torch
import random

class dynamic1():
    """Point-mass environment on a 9 x 13 rectangle with a goal box.

    The state is a 2-element position ``[x, y]``.  Each ``step`` moves the
    position by ``interval`` along the direction of the given action vector.
    The goal region is ``7 <= x <= 8`` and ``12 <= y <= 13``.

    Attributes:
        state: current position (``np.ndarray`` after sampling or stepping;
            otherwise whatever object the caller supplied).
        num_states: dimensionality of the state (2).
        num_actions: dimensionality of the action (2).
        interval: distance travelled per step (0.1).
    """

    # Extent of the playable rectangle; the lower bound is 0 on both axes.
    X_MAX = 9
    Y_MAX = 13
    # Inclusive (low, high) bounds of the goal box on each axis.
    GOAL_X = (7, 8)
    GOAL_Y = (12, 13)

    def __init__(self, state="NULL"):
        """Create the environment.

        Args:
            state: initial position, or ``"NULL"`` / ``None`` to sample one
                uniformly from the rectangle.
        """
        self.num_states = 2
        self.num_actions = 2
        self.interval = 0.1
        # reset() implements exactly the initialisation logic needed here.
        self.reset(state)

    def reset(self, state="NULL"):
        """Set the position and return it.

        Args:
            state: new position, or ``"NULL"`` / ``None`` to sample one
                uniformly from ``[0, X_MAX) x [0, Y_MAX)``.

        Returns:
            The new ``self.state``.
        """
        # Only a real string may match the sentinel: comparing an ndarray
        # with "NULL" is elementwise on recent NumPy and cannot be used as
        # an ``if`` condition.
        if state is None or (isinstance(state, str) and state == "NULL"):
            x = np.random.uniform(0, self.X_MAX)
            y = np.random.uniform(0, self.Y_MAX)
            self.state = np.array([x, y])
        else:
            self.state = state
        return self.state

    def feature(self):
        """Return the 2 x 1 feature column (``np.matrix``) of the current state.

        * inside the goal box: ``[50, 50]``
        * strictly outside the rectangle: ``[-0.5, -0.5]``
        * otherwise: ``[0.0001 * x, 0.0001 * x]``
        """
        x, y = self.state[0], self.state[1]
        if self.is_in_goal():
            return np.asmatrix([50, 50]).T
        # The comparisons are mutually exclusive, so they must be OR-ed;
        # AND-ing them made this branch unreachable.
        if x < 0 or x > self.X_MAX or y < 0 or y > self.Y_MAX:
            return np.asmatrix([-0.5, -0.5]).T
        # NOTE(review): both components use x; kept as in the original.
        return np.asmatrix([0.0001 * x, 0.0001 * x]).T

    def is_out_scope(self):
        """Return 1 if the position is on or beyond the rectangle border, else 0."""
        x, y = self.state[0], self.state[1]
        if x <= 0 or x >= self.X_MAX or y <= 0 or y >= self.Y_MAX:
            return 1
        return 0

    def is_in_goal(self):
        """Return 1 if the position lies inside the goal box (inclusive), else 0."""
        x, y = self.state[0], self.state[1]
        in_x = self.GOAL_X[0] <= x <= self.GOAL_X[1]
        in_y = self.GOAL_Y[0] <= y <= self.GOAL_Y[1]
        return 1 if (in_x and in_y) else 0

    def step(self, action, omega, theta):
        """Advance one step along ``action`` and compute the reward.

        Args:
            action: 2-element direction vector; only its direction matters,
                since it is normalised.  A zero vector leaves the position
                unchanged instead of dividing by zero.
            omega: feature weights; ``np.dot(omega.T, feature)`` must reduce
                to a single value (presumably a 2 x 1 column - confirm with
                the caller).
            theta: 2-D table indexed as ``theta[int(x), int(y)]``; its value
                times 5 is subtracted from the reward.

        Returns:
            ``(state, reward, in_goal, out_of_scope)``.  The reward is ``-5``
            when the new position is out of scope.
        """
        norm = sqrt(action[0] ** 2 + action[1] ** 2)
        if norm == 0:
            # No direction to move in: stay where we are.
            x, y = self.state[0], self.state[1]
        else:
            x = self.interval * (action[0] / norm) + self.state[0]
            y = self.interval * (action[1] / norm) + self.state[1]
        self.state = np.array([x, y])

        out_of_scope = self.is_out_scope()
        if out_of_scope:
            reward = -5
        else:
            # In scope means 0 < x < X_MAX and 0 < y < Y_MAX, so the
            # truncated coordinates are non-negative cell indices.
            cell_cost = theta[int(x), int(y)] * 5.0
            reward = np.dot(omega.T, self.feature()).item() - cell_cost
        return self.state, reward, self.is_in_goal(), out_of_scope

class dynamic2():
    """Point-mass environment on a 9 x 13 rectangle with a goal box.

    The state is a 2-element position ``[x, y]``.  Each ``step`` moves the
    position by ``interval`` along the direction of the given action vector.
    The goal region is ``1 <= x <= 2`` and ``12 <= y <= 13``.

    Attributes:
        state: current position (``np.ndarray`` after sampling or stepping;
            otherwise whatever object the caller supplied).
        num_states: dimensionality of the state (2).
        num_actions: dimensionality of the action (2).
        interval: distance travelled per step (0.1).
        terminal_reward: constant 100; not used inside this class.
    """

    # Extent of the playable rectangle; the lower bound is 0 on both axes.
    X_MAX = 9
    Y_MAX = 13
    # Inclusive (low, high) bounds of the goal box on each axis.
    GOAL_X = (1, 2)
    GOAL_Y = (12, 13)

    def __init__(self, state="NULL"):
        """Create the environment.

        Args:
            state: initial position, or ``"NULL"`` / ``None`` to sample one
                uniformly from the rectangle.
        """
        self.num_states = 2
        self.num_actions = 2
        self.interval = 0.1
        self.terminal_reward = 100
        # reset() implements exactly the initialisation logic needed here.
        self.reset(state)

    def reset(self, state="NULL"):
        """Set the position and return it.

        Args:
            state: new position, or ``"NULL"`` / ``None`` to sample one
                uniformly from ``[0, X_MAX) x [0, Y_MAX)``.

        Returns:
            The new ``self.state``.
        """
        # Only a real string may match the sentinel: comparing an ndarray
        # with "NULL" is elementwise on recent NumPy and cannot be used as
        # an ``if`` condition.
        if state is None or (isinstance(state, str) and state == "NULL"):
            x = np.random.uniform(0, self.X_MAX)
            y = np.random.uniform(0, self.Y_MAX)
            self.state = np.array([x, y])
        else:
            self.state = state
        return self.state

    def feature(self):
        """Return the 2 x 1 feature column (``np.matrix``) of the current state.

        * inside the goal box: ``[50, 50]``
        * strictly outside the rectangle: ``[-1.0, -1.0]``
        * otherwise: ``[0.0001 * (9 - x), 0.0001 * x]``
        """
        x, y = self.state[0], self.state[1]
        if self.is_in_goal():
            return np.asmatrix([50, 50]).T
        # The comparisons are mutually exclusive, so they must be OR-ed;
        # AND-ing them made this branch unreachable.
        if x < 0 or x > self.X_MAX or y < 0 or y > self.Y_MAX:
            return np.asmatrix([-1.0, -1.0]).T
        return np.asmatrix([0.0001 * (9 - x), 0.0001 * x]).T

    def is_out_scope(self):
        """Return 1 if the position is on or beyond the rectangle border, else 0."""
        x, y = self.state[0], self.state[1]
        if x <= 0 or x >= self.X_MAX or y <= 0 or y >= self.Y_MAX:
            return 1
        return 0

    def is_in_goal(self):
        """Return 1 if the position lies inside the goal box (inclusive), else 0."""
        x, y = self.state[0], self.state[1]
        in_x = self.GOAL_X[0] <= x <= self.GOAL_X[1]
        in_y = self.GOAL_Y[0] <= y <= self.GOAL_Y[1]
        return 1 if (in_x and in_y) else 0

    def step(self, action, omega, theta):
        """Advance one step along ``action`` and compute the reward.

        Args:
            action: 2-element direction vector; only its direction matters,
                since it is normalised.  A zero vector leaves the position
                unchanged instead of dividing by zero.
            omega: feature weights; ``np.dot(omega.T, feature)`` must reduce
                to a single value (presumably a 2 x 1 column - confirm with
                the caller).
            theta: 2-D table indexed as ``theta[int(x), int(y)]``; its value
                times 5 is subtracted from the reward.

        Returns:
            ``(state, reward, in_goal, out_of_scope)``.  The reward is ``-5``
            when the new position is out of scope.
        """
        norm = sqrt(action[0] ** 2 + action[1] ** 2)
        if norm == 0:
            # No direction to move in: stay where we are.
            x, y = self.state[0], self.state[1]
        else:
            x = self.interval * (action[0] / norm) + self.state[0]
            y = self.interval * (action[1] / norm) + self.state[1]
        self.state = np.array([x, y])

        out_of_scope = self.is_out_scope()
        if out_of_scope:
            reward = -5
        else:
            # In scope means 0 < x < X_MAX and 0 < y < Y_MAX, so the
            # truncated coordinates are non-negative cell indices.
            cell_cost = theta[int(x), int(y)] * 5.0
            reward = np.dot(omega.T, self.feature()).item() - cell_cost
        return self.state, reward, self.is_in_goal(), out_of_scope

#if __name__ == "__main__":



