
class Optimal_Policy(object):
    """Baseline agent that plays a fixed action (1) on every step.

    The environment object is only required to expose two methods:

    * ``reset()`` -- called once at the start of each episode.
    * ``advance(action)`` -- must return a 3-tuple whose first item is the
      step reward and whose last item is a truthy value once the episode
      has ended. The middle item is ignored here (presumably the next
      state -- confirm against the environment implementation).
    """

    # The single action this policy ever plays; the message printed by
    # ``run`` describes it as going "to the Right".
    _FIXED_ACTION = 1

    def __init__(self, env, N):
        """Store the environment and the number of episodes to play."""
        self.env = env
        self.N = N

    def _play_episode(self):
        """Reset the environment, play one episode, return its summed reward."""
        self.env.reset()
        total = 0
        # Open-loop control: nothing observed from the environment affects
        # the chosen action, so the step is simply repeated until the
        # environment reports termination. At least one step is always taken.
        while True:
            reward, _, finished = self.env.advance(self._FIXED_ACTION)
            total += reward
            if finished:
                break
        return total

    def run(self):
        """Play ``N`` episodes and return a list with each episode's return."""
        print("Go to the Right!")
        return [self._play_episode() for _ in range(1, self.N + 1)]
        