import numpy as np
import torch
import gym


def eval_policy(policy, env, seed, mean, std, norm, eval_episodes=10, num_seeds=5):
	"""Evaluate ``policy`` on ``env`` over several seeds and report the mean return.

	For each of ``num_seeds`` consecutive seeds (``seed``, ``seed + 1``, ...)
	the environment is re-seeded and ``eval_episodes`` episodes are rolled
	out. The per-seed score is the summed reward of those episodes divided by
	``eval_episodes``. The per-seed scores are printed, and their mean is
	returned.

	Note that ``env`` is used directly (not copied), so it is re-seeded and
	reset by this call.

	Args:
		policy: Object exposing ``select_action(state)``; it receives the
			observation reshaped to ``(1, -1)``.
		env: Environment exposing ``seed(int)``, ``reset()`` and
			``step(action)`` returning ``(state, reward, done, info)``.
		seed: Base seed; seed ``seed + i`` is used for the i-th run.
		mean: Offset subtracted from the observation when ``norm`` is truthy.
		std: Scale the shifted observation is divided by when ``norm`` is truthy.
		norm: Whether to apply ``(state - mean) / std`` before querying the policy.
		eval_episodes: Number of episodes rolled out per seed.
		num_seeds: Number of consecutive seeds to evaluate on.

	Returns:
		A one-element list holding the mean of the per-seed average returns.

	Raises:
		ValueError: If ``num_seeds`` is smaller than 1.
	"""
	if num_seeds < 1:
		raise ValueError(f"num_seeds must be at least 1, got {num_seeds}")

	reward_list = []
	for seed_offset in range(num_seeds):
		env.seed(seed + seed_offset)
		total_reward = 0.
		for _ in range(eval_episodes):
			state, done = env.reset(), False
			while not done:
				# The policy always sees a single-row 2-D observation.
				state = np.array(state).reshape(1, -1)
				if norm:
					state = (state - mean) / std
				action = policy.select_action(state)
				state, reward, done, _ = env.step(action)
				total_reward += reward

		# Record one score per seed; this must stay inside the seed loop,
		# otherwise only the last seed's result would be kept.
		reward_list.append(total_reward / eval_episodes)

	print("---------------------------------------")
	print(f"Evaluation over {eval_episodes} episodes: {np.mean(reward_list):.3f}")
	print(reward_list)
	print("---------------------------------------")
	return [sum(reward_list) / len(reward_list)]