import numpy as np, sys, random


class GFootball:
	"""Wrapper around the Google Research Football environment that exposes a
	list-per-agent reset/step interface for RL.

		Parameters:
			frameskip: Stored on the instance; not otherwise used by this wrapper
			T: Stored on the instance; not otherwise used by this wrapper
			config (str): Scenario name, forwarded as env_name to create_environment
			action_dim (int): Number of discrete actions; sizes the action tracker
			num_agents (int): Number of left-team players controlled by the agent(s)
			render: Forwarded as render to create_environment
			trace_path: Forwarded as logdir to create_environment
			tracedump: Forwarded as write_goal_dumps and write_full_episode_dumps

	"""

	#Divisors applied to the normalized frequency of selected action ids before
	#squaring (see the id table in shaped_entropy_bonus)
	_ENTROPY_BIAS = {9: 2.0, 10: 2.0, 11: 5.0, 16: 2.0, 17: 2.0, 18: 2.0}

	def __init__(self, frameskip, T, config, action_dim, num_agents, render, trace_path, tracedump):
		"""
		Build the underlying football environment and initialize bookkeeping.
		"""
		#Initialize world with requisite params
		self.frameskip = frameskip
		self.T = T
		self.istep = 0
		self.action_dim = action_dim
		self.num_agents = num_agents

		#Imported lazily so the module can be loaded without gfootball installed
		import gfootball.env as football_env
		self.env = football_env.create_environment(
			env_name=config,
			#Comma separated WITHOUT spaces: gfootball splits this string on ','
			#so 'scoring, checkpoints' would not match 'checkpoints'
			rewards='scoring,checkpoints',
			number_of_left_players_agent_controls=num_agents,
			representation='simple115',
			render=render,
			logdir=trace_path,
			write_goal_dumps=tracedump,
			write_full_episode_dumps=tracedump,
			dump_frequency=1
			)

		#Action Space
		self.action_low = -1.0
		self.action_high = 1.0

		#Action tracker: one counter per discrete action id.
		#NOTE(review): nothing in this class increments these counters;
		#presumably the caller updates them - confirm against call sites
		self.episode_actions = [0] * action_dim


	def reset(self):
		"""Reset the underlying environment and the step counter

			Parameters:
				None

			Returns:
				next_obs (list): Initial observation; wrapped in a one-element
					list when a single agent is controlled
		"""
		self.istep = 0
		if self.num_agents == 1:
			return [self.env.reset()]
		return self.env.reset()


	def step(self, action): #Expects a numpy action
		"""Take an action to forward the simulation

			Parameters:
				action (ndarray): one entry per agent; each entry is cast to int

			Returns:
				next_obs (list): Next state (wrapped in a list for a single agent)
				reward (list): Reward for this step (wrapped in a list for a single agent)
				done (list): The env's done flag repeated num_agents times
				score_reward: The 'score_reward' entry of the env's info dict
		"""
		action_n = [int(act) for act in action]
		next_state_n, reward_n, done_n, info = self.env.step(action_n)
		if self.num_agents == 1:
			next_state_n = [next_state_n]
			reward_n = [reward_n]
		done_n = [done_n] * self.num_agents
		return next_state_n, reward_n, done_n, info['score_reward']


	def render(self):
		"""Render the underlying environment"""
		self.env.render()


	def shaped_entropy_bonus(self):
		"""Diversity bonus computed from the action tracker

			The counts in self.episode_actions are normalized to frequencies,
			the frequencies of the action ids listed in _ENTROPY_BIAS are divided
			by their divisor, and the negated sum of squared values is returned.

			Action ids:
				0: action_idle,
				1: action_left,
				2: action_top_left,
				3: action_top,
				4: action_top_right,
				5: action_right,
				6: action_bottom_right,
				7: action_bottom,
				8: action_bottom_left,
				9: action_long_pass,
				10: action_high_pass,
				11: action_short_pass,
				12: action_shot,
				13: action_sprint,
				14: action_release_direction,
				15: action_release_sprint,
				16: action_sliding,
				17: action_dribble,
				18: action_release_dribble,

			Returns:
				bonus (float): Value in [-1, 0]; 0.0 when no action has been counted
			"""
		counts = self.episode_actions

		#Hoisted out of the comprehension; also lets us guard the empty tracker
		total = sum(counts)
		if total == 0:
			#No recorded actions: frequencies are undefined, so give no bonus
			#instead of raising ZeroDivisionError
			return 0.0

		action_dist = [(a / total) for a in counts]

		#Biased entropy count (ids beyond the tracker length are skipped so a
		#smaller action_dim does not raise IndexError)
		for action_id, divisor in self._ENTROPY_BIAS.items():
			if action_id < len(action_dist):
				action_dist[action_id] /= divisor

		return -sum(p ** 2 for p in action_dist)



class Navigation:
	"""Toy 2-D grid navigation task with a gym-like reset/step/render interface

		Parameters:
			args: Stored on the instance as self.args; not read by this class


	"""

	#Maps a discrete action id to its (dx, dy) displacement; any other id is a no-op
	_STEP_DELTAS = {0: (0, 1), 1: (1, 0), 2: (0, -1), 3: (-1, 0)}

	def __init__(self, args):
		"""
		Store args and set the world size, episode length and step counter.
		"""
		self.args = args

		#World extent and episode horizon (three times the world extent)
		self.wdim = 10
		self.ep_len = 3 * self.wdim

		#Agent and goal coordinates are only assigned by reset()
		self.pos = None
		self.goal = None

		#Number of steps taken in the current episode
		self.i = 0


	def _observation(self):
		"""Return [pos_x, pos_y, goal_x, goal_y] as an array of shape (1, 4)"""
		return np.array([self.pos + self.goal])


	def reset(self):
		"""Start a new episode

			The agent is placed uniformly at random in [-wdim, wdim] on each axis
			and the goal is placed wdim away on both axes, in one of the four
			diagonal directions chosen with equal probability.

			Returns:
				obs (ndarray): Initial observation of shape (1, 4)
		"""
		span = self.wdim
		start_x = random.randint(-span, span)
		start_y = random.randint(-span, span)
		self.pos = [start_x, start_y]

		#Pick the diagonal direction of the goal relative to the start
		roll = random.random()
		if roll < 0.25:
			sign_x, sign_y = 1, 1
		elif roll < 0.5:
			sign_x, sign_y = -1, 1
		elif roll < 0.75:
			sign_x, sign_y = 1, -1
		else:
			sign_x, sign_y = -1, -1
		self.goal = [start_x + sign_x * span, start_y + sign_y * span]

		self.i = 0
		return self._observation()


	def step(self, action): #Expects a numpy action
		"""Move the agent by one cell and score its distance to the goal

			Parameters:
				action (ndarray): only the first entry is used, cast to int
					(0: +y, 1: +x, 2: -y, 3: -x, anything else: stay)

			Returns:
				next_obs (ndarray): Next observation of shape (1, 4)
				reward (ndarray): One-element array holding the negated Manhattan distance to the goal
				done (bool): True once ep_len steps have been taken
				info (None): Always None
		"""
		self.i += 1

		move_x, move_y = self._STEP_DELTAS.get(int(action[0]), (0, 0))
		self.pos[0] += move_x
		self.pos[1] += move_y

		gap_x = abs(self.pos[0] - self.goal[0])
		gap_y = abs(self.pos[1] - self.goal[1])
		reward = np.array([-(gap_x + gap_y)])

		return self._observation(), reward, self.i >= self.ep_len, None


	def render(self):
		"""Rendering is not implemented for this environment"""
		return None





# env = Football(None)
# env.reset()
# i = 0
# while True:
# 	action = env.env.action_space.sample()
# 	s,r,d,_ = env.step(action)
# 	i+=1
# 	print(i, r)
# 	if d: break






#
# class SimpleSpread:
# 	"""Wrapper around the Environment to expose a cleaner interface for RL
#
# 		Parameters:
# 			env_name (str): Env name
#
#
# 	"""
# 	def __init__(self, args, num_envs=1):
# 		"""
# 		A base template for all environment wrappers.
# 		"""
# 		#Initialize world with requiste params
# 		self.args = args
# 		self.num_envs = num_envs
# 		self.i = 0
# 		self.T = 20
#
# 		from envs.maddpg_envs.make_env import make_env
#
# 		self.universe = [] #Universe - collection of all envs running in parallel
# 		for _ in range(num_envs):
# 			env = make_env(args.config.config)
# 			self.universe.append(env)
#
# 		self.global_reward = [0.0 for _ in range(num_envs)]
#
#
#
# 	def reset(self):
# 		"""Method overloads reset
# 			Parameters:
# 				None
#
# 			Returns:
# 				next_obs (list): Next state
# 		"""
# 		#Reset Global Reward and dones
# 		self.global_reward = [0.0 for _ in range(self.num_envs)]
# 		self.i = 0
#
# 		#Get joint observation
# 		joint_obs = []
# 		for env in self.universe:
# 			obs = env.reset()
# 			joint_obs.append(obs)
#
# 		joint_obs = np.stack(joint_obs, axis=1)
# 		return joint_obs
#
#
# 	def step(self, action): #Expects a numpy action
# 		"""Take an action to forward the simulation
#
# 			Parameters:
# 				action (ndarray): action to take in the env
#
# 			Returns:
# 				next_obs (list): Next state
# 				reward (float): Reward for this step
# 				done (bool): Simulation done?
# 				info (None): Template from OpenAi gym (doesnt have anything)
# 		"""
#
# 		joint_obs = []; joint_reward = []; joint_done = []
# 		self.i+=1
#
# 		for universe_id, env in enumerate(self.universe):
#
# 			#If this particular env instance in universe is already done:
# 			next_state, reward, _, _ = env.step(action[:,universe_id,:])
# 			done = self.i > self.T
# 			joint_obs.append(next_state); joint_reward.append(reward); joint_done.append(done)
# 			self.global_reward[universe_id] += sum(reward) / ((len(reward) * self.T))
#
#
# 		joint_obs = np.stack(joint_obs, axis=1)
# 		joint_reward = np.stack(joint_reward, axis=1)
#
# 		return joint_obs, joint_reward, joint_done, self.global_reward if done else [None for _ in range(self.num_envs)]
#
#
#
# 	def render(self, env_id=None):
# 		if env_id == None:
# 			rand_univ = np.random.randint(0, len(self.universe))
# 		else: rand_univ = env_id
#
# 		self.universe[rand_univ].render()
#
#
# class RoverDomainPython:
# 	"""Wrapper around the Environment to expose a cleaner interface for RL
#
# 		Parameters:
# 			env_name (str): Env name
#
#
# 	"""
# 	def __init__(self, args, num_envs):
# 		"""
# 		A base template for all environment wrappers.
# 		"""
# 		#Initialize world with requiste params
# 		self.args = args
#
# 		from envs.rover_domain.rover_domain_python import RoverDomainVel
#
# 		self.universe = [] #Universe - collection of all envs running in parallel
# 		for _ in range(num_envs):
# 			env = RoverDomainVel(args.config)
# 			self.universe.append(env)
#
# 		#Action Space
# 		self.action_low = -1.0
# 		self.action_high = 1.0
#
#
# 	def reset(self):
# 		"""Method overloads reset
# 			Parameters:
# 				None
#
# 			Returns:
# 				next_obs (list): Next state
# 		"""
# 		joint_obs = []
# 		for env in self.universe:
# 			obs = env.reset()
# 			joint_obs.append(obs)
#
# 		joint_obs = np.stack(joint_obs, axis=1)
# 		#returns [agent_id, universe_id, obs]
#
# 		return joint_obs
#
#
# 	def step(self, action): #Expects a numpy action
# 		"""Take an action to forward the simulation
#
# 			Parameters:
# 				action (ndarray): action to take in the env
#
# 			Returns:
# 				next_obs (list): Next state
# 				reward (float): Reward for this step
# 				done (bool): Simulation done?
# 				info (None): Template from OpenAi gym (doesnt have anything)
# 		"""
#
# 		joint_obs = []; joint_reward = []; joint_done = []; joint_global = []
# 		for universe_id, env in enumerate(self.universe):
# 			next_state, reward, done, info = env.step(action[:,universe_id,:])
# 			joint_obs.append(next_state); joint_reward.append(reward); joint_done.append(done); joint_global.append(info)
#
# 		joint_obs = np.stack(joint_obs, axis=1)
# 		joint_reward = np.stack(joint_reward, axis=1)
#
# 		return joint_obs, joint_reward, joint_done, joint_global
#
#
#
# 	def render(self):
#
# 		rand_univ = np.random.randint(0, len(self.universe))
# 		try: self.universe[rand_univ].render()
# 		except: 'Error rendering'
#
#
# class MotivateDomain:
# 	"""Wrapper around the Environment to expose a cleaner interface for RL
#
# 		Parameters:
# 			env_name (str): Env name
#
#
# 	"""
# 	def __init__(self, args, num_envs):
# 		"""
# 		A base template for all environment wrappers.
# 		"""
# 		#Initialize world with requiste params
# 		self.args = args
#
# 		from envs.rover_domain.motivate_domain import MotivateDomain
#
# 		self.universe = [] #Universe - collection of all envs running in parallel
# 		for _ in range(num_envs):
# 			env = MotivateDomain(args.config)
# 			self.universe.append(env)
#
# 		#Action Space
# 		self.action_low = -1.0
# 		self.action_high = 1.0
#
#
# 	def reset(self):
# 		"""Method overloads reset
# 			Parameters:
# 				None
#
# 			Returns:
# 				next_obs (list): Next state
# 		"""
# 		joint_obs = []
# 		for env in self.universe:
# 			obs = env.reset()
# 			joint_obs.append(obs)
#
# 		joint_obs = np.stack(joint_obs, axis=1)
# 		#returns [agent_id, universe_id, obs]
#
# 		return joint_obs
#
#
# 	def step(self, action): #Expects a numpy action
# 		"""Take an action to forward the simulation
#
# 			Parameters:
# 				action (ndarray): action to take in the env
#
# 			Returns:
# 				next_obs (list): Next state
# 				reward (float): Reward for this step
# 				done (bool): Simulation done?
# 				info (None): Template from OpenAi gym (doesnt have anything)
# 		"""
#
# 		joint_obs = []; joint_reward = []; joint_done = []; joint_global = []
# 		for universe_id, env in enumerate(self.universe):
# 			next_state, reward, done, info = env.step(action[:,universe_id,:])
# 			joint_obs.append(next_state); joint_reward.append(reward); joint_done.append(done); joint_global.append(info)
#
# 		joint_obs = np.stack(joint_obs, axis=1)
# 		joint_reward = np.stack(joint_reward, axis=1)
#
# 		return joint_obs, joint_reward, joint_done, joint_global
#
#
#
# 	def render(self):
#
# 		rand_univ = np.random.randint(0, len(self.universe))
# 		self.universe[rand_univ].render()
# 		print(self.universe[rand_univ].poi_visitor_list)
#
#
#
# class DM_Soccer:
# 	"""Wrapper around the Environment to expose a cleaner interface for RL
# 		Parameters:
# 			env_name (str): Env name
# 	"""
# 	def __init__(self, env_name, ALGO):
# 		"""
# 		A base template for all environment wrappers.
# 		"""
# 		from dm_control.locomotion import soccer as dm_soccer
#
# 		import gym
# 		self.env = dm_soccer.load(team_size=2, time_limit=10.)
# 		self.action_specs = env.action_spec()
# 		self.ALGO = ALGO
#
#
#
#
# 	def reset(self):
# 		"""Method overloads reset
# 			Parameters:
# 				None
# 			Returns:
# 				next_obs (list): Next state
# 		"""
# 		return self.env.reset()
#
#
# 	def step(self, action: object): #Expects a numpy action
# 		"""Take an action to forward the simulation
# 			Parameters:
# 				action (ndarray): action to take in the env
# 			Returns:
# 				next_obs (list): Next state
# 				reward (float): Reward for this step
# 				done (bool): Simulation done?
# 				info (None): Template from OpenAi gym (doesnt have anything)
# 		"""
#
# 		if self.ALGO == "SAC": action = (action + 1.0) / 2.0  # [-1, 1] => [0, 1]
#
# 		action = self.action_low + action * (self.action_high - self.action_low)
# 		return self.env.step(action)
#
# 	def render(self):
# 		self.env.render()


