import os
import sys
from sys import platform
if platform == "linux" or platform == "linux2":
	import tty
	import termios
import numpy as np
import cv2
import sounddevice as sd
import gym
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from Envs.audioLoader import audioLoader
from ..RSI2.RL_env_RSI2 import RLEnvRSI2, Task


class RLEnvRSI3(RLEnvRSI2):
	"""
	Environment derived from RLEnvRSI2 that exposes a dict observation.

	The observation contains the resized camera image, a local occupancy map, the goal sound and the
	currently heard sound, the goal label, and two feature placeholders that are zero-filled here.
	"""

	def __init__(self):
		"""Initialise the parent environment, then declare the dict observation space."""
		RLEnvRSI2.__init__(self)

		# observation space
		d = {
			'image': gym.spaces.Box(low=0, high=255, shape=self.config.img_dim, dtype='uint8'),
			'occupancy': gym.spaces.Box(low=0, high=255, shape=(1, self.config.RLVisibleGrid, self.config.RLVisibleGrid), dtype='uint8'),
			'goal_sound': gym.spaces.Box(low=-np.inf, high=np.inf, shape=self.config.sound_dim, dtype=np.float32),
			'current_sound': gym.spaces.Box(low=-np.inf, high=np.inf, shape=self.config.sound_dim, dtype=np.float32),
			# experiments have shown that x, z and orientation do not affect the accuracy
			# in fact, the robot should not be given this information because the agent could then just
			# remember the location instead of relying on vision
			#'robot_pose': gym.spaces.Box(low=-np.inf, high=np.inf,shape=(3,), dtype=np.float32),
			'goal_sound_label': gym.spaces.Box(low=0, high=self.config.taskNum+1, shape=(1,), dtype=np.int32),
			# the observations below will be filled in vec_pretext_normalize
			'goal_sound_feat': gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.config.representationDim,), dtype=np.float32),
			'image_feat': gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.config.representationDim,), dtype=np.float32),
		}

		self.observation_space = gym.spaces.Dict(d)

	def gen_obs(self):
		"""
		Refresh the cached simulator state and build the observation for the current step.

		On the first step of an episode (envStepCounter == 0) a goal sound is selected for the current task.
		On every later step the goal sound is replaced by an array of inf with the same shape.

		:return: a tuple (obs, sound_positive, sound_negative), where obs is a dict containing various types
			of observations
		"""
		# update object metadata
		self.updateObjMeta(list(self.objMeta.keys()))
		self.checkVisible()
		self.agentMeta = self.controller.last_event.metadata["agent"]
		rgb_image = self.controller.last_event.frame
		self.saveEpisodeImage(rgb_image)

		# NOTE(review): the target size is hard-coded; presumably it matches config.img_dim -- confirm
		image = cv2.resize(rgb_image, (96, 96))

		position = self.agentMeta['position']
		self.local_occupancy = self.get_local_occupancy_map(
			x=position['x'], z=position['z'], y=self.agentMeta['rotation']['y'])

		# sound_positive: the current sound heard by the agent
		# sound_negative: the sound that is not the current heard sound
		# current_sound_label: the ground truth label for the sound heard by the agent (unused here)
		# positive_audio: the audio for the heard sound, played back below when rendering
		sound_positive, sound_negative, current_sound_label, positive_audio = \
			self.get_positive_negative(get_negative=False, generate_audio=True)

		if self.envStepCounter == 0:  # prepare the goal sound
			# select an audio according to the task (the same call is used for training, rendering and testing)
			self.goal_sound, self.goal_audio, self.transcription = \
				self.audio.getAudioFromTask(self.np_random, self.task, Task)

			if self.config.render or not self.config.RLTrain:
				if self.goal_audio is not None and self.config.render:
					sd.play(self.goal_audio, self.audio.fs)
				print('Goal intent is', self.task.loc+' '+self.task.obj+' '+self.task.act)
		else:
			# after the first step the goal sound is masked out with inf
			self.goal_sound = np.ones_like(self.goal_sound)*np.inf

		# NOTE(review): on step 0 this may interrupt the goal audio started above -- confirm
		if self.config.render and positive_audio is not None:
			sd.play(positive_audio, self.audio.fs)

		obs = {
			# the image and the occupancy map are both rearranged to channel-first layout
			'image': np.transpose(image, (2, 0, 1)),
			'occupancy': np.transpose(np.expand_dims(self.local_occupancy, -1), (2, 0, 1)),
			'goal_sound': self.goal_sound,
			'current_sound': sound_positive,
			'goal_sound_label': self.taskID,
			# zero placeholders; created as float32 to agree with the declared observation space
			'goal_sound_feat': np.zeros((self.config.representationDim, ), dtype=np.float32),
			'image_feat': np.zeros((self.config.representationDim, ), dtype=np.float32),
		}

		return obs, sound_positive, sound_negative

	def step(self, action):
		"""
		Execute one action, advance the simulator and compute the reward and termination flag.

		:param action: index into config.allActions; ignored when config.RLManualControl is set, in which
			case keyboardControl() is called instead
		:return: a tuple (obs, reward, done, infoDict); infoDict carries 'goal_area_count' when an episode
			ends outside of training
		"""
		infoDict = {}

		if self.config.RLManualControl:
			self.keyboardControl()
		else:
			action_str = self.config.allActions[int(action)]
			self.exeAction(action_str)

		self.controller.step("Pass")  # fix the design choice that images from the Unity window lag by 1 step
		# update counters
		self.envStepCounter = self.envStepCounter + 1
		# get new obs
		obs, sound_positive, sound_negative = self.gen_obs()

		if self.config.use3rdCam:
			self.update3rdCam("Update")

		r = self.rewards()  # calculate reward
		self.reward = sum(r)
		self.episodeReward = self.episodeReward + self.reward
		self.done = self.termination()

		if not self.config.RLTrain:
			# outside of training, count the steps on which the task check succeeds
			if self.checkTaskDone():
				self.goal_area_count = self.goal_area_count + 1
			if self.done:
				infoDict['goal_area_count'] = self.goal_area_count
				print('goal area count-------------------------', self.goal_area_count)
				self.goal_area_count = 0

		return obs, self.reward, self.done, infoDict  # reset will be called if done

	def drawBoundingBox(self):
		"""Replace all patches on self.envAx with one red rectangle per entry of self.bb_dict."""
		# iterate over a copy because removing a patch mutates the axes' patch list
		for patch in list(self.envAx.patches):
			patch.remove()

		for bb in self.bb_dict.values():
			rect = patches.Rectangle((bb[0], bb[1]), bb[2] - bb[0], bb[3] - bb[1], linewidth=1, edgecolor='r',
				facecolor='none')
			# Add the patch to the Axes
			self.envAx.add_patch(rect)

	def getBoundingBox(self):
		"""
		Rebuild self.bb_dict from the 2D instance detections of the last simulator event.

		For every object in self.objMeta that has a detection, the box is enlarged by
		config.pretextBBPixelAmount on each side and clamped to the image bounds. Objects without a
		detection are left out of self.bb_dict.
		"""
		detection = self.controller.last_event.instance_detections2D
		self.bb_dict = {}
		if detection is None:
			# the event carries no detection data: leave bb_dict empty instead of raising a TypeError
			return

		pixelAmount = self.config.pretextBBPixelAmount
		max_x = self.config.img_dim[2]-1
		max_y = self.config.img_dim[1]-1
		for key, meta in self.objMeta.items():
			objID = meta['objectId']
			if objID not in detection:
				continue

			raw_bb = detection[objID]
			# make the region bigger
			self.bb_dict[key] = [
				max(0, raw_bb[0]-pixelAmount),  # upper left x
				max(0, raw_bb[1]-pixelAmount),  # upper left y
				min(raw_bb[2]+pixelAmount, max_x),  # lower right x
				min(raw_bb[3]+pixelAmount, max_y),  # lower right y
			]
