import os
import sys
import json
import numpy as np
from PIL import Image
from datetime import datetime
from env.thor_env import ThorEnv
from eval.eval import Eval
import torch
import networks.CLIP.clip.clip as clip
from queue import Queue
from collections import deque
from wrapt_timeout_decorator import *
import random

# @timeout(30)
def va_interact(args, env, action_words, interact_mask):
    """Forward a single action to ``env.va_interact``.

    The smooth-navigation and debug switches are taken from ``args``
    (``args.smooth_nav`` / ``args.debug``); whatever the environment call
    returns is handed back to the caller unchanged.
    """
    interact = env.va_interact
    options = {
        'interact_mask': interact_mask,
        'smooth_nav': args.smooth_nav,
        'debug': args.debug,
    }
    return interact(action_words, **options)

class EvalSubgoals(Eval):
    '''
    evaluate subgoals by teacher-forcing expert demonstrations
    '''

    # subgoal types
    ALL_SUBGOALS = ['GotoLocation', 'PickupObject', 'PutObject', 'CoolObject', 'HeatObject', 'CleanObject', 'SliceObject', 'ToggleObject']
    goal_tensor_record = {}
    flag = False
    
    @classmethod
    def get_goal_tensor(cls, goal_ids, device):
        '''
        return the CLIP text embedding for tokenized goal ids, with caching

        On the first call the "RN50" CLIP model is loaded onto `device`, its
        parameters are frozen and its memory is marked as shared. Embeddings
        are cached in `goal_tensor_record`, keyed by the token ids of the
        first row of `goal_ids` joined with "_".
        '''
        # lazily create the frozen text encoder the first time it is needed
        if not cls.flag:
            encoder, _ = clip.load("RN50", device=device)
            cls.clip = encoder
            for weight in cls.clip.parameters():
                weight.requires_grad = False

            cls.clip.share_memory()
            cls.flag = True

        cls.device = torch.device(device)

        cache_key = "_".join(str(int(token)) for token in goal_ids[0])
        if cache_key in cls.goal_tensor_record:
            return cls.goal_tensor_record[cache_key]

        # cache miss: encode once without tracking gradients and remember it
        with torch.no_grad():
            token_ids = goal_ids.to(torch.int).to(cls.device)
            text_feat = cls.clip.encode_text(token_ids).to(torch.float32)
        cls.goal_tensor_record[cache_key] = text_feat.detach()

        return cls.goal_tensor_record[cache_key]
    
    def __init__(self, args, agent, manager):
        '''
        hand args, agent and manager straight to the base evaluator
        '''
        super(EvalSubgoals, self).__init__(args, agent, manager)
    
    @classmethod
    def run(cls, model, resnet, task_queue, args, lock, successes, failures, results):
        '''
        evaluation loop

        Pulls tasks from `task_queue` until it is empty and, for every task,
        evaluates each high-level subgoal whose action type was requested via
        `args.subgoals` ("all" or a comma-separated list). `evaluate` is
        retried up to 5 times per subgoal; whenever it returns False the
        simulator is stopped and a fresh one is started. Exceptions raised
        while processing a task are printed and the loop moves on to the next
        task.

        `successes`, `failures` and `results` are the shared stats mappings
        and `lock` guards updates to them.
        '''
        # local import: only this method needs Empty
        from queue import Empty

        max_env_restarts = 5

        # start THOR
        env = ThorEnv()

        try:
            # make subgoals list
            requested = cls.ALL_SUBGOALS if args.subgoals.lower() == "all" else args.subgoals.split(',')
            subgoals_to_evaluate = [sg for sg in requested if sg in cls.ALL_SUBGOALS]
            print ("Subgoals to evaluate: %s" % str(subgoals_to_evaluate))

            # create empty stats per subgoal; only fill in missing entries so a
            # worker that starts late does not wipe what others already logged
            with lock:
                for sg in subgoals_to_evaluate:
                    if sg not in successes:
                        successes[sg] = list()
                    if sg not in failures:
                        failures[sg] = list()

            while task_queue.qsize() > 0:
                # another worker may grab the last task between the size check
                # and the get, so never block indefinitely here
                try:
                    task = task_queue.get(timeout=1)
                except Empty:
                    continue

                try:
                    traj = model.load_task_json(task)
                    r_idx = task['repeat_idx']
                    subgoal_idxs = [sg['high_idx'] for sg in traj['plan']['high_pddl']
                                    if sg['discrete_action']['action'] in subgoals_to_evaluate]

                    for eval_idx in subgoal_idxs:
                        print("No. of trajectories left: %d" % (task_queue.qsize()))

                        for _ in range(max_env_restarts):
                            if cls.evaluate(env, model, resnet, eval_idx, r_idx, traj, args, lock, successes, failures, results):
                                break

                            # evaluation was aborted: replace the simulator.
                            # The response queue is swapped for a fresh one
                            # before stopping the old environment.
                            env.response_queue = Queue(maxsize=1)
                            env.stop()
                            env = ThorEnv()
                        else:
                            print("Too many failures...")

                except Exception as e:
                    import traceback
                    traceback.print_exc()
                    print("Error: " + repr(e))
        finally:
            # stop THOR
            env.stop()
    
    @classmethod
    def evaluate(cls, env, model, resnet, eval_idx, r_idx, traj_data, args, lock, successes, failures, results):
        '''
        evaluate one subgoal of one trajectory

        The scene is set up, the expert low-level actions belonging to the
        subgoals before `eval_idx` are replayed, and then the model acts on
        its own until the environment reports `eval_idx` as the current
        subgoal index, a terminal token is predicted, `args.max_fails`
        interactions have failed, or a step budget (`args.max_steps`,
        `args.max_steps_taken`) is exhausted.

        The outcome is appended to `successes` or `failures` under the
        subgoal's action type and the per-subgoal summary in `results` is
        recomputed, all while holding `lock`.

        Returns True when the episode was run and logged (success or not) and
        False when scene setup failed or an interaction returned a
        TimeoutError, in which case nothing is logged.
        '''
        # setup scene
        reward_type = 'dense'
        if not cls.setup_scene(env, traj_data, r_idx, args, reward_type=reward_type):
            return False

        low_actions = traj_data['plan']['low_actions']

        # expert demonstration to reach eval_idx-1
        expert_init_actions = [a['discrete_action'] for a in low_actions if a['high_idx'] < eval_idx]
        num_expert_actions = len(expert_init_actions)

        # subgoal info
        subgoal_action = traj_data['plan']['high_pddl'][eval_idx]['discrete_action']['action']
        subgoal_instr = traj_data['turk_annotations']['anns'][r_idx]['high_descs'][eval_idx]
        subgoal_ids = clip.tokenize(subgoal_instr)
        subgoal_tensor = cls.get_goal_tensor(subgoal_ids, args.device)

        # keep frame features on the same device as the goal embedding
        device = torch.device(args.device)

        # print subgoal info
        print("Evaluating: %s\nSubgoal %s (%d)\nInstr: %s" % (traj_data['root'], subgoal_action, eval_idx, subgoal_instr))

        # extract language features
        feat = model.featurize([traj_data], load_mask=False)

        # previous action(s) fed back to the model (None is used for initialization)
        # NOTE(review): when the expert phase unrolls the model with teacher
        # forcing, prev_action is left as a string and the agent phase below
        # then treats it as a list -- confirm that combination is never used.
        prev_action = None

        subgoal_success = False
        fails = 0
        t = 0          # total steps taken (expert + agent)
        in_t = 0       # steps taken by the agent only
        reward = 0

        block_size = 12
        timesteps = 0
        rtgs = [1]

        # sliding window holding the most recent frame features
        cls.state_buffer = deque([], maxlen=block_size)

        while True:
            # break if max_steps reached
            if t >= args.max_steps + num_expert_actions:
                break

            if in_t >= args.max_steps_taken:
                break

            # extract visual feats
            curr_image = Image.fromarray(np.uint8(env.last_event.frame))
            feat['frames'] = resnet.featurize([curr_image], batch=1).unsqueeze(0)

            # expert teacher-forcing upto subgoal
            if t < num_expert_actions:
                # get expert action
                action = expert_init_actions[t]
                compressed_mask = action['args'].get('mask')
                mask = env.decompress_mask(compressed_mask) if compressed_mask is not None else None

                # forward model
                if not args.skip_model_unroll_with_expert:
                    model.step(feat, subgoal_ids, prev_action=prev_action)
                    prev_action = action['action'] if not args.no_teacher_force_unroll_with_expert else None

                # execute expert action
                success, _, _, err, _ = va_interact(args, env, action['action'], interact_mask=mask)
                if not success:
                    if isinstance(err, TimeoutError):
                        print("Timeout in expert")
                        return False

                    print ("expert initialization failed")
                    break

                # update transition reward (return value unused; the call is kept as in the original flow)
                env.get_transition_reward()

            # subgoal evaluation
            else:
                in_t += 1
                cls.state_buffer.append(feat['frames'][0][0].to(device))
                states = torch.stack(list(cls.state_buffer), 0)

                # the predicted mask is ignored; interactions are issued without one
                action, action_words, _ = model.step(states, subgoal_tensor, rtgs, timesteps, actions=prev_action)

                if prev_action is None:
                    prev_action = [action]
                else:
                    prev_action += [action]

                timesteps += 1

                # debug
                if args.debug:
                    print("Pred: ", action)

                is_terminal = action_words in cls.TERMINAL_TOKENS

                if not is_terminal:
                    # use predicted action to interact with the env
                    t_success, _, _, err, _ = va_interact(args, env, action_words, interact_mask=None)

                    if not t_success:
                        if isinstance(err, TimeoutError):
                            print("Timeout in agent")
                            return False

                        fails += 1
                        if fails >= args.max_fails:
                            print("Interact API failed %d times" % (fails) + "; latest error '%s'" % err)
                            break

                # next time-step
                t_reward, _ = env.get_transition_reward()
                reward += t_reward

                # the return-to-go value is carried over unchanged
                rtgs.append(rtgs[-1])

                # update subgoals
                if env.get_subgoal_idx() == eval_idx:
                    subgoal_success = True
                    break

                # terminal tokens predicted; tested on the same value that
                # decided above whether the env was stepped
                if is_terminal:
                    print("predicted %s" % action_words)
                    break

            # increment time index
            t += 1

        # metrics
        pl = float(t - num_expert_actions) + 1 # +1 for last action
        expert_pl = len([ll for ll in low_actions if ll['high_idx'] == eval_idx])

        s_spl = (1 if subgoal_success else 0) * min(1., expert_pl / (pl + sys.float_info.epsilon))
        plw_s_spl = s_spl * expert_pl

        log_entry = {'trial': traj_data['task_id'],
                     'type': traj_data['task_type'],
                     'repeat_idx': int(r_idx),
                     'subgoal_idx': int(eval_idx),
                     'subgoal_type': subgoal_action,
                     'subgoal_instr': subgoal_instr,
                     'subgoal_success_spl': float(s_spl),
                     'subgoal_path_len_weighted_success_spl': float(plw_s_spl),
                     'subgoal_path_len_weight': float(expert_pl),
                     'reward': float(reward)}

        # log success/fails; the context manager releases the lock even if
        # something below raises, so other workers are never left blocked
        with lock:
            # results
            for sg in cls.ALL_SUBGOALS:
                results[sg] = {
                        'sr': 0.,
                        'successes': 0.,
                        'evals': 0.,
                        'sr_plw': 0.
                }

            # read-modify-write: fetch the list, append, then store it back
            # into the stats mapping
            target = successes if subgoal_success else failures
            entries = target[subgoal_action]
            entries.append(log_entry)
            target[subgoal_action] = entries

            # save results
            print("-------------")
            for sg in sorted(successes.keys()):
                sg_successes, sg_failures = successes[sg], failures[sg]
                num_successes, num_failures = len(sg_successes), len(sg_failures)
                num_evals = num_successes + num_failures
                if num_evals > 0:
                    logged = sg_successes + sg_failures
                    sr = float(num_successes) / num_evals
                    total_path_len_weight = sum(entry['subgoal_path_len_weight'] for entry in logged)
                    weighted_spl = float(sum(entry['subgoal_path_len_weighted_success_spl'] for entry in logged))
                    # guard against subgoals whose expert path length is zero
                    sr_plw = weighted_spl / total_path_len_weight if total_path_len_weight > 0 else 0.

                    results[sg] = {
                        'sr': sr,
                        'successes': num_successes,
                        'evals': num_evals,
                        'sr_plw': sr_plw
                    }

                    print("SR: %d/%d = %.3f" % (num_successes, num_evals, sr))
                    print("PLW SR: %.3f" % (sr_plw))
            print("------------")

        return True

    def create_stats(self):
        '''
        allocate fresh manager-backed dicts for successes, failures and results
        '''
        self.successes = self.manager.dict()
        self.failures = self.manager.dict()
        self.results = self.manager.dict()

    def save_results(self):
        '''
        dump the collected stats to a JSON file and return the GotoLocation scores

        The file is written to
        `<dirname(args.model_path)>/results/<model_type>-<if_clip>/`, with a
        name built from the current month/day/hour/minute, the model tag and
        `args.eval_split`. `self.results` is replaced by the plain-dict
        summary that was written.

        Returns the tuple (sr, sr_plw) of the 'GotoLocation' subgoal, or
        (0, 0) if no such entry exists.
        '''
        results = {'eval successes': dict(self.successes),
                   'eval failures': dict(self.failures),
                   'eval results': dict(self.results)}

        self.results = results

        model_tag = f"{self.args.model_type}-{self.args.if_clip}"

        # os.path.join keeps the path relative when model_path has no
        # directory part, instead of resolving to "/results/..." at the root
        out_dir = os.path.join(os.path.dirname(self.args.model_path), 'results', model_tag)
        os.makedirs(out_dir, exist_ok=True)

        timestamp = datetime.now().strftime("%m%d_%H%M_")
        out_file = os.path.join(out_dir, f'{timestamp}_{model_tag}_{self.args.eval_split}.json')

        with open(out_file, 'w') as r:
            json.dump(results, r, indent=4, sort_keys=True)

        try:
            goto_stats = results['eval results']['GotoLocation']
            return goto_stats['sr'], goto_stats['sr_plw']
        except (KeyError, TypeError):
            print("No legal results")
            return 0, 0