import os
import sys
base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../..'))
sys.path.append(base_path)

import numpy as np
import pickle
from tqdm import tqdm
from data.used.OP_V2.load_raw_data import load_existing_raw_data, raw_data_to_problem
from environment.used.Env_op_v4 import OP_V4
from environment.used.BaseEnv_COP import DataProblem, RawData
from utils.utils import create_folder_if_not_exist, split_rawdata
from utils.COP_slover import calc_op_total
import multiprocessing as mp

def _require(condition, message):
    """Raise ``ValueError(message)`` unless ``condition`` is truthy.

    Used in place of ``assert`` so that the trajectory consistency checks
    still run when Python is started with ``-O`` (which strips asserts).
    """
    if not condition:
        raise ValueError(message)


def raw_data_to_traj(info):
    """Replay one solved problem through ``OP_V4`` and pack the rollout as an episode.

    Args:
        info: Tuple ``(i, traj_type, get_problem)``.
            ``i`` is the sample index inside the selected dataset.
            ``traj_type`` is ``'train'``, ``'prompt'`` or ``'test'`` and selects
            the module-level ``train_data`` / ``prompt_data`` / ``test_data``.
            ``get_problem`` selects the return shape (see below).

    Returns:
        When ``get_problem`` is falsy: the episode dict, or ``None`` if the
        sample could not be replayed or failed a consistency check.
        When ``get_problem`` is truthy: ``(episode, problem, answer)`` where
        ``answer`` is the stored answer shifted by +1, or
        ``(None, None, None)`` on failure.

    Raises:
        NotImplementedError: ``traj_type`` is not one of the three known names.
    """
    i, traj_type, get_problem = info
    # The datasets are module-level globals; they are only read here.
    if traj_type == 'train':
        dataset = train_data
    elif traj_type == 'prompt':
        dataset = prompt_data
    elif traj_type == 'test':
        dataset = test_data
    else:
        raise NotImplementedError

    try:
        problem = dataset.problem_list[i]
        answer = dataset.answer_list[i]
        cost = dataset.cost_list[i]
        answer = np.array(answer) + 1   # stored answers are shifted by +1 before being fed to the env
        cost = -cost
        _require(
            abs(calc_op_total(problem['prize'], answer - 1) - cost) < 1e-5,
            'stored cost does not match the prize total of the stored answer',
        )

        # Reset the environment on this specific problem / answer.
        env = OP_V4(node_num=problem['prize'].shape[0])
        # The info dict returned by the env is not used; bind it to `_` so it
        # does not overwrite the `info` parameter.
        observation, _ = env.reset(options={
            'problem_info': (None, problem, answer),
            'problem_obj':(cost, 1),
            'use_default_policy_obj': False
        })
        prefix = env.get_prefix()
        prefix_mask = env.get_prefix_mask()
        _require((answer == env.real_answer).all(), 'env.real_answer differs from the supplied answer')

        # Generate the MDP trajectory.
        # Note: the random objective is unknown, so the DB1 reward along the
        # trajectory is unreliable; only the AM reward is kept.
        prefix_masks, obss, acts, rewards = [prefix_mask], [observation], [], []
        for action in answer[:-1]:
            observation, reward, terminated, truncated, _ = env.step(action)
            prefix_mask = env.get_prefix_mask()
            acts.append(action)
            rewards.append(reward['AM'])
            obss.append(observation)
            prefix_masks.append(prefix_mask)
            _require(not (terminated or truncated), 'episode ended before the last action')
            for k, v in prefix_mask.items():
                _require(len(v) == len(prefix[k]), f'prefix mask length mismatch for {k!r}')

        # Last action: its resulting observation is not stored, so the four
        # lists end up with equal length.
        action = answer[-1]
        observation, reward, terminated, truncated, _ = env.step(action)
        # real_answer is used directly as the model answer, so reward['DB1'] == 1
        # regardless of what the DB1 reward would otherwise be.
        _require(
            abs(reward['AM'] - 1) <= 1e-4 and abs(reward['DB1'] - 1) <= 1e-4,
            'final AM/DB1 reward is not 1',
        )
        _require(terminated and not truncated, 'episode did not terminate on the last action')
        acts.append(action)
        rewards.append(1)
        _require(
            len(prefix_masks) == len(obss) == len(acts) == len(rewards),
            'trajectory components have different lengths',
        )
    except Exception:
        # Deliberate best-effort: any sample that cannot be replayed cleanly is
        # dropped by the caller (it filters out None results).
        if get_problem:
            return None, None, None
        return None

    # Pack into a d4rl-style layout for saving.
    obss_visited = np.vstack([obs['visited'] for obs in obss])
    obss_current_position = np.array([obs['current_position'] for obs in obss])
    obss_prize = np.vstack([obs['prize'] for obs in obss])
    obss_length = np.array([obs['length'] for obs in obss])
    prefix_mask_depot = np.array([mask['pos_depot'] for mask in prefix_masks])
    prefix_mask_node = np.vstack([mask['pos_node'] for mask in prefix_masks])
    prefix_mask_prize = np.vstack([mask['prize'] for mask in prefix_masks])

    # NOTE(review): the shape notes below are carried over from the original
    # comments and depend on OP_V4, which is not visible here — confirm.
    episode = {
        'prefix': prefix,
        'prefix_masks': {
            'pos_depot': prefix_mask_depot,                 # (time_steps, 2)
            'pos_node': prefix_mask_node,                   # (time_steps, num_nodes*2)
            'prize': prefix_mask_prize,                     # (time_steps, num_nodes)
        },
        'observations': {
            'visited': obss_visited,                        # original note: (time_steps, 2); presumably (time_steps, node_num) — TODO confirm
            'prize': obss_prize,                            # (time_steps, node_num)
            'length': obss_length,                          # (time_steps, )
            'current_position': obss_current_position,      # (time_steps, 2)
        },
        'actions': np.array(acts).astype(np.int32),         # (time_steps, )
        'rewards': np.array(rewards).astype(np.float32),    # (time_steps, )
        # 'terminals' only mimics the d4rl data layout; it is currently unused.
        'terminals': np.array([False] * (len(rewards)-1) + [True], dtype=bool)
    }

    if get_problem:
        return episode, problem, answer
    return episode

def _install_worker_dataset(data_type, raw_data):
    """Pool initializer: bind ``raw_data`` to the global that workers read for ``data_type``.

    ``raw_data_to_traj`` looks its dataset up in the module globals
    ``train_data`` / ``prompt_data`` / ``test_data``. Those globals are only
    inherited by worker processes under the ``fork`` start method; installing
    the dataset explicitly makes the pool work under ``spawn`` / ``forkserver``
    too, and guarantees workers use the very ``raw_data`` the caller passed.
    Unknown ``data_type`` values are left alone so the worker raises its usual
    ``NotImplementedError``.
    """
    global_name = {
        'train': 'train_data',
        'prompt': 'prompt_data',
        'test': 'test_data',
    }.get(data_type)
    if global_name is not None:
        globals()[global_name] = raw_data


def raw_data_to_traj_multiprocessing(raw_data:RawData, process_num:int=10, data_num:int=0, data_type:str='train', get_problem:bool=False):
    """Convert the samples of ``raw_data`` into episodes using a process pool.

    Args:
        raw_data: Dataset to convert; its ``answer_list`` length bounds the
            number of samples, and it is installed in every worker as the
            dataset for ``data_type``.
        process_num: Number of worker processes.
        data_num: Number of leading samples to convert; ``0`` means all of
            them. Values larger than the dataset are clamped.
        data_type: ``'train'``, ``'prompt'`` or ``'test'``; forwarded to
            ``raw_data_to_traj``.
        get_problem: When true, also collect the problem and answer of every
            successfully converted sample.

    Returns:
        ``(episodes, None)`` when ``get_problem`` is false, otherwise
        ``(episodes, problems)`` where ``problems`` is a ``DataProblem`` whose
        ``problem_list`` / ``answer_list`` are aligned with ``episodes``.
        Samples that failed conversion are dropped. Results arrive via
        ``imap_unordered``, so their order is not the dataset order.
    """
    available = len(raw_data.answer_list)
    data_num = available if data_num == 0 else min(data_num, available)
    tasks = [(i, data_type, get_problem) for i in range(data_num)]

    with mp.Pool(
        processes=process_num,
        initializer=_install_worker_dataset,
        initargs=(data_type, raw_data),
    ) as pool:
        # `total` lets tqdm show progress even though imap_unordered has no len().
        # The iterator must be drained before the pool context exits.
        results = list(tqdm(pool.imap_unordered(raw_data_to_traj, tasks), total=data_num))

    if not get_problem:
        # Workers return None for samples that could not be converted.
        return [epi for epi in results if epi is not None], None

    episodes = []
    problems = DataProblem(problem_list=[], answer_list=[])
    for episode, problem, answer in results:
        # Failed samples come back as (None, None, None).
        if episode is not None and problem is not None and answer is not None:
            episodes.append(episode)
            problems.problem_list.append(problem)
            problems.answer_list.append(answer)
    return episodes, problems

if __name__ == "__main__":
    # Run configuration.
    node_num = 20                   # problem size passed to load_existing_raw_data and used in file names
    dataset_name = 'OP_V4'
    gen_test_episodes = False       # also generate trajectories for the test set
    get_train_problem = True        # also save the problems behind the training trajectories
    worker_num = 50                 # worker processes for trajectory generation

    # Folder the generated data is written to.
    out_dir = f'{base_path}/data/used/{dataset_name}'
    create_folder_if_not_exist(out_dir)

    # Load the three kinds of raw data. These must stay module-level globals
    # with exactly these names: raw_data_to_traj reads them by name.
    train_data, test_data, prompt_data = load_existing_raw_data(node_num)

    # The prefix model can be evaluated on prompt data it has not used.
    prompt_problem = raw_data_to_problem(prompt_data)
    with open(f'{out_dir}/op{node_num}_prompt_problem.pkl', 'wb') as f:
        pickle.dump(prompt_problem, f)
        print('prompt_problems saved')

    # Test problems that are never seen during training.
    test_problem = raw_data_to_problem(test_data)
    with open(f'{out_dir}/op{node_num}_problem.pkl', 'wb') as f:
        pickle.dump(test_problem, f)
        print('test_problems saved')

    # Generate the trajectories.
    prompt_episodes, _ = raw_data_to_traj_multiprocessing(
        prompt_data, worker_num, data_type='prompt'
    )
    train_episodes, train_problem = raw_data_to_traj_multiprocessing(
        train_data, worker_num, data_type='train', get_problem=get_train_problem
    )
    test_episodes = None
    if gen_test_episodes:
        # Use worker_num like the other calls; the original passed node_num
        # here, which set the pool size to the problem size by mistake.
        test_episodes, _ = raw_data_to_traj_multiprocessing(
            test_data, worker_num, data_type='test'
        )

    # Prompt MDP trajectories.
    with open(f'{out_dir}/op{node_num}_prompt.pkl', 'wb') as f:
        pickle.dump(prompt_episodes, f)
        print('prompt_traj saved')

    # Training MDP trajectories.
    with open(f'{out_dir}/op{node_num}_train.pkl', 'wb') as f:
        pickle.dump(train_episodes, f)
        print('train_traj saved')

    # (Optional) The problems behind the training data; test performance on
    # these problems should be close to 100%.
    if get_train_problem:
        assert train_problem is not None
        with open(f'{out_dir}/op{node_num}_train_problem.pkl', 'wb') as f:
            pickle.dump(train_problem, f)
            print('train_problems saved')

    # (Optional) Optimal MDP trajectories for the unseen test problems; the
    # loss on this data reflects how well the fit generalizes.
    if gen_test_episodes:
        assert test_episodes is not None
        with open(f'{out_dir}/op{node_num}_test.pkl', 'wb') as f:
            pickle.dump(test_episodes, f)
            print('test_traj saved')