#这个代码用于计算从每一个状态（包括起始点s）到dcstar的策略
import numpy as np
import matplotlib.pyplot as plt
from policy_iteration_APF import PolicyIterationPlanner, load_policy
from policy_iteration_dcstar import dcstar_PolicyIterationPlanner
import os
import seaborn as sns
from matplotlib.gridspec import GridSpec
import os

def save_trajectory_to_file(path, filename):
    """
    将轨迹保存到文本文件中
    Args:
        path: 轨迹点列表，每个点是(x, y)坐标
        filename: 保存的文件名
    """
    # 确保目录存在
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    
    with open(filename, 'w') as f:
        f.write("# Trajectory coordinates (x, y)\n")
        for point in path:
            f.write(f"{point[0]}, {point[1]}\n")

def load_states_from_file(filename):
    """
    从文本文件中读取D、dcstar、gi_index
    Args:
        filename: 文件名
    Returns:
        tuple: (D, dcstar, gi_index)，其中状态以元组形式返回
    """
    def parse_coord(s):
        # 解析(x,y)格式的坐标
        x, y = s.strip('()').split(',')
        return (int(x), int(y))
    
    D = []
    dcstar = None
    gi_index = None
    current_list = None
    
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip()
            if line.startswith('D ='):
                current_list = D
            elif line.startswith('dcstar ='):
                # 直接处理dcstar行
                coords = line.replace('dcstar =', '').strip()
                if coords and coords != '[]':
                    dcstar = parse_coord(coords)
                continue
            elif line.startswith('gi_index ='):
                # 处理gi_index行
                value = line.replace('gi_index =', '').strip()
                gi_index = int(value)
                continue
            elif line and line != '[' and line != ']':  # 忽略空行和括号
                # 移除末尾的逗号并分割坐标对
                coords = line.strip(' ,').split(', ')
                for coord in coords:
                    if coord:  # 确保不是空字符串
                        if current_list is not None:
                            current_list.append(parse_coord(coord))

    return D, dcstar, gi_index
class ValueDeceptiveModelPlanner:
    def __init__(self, true_planner, fake_planners, dcstar_planner, delta=0.0, sigma=0.0, gi_index=0):
        """
        初始化Value-based Deceptive Model规划器
        Args:
            true_planner: 真实目标的规划器
            fake_planners: 假目标规划器的列表
            dcstar_planner: dcstar规划器
            delta: δ阈值
            sigma: σ系数(欺骗系数)
        """
        self.true_planner = true_planner
        self.fake_planners = fake_planners
        self.dcstar_planner = dcstar_planner
        self.delta = delta
        self.observation_sequence = []  # 观测序列O，存储(state, action)对
        
        # 初始化可视化相关属性
        self.probability_history = {'true': [], **{f'fake_{i}': [] for i in range(len(fake_planners))}}
        self.steps = 0
    def calculate_q_gain(self, state, action_idx, observation, planner):
        """
        计算Q增益 G_ri(s,a) = Q_ri(s,a) - R_ri(O)
        其中R_ri(O)是观测序列的残差期望奖励
        Args:
            state: 当前状态 (x, y)
            action: 动作索引
            planner: 对应目标的规划器
        Returns:
            float: Q增益值
        """
        current_q = planner.q_table[state[1], state[0], action_idx]
        
        # 计算观测序列的残差期望奖励
        if len(observation) > 0:
            last_state, last_action = observation[-1]
            first_state, first_action = observation[0]
            residual_reward = planner.q_table[last_state[1], last_state[0], last_action] #-\
                        #    planner.q_table[first_state[1], first_state[0], first_action]  #这里是一个改进！
        else:
            residual_reward = 0
            
        return current_q - residual_reward
    
    def get_valid_actions(self, state, action_model):
        """获取有效的动作集合A+(s)，即Q增益为正的动作"""
        if action_model == 1: #AM模型
            valid_actions = []
            for action_idx in range(len(self.true_planner.actions)):
                if not self.true_planner.is_valid_action(state, self.true_planner.actions[action_idx]):
                    continue                
                q_gain = self.calculate_q_gain(state, action_idx, self.observation_sequence, self.true_planner)
                if q_gain > self.delta:  # 只保留Q增益大于阈值的动作
                    valid_actions.append(action_idx)
        if action_model == 2: #Ambiguity-VDM
            valid_actions = []
            for action_idx in range(len(self.dcstar_planner.actions)):
                if not self.dcstar_planner.is_valid_action(state, self.dcstar_planner.actions[action_idx]):
                    continue
                q_gain = self.calculate_q_gain(state, action_idx, self.observation_sequence, self.dcstar_planner)
                if q_gain > self.delta:  # 只保留Q增益大于阈值的动作
                    valid_actions.append(action_idx)
        return valid_actions

    def calculate_value_improvement(self, planner, state, action_idx):
        """
        计算某个动作带来的价值提升
        Args:
            state: 当前状态
            action_idx: 动作索引
        Returns:
            float: 价值提升量
        """
        action = planner.actions[action_idx]
        next_state = planner.get_next_state(state, action)
        # 计算真实目标价值的提升
        current_value = planner.value_function[state[1], state[0]]
        next_value = planner.value_function[next_state[1], next_state[0]]

        return next_value - current_value

    def prune_actions(self, state, planner):
        """
        获取有效的动作集合(必须保证Vdcstar有提升)
        """
        valid_actions = []        
        for action_idx in range(len(planner.actions)):
            if not planner.is_valid_action(state, planner.actions[action_idx]):
                continue
            if self.calculate_value_improvement(planner, state, action_idx) > 0:
                valid_actions.append(action_idx)
        return valid_actions
    
    def calculate_goal_probabilities_action_value(self, observation):
        """
        计算在当前观测序列O下的目标概率分布
        P(ri|O) = exp{Δ_ri(O)} / Σ_r∈R exp{Δ_r(O)} · P(ri)
        Args:
            observation: 观测序列（包括当前考虑的状态-动作对）
        Returns:
            dict: 包含每个目标的概率
        """
        goal_differences = {}
        total_exp = 0
        
        # 计算真目标的Q差异
        q_diff = 0
        if observation:
            for obs_state, obs_action in observation:
                q_value = self.true_planner.q_table[obs_state[1], obs_state[0], obs_action]
                max_q = max(self.true_planner.q_table[obs_state[1], obs_state[0]])
                q_diff += q_value - max_q
                
        goal_differences['true'] = np.exp(q_diff)
        total_exp += goal_differences['true']
        print("true:", q_diff)
        # 计算每个假目标的Q差异
        for i, fake_planner in enumerate(self.fake_planners):
            q_diff = 0
            if observation:
                for obs_state, obs_action in observation:
                    q_value = fake_planner.q_table[obs_state[1], obs_state[0], obs_action]
                    max_q = max(fake_planner.q_table[obs_state[1], obs_state[0]])
                    q_diff += q_value - max_q
                    
            goal_differences[f'fake_{i}'] = np.exp(q_diff)
            total_exp += goal_differences[f'fake_{i}']
        print("false:", q_diff)
        # 计算概率分布（假设先验概率P(ri)相等）
        goal_probabilities = {}
        if total_exp > 0:
            for goal_type in goal_differences:
                goal_probabilities[goal_type] = goal_differences[goal_type] / total_exp
        print("goal_probabilities:", goal_probabilities)
        return goal_probabilities
    
    def calculate_goal_probabilities_state_value(self, observation):
        """使用状态值函数计算目标概率"""
        if not observation:
            return {'true': 1.0/(len(self.fake_planners) + 1), 
                   **{f'fake_{i}': 1.0/(len(self.fake_planners) + 1) 
                      for i in range(len(self.fake_planners))}}

        goal_differences = {}
        total_exp = 0
        
        # 计算真实目标的差异
        obs_state, obs_action = observation[-1]
        next_state = tuple(np.array(obs_state) + self.true_planner.actions[obs_action])
        state_value_diff = (self.true_planner.value_function[next_state[1], next_state[0]] - 
                          self.true_planner.value_function[self.true_planner.start[1], 
                                                         self.true_planner.start[0]])
        goal_differences['true'] = np.exp(state_value_diff)
        total_exp += goal_differences['true']
        
        # 计算假目标的差异
        for i, fake_planner in enumerate(self.fake_planners):
            next_state = tuple(np.array(obs_state) + fake_planner.actions[obs_action])
            state_value_diff = (fake_planner.value_function[next_state[1], next_state[0]] -
                              fake_planner.value_function[self.true_planner.start[1],
                                                        self.true_planner.start[0]])
            goal_differences[f'fake_{i}'] = np.exp(state_value_diff)
            total_exp += goal_differences[f'fake_{i}']
            
        return {goal: diff / total_exp for goal, diff in goal_differences.items()}
    
    def update_probability_history(self, state, action):
        """更新概率历史记录"""
        self.observation_sequence.append((state, action))
        self.steps += 1
        probs = self.calculate_goal_probabilities_state_value(self.observation_sequence)
        for goal_type in self.probability_history:
            self.probability_history[goal_type].append(probs.get(goal_type, 0))

    def select_action(self, state, stage, action_model, sigma, gi_index):
        """
        对于Honest和AM模型，不需要两阶段
        """
        if action_model == 0:  # Honest
            valid_actions = []
            for action_idx in range(len(self.true_planner.actions)):
                if not self.true_planner.is_valid_action(state, self.true_planner.actions[action_idx]):
                    continue
                valid_actions.append(action_idx)
            if not valid_actions:
                print("No valid actions available.")
                return 0  # 返回默认动作
                # 选择能够最大化真实目标价值提升的动作
            max_improvement = float('-inf')
            best_action = valid_actions[0]

            for action_idx in valid_actions:
                improvement = self.calculate_value_improvement(self.true_planner, state, action_idx)
                if improvement > max_improvement:
                    max_improvement = improvement
                    best_action = action_idx
        elif action_model == 1:  # AM模型
            """
            基于熵最大化选择动作
            π^D(O,s) = arg max_a∈A+(s) -κΣ_ri∈R P(ri|O∪(s,a))log₂P(ri|O∪(s,a))
            """  
            valid_actions = self.get_valid_actions(state, action_model)
            if not valid_actions:
                print("No valid actions available.")
                return 0  # 返回默认动作
            # 计算每个动作的信息熵
            max_entropy = float('-inf')
            for action_idx in valid_actions:
                # 创建包含当前动作的观测序列
                test_observation = self.observation_sequence.copy()
                test_observation.append((state, action_idx))

                # 计算选择该动作后的目标概率分布
                # goal_probs = self.calculate_goal_probabilities_action_value(test_observation)
                goal_probs = self.calculate_goal_probabilities_state_value(test_observation)
            
                # 计算熵
                entropy = 0
                for p in goal_probs.values():
                    if p > 0:
                        entropy -= p * np.log2(p)
                
                # 加权熵（乘以κ系数）
                weighted_entropy = entropy
                
                # 更新最小熵的动作
                if weighted_entropy > max_entropy:
                    max_entropy = weighted_entropy
                    best_action = action_idx
        ##这里要分两个阶段
        # 第一个是从起点到dcstar(和model有关系)
        # 第二个是从dcstar到goal

        else:  # action_model = 1,2,3
            if stage == 1:  # 第一阶段，从起点到dcstar
                if action_model == 2:  # Ambiguity-VDM
                    """
                    基于熵最大化选择动作
                    π^D(O,s) = arg max_a∈A+(s) -κΣ_ri∈R P(ri|O∪(s,a))log₂P(ri|O∪(s,a))
                    """
                    valid_actions = self.get_valid_actions(state, action_model)
                    if not valid_actions:
                        print("No valid actions available.")
                        return 0  # 返回默认动作 
                    # 计算每个动作的信息熵
                    max_entropy = float('-inf')
                    best_action = valid_actions[0]
                    
                    for action_idx in valid_actions:
                        # 创建包含当前动作的观测序列
                        test_observation = self.observation_sequence.copy()
                        test_observation.append((state, action_idx))
                        
                        # 计算选择该动作后的目标概率分布
                        # goal_probs = self.calculate_goal_probabilities_action_value(test_observation)
                        goal_probs = self.calculate_goal_probabilities_state_value(test_observation)
                    
                        # 计算熵
                        entropy = 0
                        for p in goal_probs.values():
                            if p > 0:
                                entropy -= p * np.log2(p)
                        
                        # 加权熵（乘以κ系数）
                        weighted_entropy = entropy
                        
                        # 更新最小熵的动作
                        if weighted_entropy > max_entropy:
                            max_entropy = weighted_entropy
                            best_action = action_idx

                elif action_model == 3:  # Exaggeration(δ=0)-VDM
                    valid_actions = self.prune_actions(state, self.dcstar_planner)
                    if not valid_actions:
                        print("No valid actions available.")
                        return 0  # 返回默认动作

                    max_improvement = float('-inf')
                    best_action = valid_actions[0]
                    # 选择能够最大化Vdcstar的动作 
                    for action_idx in valid_actions:
                        improvement =  self.calculate_value_improvement(self.dcstar_planner, state, action_idx) + sigma * (self.calculate_value_improvement(self.fake_planners[gi_index], state, action_idx) - self.calculate_value_improvement(self.true_planner, state, action_idx))
                        if improvement > max_improvement:
                            max_improvement = improvement
                            best_action = action_idx
            else:  # 第二阶段，从dcstar到goal
                valid_actions = []
                for action_idx in range(len(self.true_planner.actions)):
                    if not self.true_planner.is_valid_action(state, self.true_planner.actions[action_idx]):
                        continue
                    valid_actions.append(action_idx)  # 所有可以执行的动作都是有效的
                # 选择能够最大化真实目标价值提升的动作
                max_improvement = float('-inf')
                best_action = valid_actions[0]

                for action_idx in valid_actions:
                    improvement = self.calculate_value_improvement(self.true_planner, state, action_idx)
                    if improvement > max_improvement:
                        max_improvement = improvement
                        best_action = action_idx

        return best_action   
    
    def plan_path(self, start_state, dcstar, action_model, sigma, gi_index):
        """生成基于ValueDeceptive Model的路径"""
        current_state = start_state
        self.observation_sequence = []  # 初始化观测序列
        path = [current_state]
        # Honest智能体和AM智能体完全使用select_action函数中的第二段（从dcstar到goal)的动作选择模式
        if action_model == 0:  ##################### Honest
            while current_state != tuple(self.true_planner.goal):
                # 选择动作
                action_idx = self.select_action(current_state, stage=2, action_model=action_model, sigma=sigma, gi_index=gi_index)
                action = self.true_planner.actions[action_idx]
                # 更新概率历史
                self.update_probability_history(current_state, action_idx)
                
                # 执行动作
                next_state = self.true_planner.get_next_state(current_state, action)
                if next_state == current_state:  # 如果无法移动
                    break
                    
                current_state = next_state
                path.append(current_state)
                
                # 防止无限循环
                if len(path) > self.true_planner.width * self.true_planner.height:
                    print("Warning: Path planning stopped due to maximum length reached")
                    break
        elif action_model == 1: ################### AM模型           
            while current_state != tuple(self.true_planner.goal):
                # 基于熵最大化选择动作
                action_idx = self.select_action(current_state, stage=2, action_model=action_model, sigma=sigma, gi_index=gi_index)
                action = self.true_planner.actions[action_idx]
                # 更新概率历史
                self.update_probability_history(current_state, action_idx)                
                # 执行动作
                next_state = self.true_planner.get_next_state(current_state, action)
                if next_state == current_state:  # 如果无法移动
                    break

                current_state = next_state
                path.append(current_state)
                
                # 防止无限循环
                if len(path) > self.true_planner.width * self.true_planner.height:
                    break
        else:   #############action_model = 2Ambiguity-VDM、3Exaggeration(δ=？)-VDM，需要分段
            #第一段（从start到dcstar)
            if action_model == 2:    ########### Ambiguity-VDM 
                while current_state != dcstar:
                    # 基于熵最大化选择动作
                    action_idx = self.select_action(current_state, stage=1, action_model=action_model, sigma=sigma, gi_index=gi_index)
                    action = self.dcstar_planner.actions[action_idx]
                    # 更新概率历史
                    self.update_probability_history(current_state, action_idx)
                    # 执行动作
                    next_state = self.dcstar_planner.get_next_state(current_state, action)
                    if next_state == current_state:  # 如果无法移动
                        break

                    current_state = next_state
                    path.append(current_state)
                    
                    # 防止无限循环
                    if len(path) > self.dcstar_planner.width * self.dcstar_planner.height:
                        break
            if action_model == 3:  ############ Exaggeration(δ=0)-VDM
                while current_state != dcstar:
                    # 选择动作
                    action_idx = self.select_action(current_state, stage=1, action_model=action_model, sigma=sigma, gi_index=gi_index)
                    action = self.dcstar_planner.actions[action_idx]

                    # 更新概率历史
                    self.update_probability_history(current_state, action_idx)
                    
                    # 执行动作
                    next_state = self.dcstar_planner.get_next_state(current_state, action)
                    if next_state == current_state:  # 如果无法移动
                        break
  
                    current_state = next_state
                    path.append(current_state)
                    
                    # 防止无限循环
                    if len(path) > self.true_planner.width * self.true_planner.height:
                        print("Warning: Path planning stopped due to maximum length reached")
                        break  
            #第二段（从dcstar到goal）
            while current_state != tuple(self.true_planner.goal):
                # 选择动作
                action_idx = self.select_action(current_state, stage=2, action_model=action_model, sigma=sigma, gi_index=gi_index) #第二段就是Honest直接抵达目标状态
                action = self.true_planner.actions[action_idx]
                    
                # 更新概率历史
                self.update_probability_history(current_state, action_idx)
                    
                # 执行动作
                next_state = self.true_planner.get_next_state(current_state, action)
                if next_state == current_state:  # 如果无法移动
                        break
                        
                current_state = next_state
                path.append(current_state)
                    
                # 防止无限循环
                if len(path) > self.true_planner.width * self.true_planner.height:
                    print("Warning: Path planning stopped due to maximum length reached")
                    break
        return path

    def _plot_probability_curve(self, ax):
        """绘制概率变化曲线"""
        x = range(self.steps)
        
        # 绘制真实目标概率
        ax.plot(x, self.probability_history['true'], 
                label='True Goal', color=self.colors['true'],
                linewidth=2.5, marker='o', markersize=6)
        
        # 绘制假目标概率
        for i in range(len(self.fake_planners)):
            ax.plot(x, self.probability_history[f'fake_{i}'],
                    label=f'Fake Goal {i+1}', color=self.fake_colors[i],
                    linewidth=2.5, marker='o', markersize=6)
        
        ax.set_xlabel('Steps', fontsize=12)
        ax.set_ylabel('Probability', fontsize=12)
        ax.set_title('Goal Probability Evolution', fontsize=14, pad=20)
        ax.grid(True, alpha=0.3)
        ax.set_ylim(-0.05, 1.05)
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        ax.tick_params(labelsize=10)

def visualize_path_with_probabilities(map_env, path, start_point, true_goal, fake_goals, planner, title="Path and True Goal Probability"):
    """
    创建一个包含路径图和概率变化图的组合可视化
    """
    # 创建一个2x1的子图布局
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 16))
    
    # 1. 在上面的子图中绘制路径
    # 设置背景颜色为浅灰色
    ax1.set_facecolor('#f8f9fa')
    
    # 创建网格底图
    height, width = map_env.shape
    for i in range(width + 1):
        ax1.axvline(i - 0.5, color='#dee2e6', linewidth=0.5)
    for i in range(height + 1):
        ax1.axhline(i - 0.5, color='#dee2e6', linewidth=0.5)
    
    # 绘制障碍物
    for y in range(height):
        for x in range(width):
            if map_env[y, x]:
                rect = plt.Rectangle(
                    (x-0.5, y-0.5), 1, 1,
                    facecolor='#343a40',
                    edgecolor='#212529',
                    linewidth=1,
                    alpha=0.8
                )
                ax1.add_patch(rect)
    
    # 绘制路径（使用红色实线）
    if path and len(path) > 1:
        path_array = np.array(path)
        ax1.plot(path_array[:, 0], path_array[:, 1], 
                 '-', color='red', linewidth=2.5)
        ax1.scatter(path_array[:, 0], path_array[:, 1], 
                   color='red', s=50, zorder=5)
    
    # 绘制起点（橙色）
    ax1.plot(start_point[0], start_point[1], 'o', color='orange',
             markersize=15, label='Start', zorder=6)
    
    # 绘制真实目标点（深蓝色）
    ax1.plot(true_goal[0], true_goal[1], 'o', color='darkblue',
             markersize=15, label='True Goal', zorder=6)
    
    # 绘制所有假目标点（浅蓝色）
    for fake_goal in fake_goals:
        ax1.plot(fake_goal[0], fake_goal[1], 'o', 
                color='lightblue',
                markersize=15, 
                label='Fake Goal', 
                zorder=6)
    
    # 设置图形属性
    ax1.set_title('Path Visualization', pad=15, fontsize=14)
    ax1.grid(True, alpha=0.2)
    ax1.legend(loc='upper right')
    ax1.set_xlim(-0.5, width-0.5)
    ax1.set_ylim(-0.5, height-0.5)
    ax1.set_aspect('equal')

    # 2. 在下面的子图中绘制概率变化
    total_steps = len(planner.probability_history['true'])
    x = np.linspace(0, 100, total_steps)  # 转换为百分比
    
    # 绘制真实目标概率（深蓝色）
    ax2.plot(x, planner.probability_history['true'], 
            label='True Goal', color='darkblue',
            linewidth=2.5, marker='o', markersize=6)
    
    # 绘制假目标概率（浅蓝色）
    for i in range(len(planner.fake_planners)):
        ax2.plot(x, planner.probability_history[f'fake_{i}'],
                label=f'Fake Goal {i+1}', color='lightblue',
                linewidth=2.5, marker='o', markersize=6)
    
    # 设置坐标轴和标签
    ax2.set_xlabel(f'Path Percentage (Total Steps: {total_steps})', fontsize=12)
    ax2.set_ylabel('Probability', fontsize=12)
    ax2.set_title('Goal Probability Evolution', fontsize=14, pad=20)
    ax2.grid(True, alpha=0.3)
    ax2.set_ylim(-0.05, 1.05)
    ax2.legend()
    ax2.tick_params(labelsize=10)

    # 调整子图之间的间距
    plt.tight_layout()
    plt.show()

def visualize_probability_evolution(planner, save_path='goal_probability_evolution.png'):
    """
    创建并保存目标概率演变的折线图
    """
    plt.figure(figsize=(12, 8))
    ax = plt.gca()
    
    total_steps = len(planner.probability_history['true'])
    x = np.linspace(0, 100, total_steps)  # 转换为百分比
    
    # 绘制真实目标概率（深蓝色）
    plt.plot(x, planner.probability_history['true'], 
            label='True Goal', color='darkblue',
            linewidth=2.5, marker='o', markersize=6)
    
    # 绘制假目标概率（浅蓝色）
    for i in range(len(planner.fake_planners)):
        plt.plot(x, planner.probability_history[f'fake_{i}'],
                label=f'Fake Goal {i+1}', color='lightblue',
                linewidth=2.5, marker='o', markersize=6)
    
    # 设置坐标轴和标签
    plt.xlabel(f'Path Progress ({total_steps} Steps)', fontsize=12)
    plt.ylabel('Probability', fontsize=12)
    plt.title('Goal Probability Evolution', fontsize=14, pad=20)
    
    # 设置x轴范围和刻度
    plt.xlim(0, 100)
    plt.xticks(np.linspace(0, 100, 11))  # 0%, 10%, 20%, ..., 100%
    
    # 设置y轴范围
    plt.ylim(-0.05, 1.05)
    
    # 添加网格
    plt.grid(True, alpha=0.3)
    
    # 优化图例
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.tick_params(labelsize=10)
    
    # 调整布局以确保图例完全显示
    plt.tight_layout()
    
    # 保存图像
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.close()

def visualize_path(map_env, path, start_point, true_goal, fake_goals, title="Value Deceptive Path", save_path='value_deceptive_path.png'):
    """使用红色路径和指定颜色的标记点可视化路径并保存"""
    plt.figure(figsize=(12, 10))
    ax = plt.gca()
    
    # 设置背景颜色为浅灰色
    ax.set_facecolor('#f8f9fa')
    
    # 创建网格底图
    height, width = map_env.shape
    for i in range(width + 1):
        ax.axvline(i - 0.5, color='#dee2e6', linewidth=0.5)
    for i in range(height + 1):
        ax.axhline(i - 0.5, color='#dee2e6', linewidth=0.5)
    
    # 绘制障碍物
    for y in range(height):
        for x in range(width):
            if map_env[y, x]:
                rect = plt.Rectangle(
                    (x-0.5, y-0.5), 1, 1,
                    facecolor='#343a40',
                    edgecolor='#212529',
                    linewidth=1,
                    alpha=0.8
                )
                ax.add_patch(rect)
    
    # 绘制路径（使用红色实线）
    if path and len(path) > 1:
        path_array = np.array(path)
        plt.plot(path_array[:, 0], path_array[:, 1], 
                '-', color='red', linewidth=2.5)
        plt.scatter(path_array[:, 0], path_array[:, 1], 
                   color='red', s=50, zorder=5)
    
    # 绘制起点（橙色）
    plt.plot(start_point[0], start_point[1], 'o', color='orange',
             markersize=15, label='Start', zorder=6)
    
    # 绘制真实目标点（深蓝色）
    plt.plot(true_goal[0], true_goal[1], 'o', color='darkblue',
             markersize=15, label='True Goal', zorder=6)
    
    # 绘制所有假目标点（浅蓝色）
    for fake_goal in fake_goals:
        plt.plot(fake_goal[0], fake_goal[1], 'o', 
                color='lightblue',
                markersize=15, 
                label='Fake Goal', 
                zorder=6)
    
    # 设置图形属性
    plt.title(title, pad=15, fontsize=14)
    plt.grid(True, alpha=0.2)
    plt.legend(loc='upper right')
    plt.xlim(-0.5, width-0.5)
    plt.ylim(-0.5, height-0.5)
    plt.gca().set_aspect('equal')
    
    # 保存图像
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.close()

def main(action_model=0, sigma=0):
    # 从文件读取问题配置
    with open('problem_configs.txt', 'r', encoding='utf-8') as f:
        config_content = f.read()
    # 去掉开头的"problem_configs = "部分
    config_content = config_content.replace("problem_configs = ", "")
    # 使用eval将字符串转换为字典（确保输入文件内容可信）
    problem_configs = eval(config_content)
    
    # 为每个地图生成策略
    for problem_name, config in problem_configs.items():
        map_name = config["map"]
        print(f"\nProcessing problem: {problem_name} with map {map_name}")
        start = config["start"]
        true_goal = config["true_goal"]
        fake_goals = config["fake_goals"]
    
        # 真目标：加载策略、创建规划器
        true_policy_file = f"{map_name.split('.')[0]}_true_goal_{true_goal[0]}_{true_goal[1]}_policy.npz"
        true_planner = PolicyIterationPlanner(map_name, start, true_goal)
        true_planner.policy, true_planner.value_function, true_planner.q_table = load_policy(true_policy_file)
        
        # 假目标：加载策略、创建规划器
        fake_planners = []
        for fake_goal in fake_goals:
            fake_policy_file = f"{map_name.split('.')[0]}_fake_goal_{fake_goal[0]}_{fake_goal[1]}_policy.npz"
            fake_planner = PolicyIterationPlanner(map_name, start, fake_goal)
            fake_planner.policy, fake_planner.value_function, fake_planner.q_table = load_policy(fake_policy_file)
            fake_planners.append(fake_planner)

        # dcstar：加载两个策略、创建规划器
        D, dcstar, gi_index = load_states_from_file(f"{problem_name}_D_dcstar_gi.txt")
        dcstar_policy_file = f"{problem_name}_dcstar_{dcstar[0]}_{dcstar[1]}_policy.npz"
        dcstar_planner = dcstar_PolicyIterationPlanner(map_name, start, dcstar)
        dcstar_planner.policy, dcstar_planner.value_function, dcstar_planner.q_table = load_policy(dcstar_policy_file)
        # 创建Value Deceptive Model规划器
        print("Creating Value Deceptive Model planner...")
        vdm_planner = ValueDeceptiveModelPlanner(
            true_planner=true_planner,
            fake_planners=fake_planners,
            dcstar_planner=dcstar_planner,  # 使用dcstar规划器
            delta=0,  # 设置δ阈值
            sigma=0,  # 设置σ系数
            gi_index=gi_index  # 设置gi索引
        )
        # 创建保存路径
        trajectorys_dir = "trajectorys"
        if not os.path.exists(trajectorys_dir):
            os.makedirs(trajectorys_dir)
            
        # 生成基于Value Deceptive Model的路径
        print("Planning value deceptive path...")
        vdm_path = vdm_planner.plan_path(start, dcstar, action_model=action_model, sigma=sigma, gi_index=gi_index)

        # 生成文件名
        trajectory_img_filename = os.path.join(trajectorys_dir, f"{problem_name}_action{action_model}_trajectory.png")
        probability_filename = os.path.join(trajectorys_dir, f"{problem_name}_action{action_model}_probability.png")
        trajectory_txt_filename = os.path.join(trajectorys_dir, f"{problem_name}_action{action_model}_trajectory.txt")

        # 保存概率演变图
        print("Generating and saving probability evolution plot...")
        visualize_probability_evolution(vdm_planner, probability_filename)

        # 保存路径图和轨迹文本
        print("Generating and saving path visualization and trajectory data...")
        visualize_path(true_planner.map, vdm_path, start, true_goal, fake_goals, 
                    save_path=trajectory_img_filename)
        save_trajectory_to_file(vdm_path, trajectory_txt_filename)
            
        # 打印统计信息
        print(f"Value deceptive path length: {len(vdm_path)}")
        print(f"Value deceptive path reached true goal: {tuple(vdm_path[-1]) == tuple(true_goal)}")
    else:
        print("Action model not supported. Please choose a valid action model (0-4).")


if __name__ == "__main__":
    # action_model = 0  # Honest
    # action_model = 1  # AM
    # action_model = 2  # Ambiguity-VDM
    # action_model = 3  # Exaggeration(δ=0)-VDM
    # main(action_model=0)
    # main(action_model=1)
    # main(action_model=2)
    main(action_model=3,sigma=0.66)  # Exaggeration(δ=1)-VDM