# import json
# import base64
# import os
# import html # <-- 新增导入

# # --- 1. 配置区域 ---
# # [必须修改] 指向你带有ID的JSON文件
# json_file_path = "dataset_with_ids.json" 

# MAX_ITEMS_TO_VISUALIZE = 500
# output_html_file = "review_visualization_with_item.html"

# # --- 2. 核心功能代码 ---

# def image_to_base64(image_path):
#     """读取图片文件并将其转换为Base64编码的字符串"""
#     try:
#         with open(image_path, "rb") as image_file:
#             return f"data:image/png;base64,{base64.b64encode(image_file.read()).decode('utf-8')}"
#     except Exception:
#         return ""

# def generate_review_visualization():
#     """主函数，从文件加载数据并生成包含完整Item信息的可视化页面"""
#     print(f"正在从文件加载JSON数据: {json_file_path}")
#     if not os.path.exists(json_file_path):
#         print(f"错误：JSON文件未找到！请检查路径: {os.path.abspath(json_file_path)}")
#         return
#     with open(json_file_path, "r", encoding="utf-8") as f:
#         full_data = json.load(f)

#     data_to_process = full_data[:MAX_ITEMS_TO_VISUALIZE]
    
#     html_content = """
#     <!DOCTYPE html>
#     <html lang="zh-CN">
#     <head>
#         <meta charset="UTF-8">
#         <title>答案审查可视化 (带完整Item)</title>
#         <style>
#             body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif; background-color: #f0f2f5; margin: 0; padding: 20px; }
#             .container { max-width: 900px; margin: auto; }
#             .item-card { background: #fff; border: 1px solid #ddd; border-radius: 8px; margin-bottom: 20px; padding: 20px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }
#             .item-card h2 { display: flex; justify-content: space-between; align-items: center; margin-top:0; }
#             .item-card .unique-id { font-size: 0.8em; color: #6c757d; background-color: #f8f9fa; padding: 3px 8px; border-radius: 4px; font-family: monospace; }
#             .item-card img { max-width: 100%; border-radius: 4px; margin-bottom: 15px; }
#             .answer-box { font-weight: bold; font-size: 1.2em; padding: 5px 12px; border-radius: 5px; display: inline-block; margin-left: 10px; }
#             .correct { color: #28a745; background-color: #eaf6ec; }
#             .incorrect { color: #dc3545; background-color: #fdeeee; }
#             details { margin-top: 15px; border: 1px solid #eee; border-radius: 4px; }
#             summary { padding: 10px; cursor: pointer; background-color: #f8f9fa; font-weight: bold; }
#             pre { background-color: #2b2b2b; color: #f8f8f2; padding: 15px; border-radius: 4px; white-space: pre-wrap; word-wrap: break-word; font-family: 'Courier New', Courier, monospace; font-size: 0.9em; }
#         </style>
#     </head>
#     <body>
#         <div class="container">
#             <h1>答案审查可视化 (带完整Item)</h1>
#     """
    
#     for item in data_to_process:
#         # 提取关键信息
#         unique_id = item.get("unique_id", "无ID")
#         gt = item.get("output")
#         extracted = item.get("extracted_answer")
#         is_correct = (gt and extracted and gt == extracted)
        
#         # --- 新增：将整个 item 字典格式化为漂亮的 JSON 字符串 ---
#         # 使用 html.escape 防止JSON中的特殊字符破坏HTML结构
#         item_json_string = html.escape(json.dumps(item, indent=4, ensure_ascii=False))

#         html_content += f"""
#         <div class="item-card">
#             <h2>
#                 <span>数据审查</span>
#                 <span class="unique-id" title="唯一ID，用于快速定位">{unique_id}</span>
#             </h2>
#             <img src="{image_to_base64(item.get('images', {}).get('img_hist0', ''))}" alt="Image not found">
#             <p><strong>指令:</strong> {item.get("instruction", "")}</p>
#             <p><strong>标准答案 (Ground Truth):</strong> <span class="answer-box">{gt}</span></p>
#             <p><strong>提取答案 (Extracted):</strong> <span class="answer-box {'correct' if is_correct else 'incorrect'}">{extracted}</span></p>
            
#             <details>
#                 <summary>查看完整Item数据</summary>
#                 <pre><code>{item_json_string}</code></pre>
#             </details>
#         </div>
#         """
        
#     html_content += "</div></body></html>"
    
#     with open(output_html_file, "w", encoding="utf-8") as f:
#         f.write(html_content)
        
#     print(f"✨ 可视化完成！请在浏览器中打开文件: {os.path.abspath(output_html_file)}")

# if __name__ == "__main__":
#     generate_review_visualization()


# import json
# import os
# from tqdm import tqdm
# import base64
# import io
# try:
#     from PIL import Image
# except ImportError:
#     print("错误：缺少Pillow库。请先运行 'pip install Pillow' 进行安装。")
#     exit()

# # --- 1. 配置区域 ---

# # [必须修改] 指向你带有ID的JSON文件
# INPUT_JSON_PATH = r"D:\ecnu\2025\nvi\refine\base_test_task2.json" 

# # [可选修改] 输出的HTML文件名
# OUTPUT_HTML_FILE = "dataset_editor_fast.html"

# # [重要-性能] 设置图片在网页中显示的目标宽度（像素）
# # 较小的值（如400）会使页面加载飞快，但图片清晰度会降低。
# # 较大的值（如800）图片更清晰，但文件会稍大。
# IMAGE_TARGET_WIDTH = 400


# def resize_and_encode_image(image_path, target_width):
#     """
#     读取图片、按比例缩放，并将其转换为Base64编码的字符串。
#     """
#     if not image_path or not os.path.exists(image_path):
#         return ""
        
#     try:
#         with Image.open(image_path) as img:
#             # 保持宽高比进行缩放
#             original_width, original_height = img.size
#             if original_width == 0: return "" # 避免除零错误
            
#             aspect_ratio = original_height / original_width
#             target_height = int(target_width * aspect_ratio)
            
#             # 使用高质量的LANCZOS算法进行缩放
#             resized_img = img.resize((target_width, target_height), Image.Resampling.LANCZOS)
            
#             # 将缩放后的图片保存在内存中的字节流里
#             buffer = io.BytesIO()
#             resized_img.save(buffer, format="PNG") # 使用PNG格式以保证质量
#             image_bytes = buffer.getvalue()
            
#             # 对内存中的字节进行Base64编码
#             return base64.b64encode(image_bytes).decode('utf-8')

#     except Exception as e:
#         print(f"警告：处理图片 {image_path} 时发生错误: {e}")
#         return ""


# def generate_interactive_editor():
#     """主函数，生成一个加载速度极快的交互式HTML页面。"""
    
#     print(f"正在从 {INPUT_JSON_PATH} 加载数据...")
#     with open(INPUT_JSON_PATH, "r", encoding="utf-8") as f:
#         data = json.load(f)

#     # --- 关键优化：在Python中预处理图片，进行缩放和编码 ---
#     print(f"正在预处理图片，将其统一缩放至宽度 {IMAGE_TARGET_WIDTH}px 并进行编码...")
#     for item in tqdm(data, desc="图片优化处理"):
#         image_path = item.get("images", {}).get("img_hist0", "")
#         # 调用新的、带缩放功能的函数
#         item["base64_image"] = resize_and_encode_image(image_path, target_width=IMAGE_TARGET_WIDTH)
    
#     json_data_for_html = json.dumps(data, ensure_ascii=False)

#     # --- HTML 模板 (与上一版相同，无需改动) ---
#     html_template = f"""
#     <!DOCTYPE html>
#     <html lang="zh-CN">
#     <head>
#         <meta charset="UTF-8">
#         <title>交互式答案审查与修正工具 (快速加载版)</title>
#         <style>
#             body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif; background-color: #f0f2f5; margin: 0; padding: 20px; }}
#             .container {{ max-width: 900px; margin: auto; }}
#             .header {{ position: sticky; top: 0; background-color: rgba(255, 255, 255, 0.95); padding: 15px; border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); z-index: 10; margin-bottom: 20px; text-align: center; }}
#             .header button {{ font-size: 1.1em; padding: 10px 20px; margin: 0 10px; border: none; border-radius: 5px; cursor: pointer; }}
#             .export-btn {{ background-color: #28a745; color: white; }}
#             .filter-btn {{ background-color: #6c757d; color: white; }}
#             .item-card {{ background: #fff; border: 1px solid #ddd; border-radius: 8px; margin-bottom: 20px; padding: 20px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }}
#             .card-header {{ display: flex; justify-content: space-between; align-items: center; margin-bottom: 15px; }}
#             .card-header h2 {{ margin: 0; }}
#             .card-header .unique-id {{ font-size: 0.8em; color: #6c757d; background-color: #f8f9fa; padding: 3px 8px; border-radius: 4px; font-family: monospace; }}
#             .delete-btn {{ background-color: #dc3545; color: white; border: none; padding: 5px 10px; font-size: 0.9em; border-radius: 4px; cursor: pointer; }}
#             .item-card img {{ max-width: 100%; border-radius: 4px; margin-bottom: 15px; }}
#             .answer-section p {{ display: flex; align-items: center; justify-content: space-between; }}
#             .radio-group label {{ margin-left: 15px; font-size: 1.2em; cursor: pointer; }}
#             .answer-box {{ font-weight: bold; font-size: 1.2em; padding: 5px 12px; border-radius: 5px; display: inline-block; }}
#             .correct {{ color: #28a745; background-color: #eaf6ec; }}
#             .incorrect {{ color: #dc3545; background-color: #fdeeee; }}
#             details {{ margin-top: 15px; border: 1px solid #eee; border-radius: 4px; }}
#             summary {{ padding: 8px; cursor: pointer; background-color: #f8f9fa; font-weight: bold; }}
#             pre {{ background-color: #2b2b2b; color: #f8f8f2; padding: 15px; border-radius: 4px; white-space: pre-wrap; word-wrap: break-word; font-size: 0.9em; }}
#         </style>
#     </head>
#     <body>
#         <div class="container">
#             <div class="header">
#                 <button class="export-btn" onclick="exportJSON()">导出修改后的JSON</button>
#                 <button class="filter-btn" onclick="filterCards('all')">显示全部</button>
#                 <button class="filter-btn" onclick="filterCards('mismatch')">只看错误项</button>
#             </div>
#             <div id="card-container"></div>
#         </div>

#         <script>
#             let dataset = {json_data_for_html};
#             const cardContainer = document.getElementById('card-container');

#             // JavaScript部分完全无需修改，因为图片数据已经由Python处理好了
#             function renderCards() {{
#                 cardContainer.innerHTML = '';
#                 dataset.forEach(item => {{
#                     const isCorrect = item.output === item.extracted_answer;
#                     const cardClass = isCorrect ? 'item-card' : 'item-card mismatch';
#                     const card = document.createElement('div');
#                     card.className = cardClass;
#                     card.id = `card-${{item.unique_id}}`;
#                     const itemJsonString = JSON.stringify(item, null, 4);
                    
#                     card.innerHTML = `
#                         <div class="card-header">
#                             <h2><span class="unique-id">${{item.unique_id}}</span></h2>
#                             <button class="delete-btn" onclick="deleteItem('${{item.unique_id}}')">删除</button>
#                         </div>
#                         <img src="data:image/png;base64,${{item.base64_image}}" alt="图片加载失败或不存在">
#                         <p><strong>指令:</strong> ${{item.instruction || ''}}</p>
#                         <div class="answer-section">
#                             <p><strong>标准答案 (可修改):</strong><span class="radio-group">${{['A', 'B', 'C', 'D'].map(opt => `<label><input type="radio" name="gt_${{item.unique_id}}" value="${{opt}}" ${{item.output === opt ? 'checked' : ''}} onchange="updateAnswer('${{item.unique_id}}', '${{opt}}')">${{opt}}</label>`).join('')}}</span></p>
#                             <p><strong>提取答案 (Extracted):</strong><span class="answer-box ${{isCorrect ? 'correct' : 'incorrect'}}">${{item.extracted_answer || 'N/A'}}</span></p>
#                         </div>
#                         <details><summary>查看完整Item数据</summary><pre><code>${{itemJsonString.replace(/</g, "&lt;").replace(/>/g, "&gt;")}}</code></pre></details>
#                     `;
#                     cardContainer.appendChild(card);
#                 }});
#             }}
#             function updateAnswer(uniqueId, newAnswer) {{ /* ... */ }}
#             function deleteItem(uniqueId) {{ /* ... */ }}
#             function filterCards(filterType) {{ /* ... */ }}
#             function exportJSON() {{ /* ... */ }}
            
#             // 为了简洁，省略了JS函数的具体实现，它们和上一版完全相同
#             function updateAnswer(uniqueId, newAnswer){{const item = dataset.find(d => d.unique_id === uniqueId);if (item) {{item.output = newAnswer;console.log(`Updated ${{uniqueId}}: new answer is ${{newAnswer}}`);}}}}
#             function deleteItem(uniqueId){{if (confirm(`您确定要删除 ID 为 "${{uniqueId}}" 的这条数据吗？`)){{const index = dataset.findIndex(d => d.unique_id === uniqueId);if (index > -1){{dataset.splice(index, 1);const cardElement = document.getElementById(`card-${{uniqueId}}`);if (cardElement){{cardElement.remove();}}console.log(`Deleted item ${{uniqueId}}`);}}}}}}
#             function filterCards(filterType){{const allCards = document.querySelectorAll('.item-card');allCards.forEach(card => {{if (filterType === 'all' || (filterType === 'mismatch' && card.classList.contains('mismatch'))){{card.style.display = 'block';}} else {{card.style.display = 'none';}}}});}}
#             function exportJSON(){{const cleanDataset = dataset.map(item => {{const newItem = {{...item}};delete newItem.base64_image;return newItem;}});const filename = 'corrected_dataset.json';const jsonStr = JSON.stringify(cleanDataset, null, 4);const blob = new Blob([jsonStr], {{ type: 'application/json' }});const url = URL.createObjectURL(blob);const a = document.createElement('a');a.href = url;a.download = filename;document.body.appendChild(a);a.click();document.body.removeChild(a);URL.revokeObjectURL(url);alert(`JSON文件已开始下载！共包含 ${{cleanDataset.length}} 条数据。`);}}

#             renderCards();
#         </script>
#     </body>
#     </html>
#     """

#     with open(OUTPUT_HTML_FILE, "w", encoding="utf-8") as f:
#         f.write(html_template)
        
#     print(f"✨ 交互式编辑器生成完毕！请在浏览器中打开文件: {os.path.abspath(OUTPUT_HTML_FILE)}")


# if __name__ == "__main__":
#     generate_interactive_editor()


import matplotlib.pyplot as plt
import numpy as np

# --- 1. Data preparation ---
# Replace the placeholder numbers below with your real measurements.
# Names of the 12 categories; they are used as the x-axis tick labels.
categories = [
    'Bed', 'Chair', 'Table', 'Sofa',
    'Cabinet', 'Door', 'Window', 'Lamp',
    'TV', 'Fireplace', 'Stairs', 'Plant',
]

# Three datasets, each carrying a display name and one value per category
# (same order as `categories`).
# NOTE: the numbers are made up for demonstration and deliberately differ
# by roughly an order of magnitude between datasets.
dataset1_data = dict(
    name='Dataset A',
    values=[
        8000, 15000, 12000, 7000, 9000, 11000,
        13000, 6000, 5000, 2000, 3000, 4000,
    ],
)
dataset2_data = dict(
    name='Dataset B',
    values=[
        25000, 40000, 35000, 20000, 30000, 32000,
        38000, 18000, 15000, 7000, 9000, 12000,
    ],
)
# Stand-in for your own dataset, which is the largest of the three.
dataset3_data = dict(
    name='Ours',
    values=[
        150000, 250000, 200000, 120000, 180000, 220000,
        240000, 110000, 90000, 40000, 60000, 80000,
    ],
)

# --- 2. Figure set-up ---
# One x position per category; each category shows three side-by-side bars.
x = np.arange(len(categories))
width = 0.25  # width of a single bar

fig, ax = plt.subplots(figsize=(16, 8))

# One bar series per dataset, in muted (low-saturation) colours. The series
# are placed one bar-width left of, exactly on, and one bar-width right of
# each category tick.
bar_series = (
    (x - width, dataset1_data, '#a1c9f4'),
    (x, dataset2_data, '#b2e0a2'),
    (x + width, dataset3_data, '#fbc49f'),
)
rects1, rects2, rects3 = (
    ax.bar(positions, series['values'], width, label=series['name'], color=colour)
    for positions, series, colour in bar_series
)

# --- 3. Styling (publication-oriented) ---
# Title and axis labels.
axis_label_size = 14
ax.set_title(
    'Comparison of Dataset Scales by Category',
    fontsize=18,
    fontweight='bold',
)
ax.set_ylabel('Occupancy Count (Log Scale)', fontsize=axis_label_size)
ax.set_xlabel('Object Categories', fontsize=axis_label_size)

# Category names as slanted tick labels on the x-axis.
ax.set_xticks(x)
ax.set_xticklabels(categories, rotation=45, ha="right", fontsize=12)

# Key step: a logarithmic y-axis keeps datasets of very different
# magnitudes readable on the same plot.
ax.set_yscale('log')

ax.legend(fontsize=12)

# Dashed horizontal grid lines make bar heights easier to read.
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Let matplotlib adjust the layout so the labels do not overlap.
fig.tight_layout()

# --- 4. Save and display ---
# Write a PDF (recommended for LaTeX/Word).
plt.savefig("dataset_scale_comparison.pdf", format='pdf', bbox_inches='tight')
# Alternative: a high-DPI PNG instead.
# plt.savefig("dataset_scale_comparison.png", dpi=300, bbox_inches='tight')

# Display the figure.
plt.show()

# Per-dataset totals, printed so they can be reported in the paper.
all_datasets = (dataset1_data, dataset2_data, dataset3_data)
total_1, total_2, total_3 = (sum(series['values']) for series in all_datasets)

for series, total in zip(all_datasets, (total_1, total_2, total_3)):
    print(f"Total Occupancy for {series['name']}: {total:,}")

