#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json
from collections import Counter
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import os

# Configuration: input / output file locations.
META_PATH = './data/leetcode/dataset/problem-meta.json'
OUTPUT_DIR = './data/leetcode/'
DIFF_PIE_FILE = 'difficulty_distribution.png'
TAG_PIE_FILE = 'tag_distribution.png'
# Only the N most frequent tags get their own slice; the rest go to "Other".
TOP_N_TAGS = 10

# Make sure the output directory exists, then resolve both chart paths.
os.makedirs(OUTPUT_DIR, exist_ok=True)
diff_output_path, tag_output_path = (
    os.path.join(OUTPUT_DIR, file_name)
    for file_name in (DIFF_PIE_FILE, TAG_PIE_FILE)
)

# 1. Load the problem metadata JSON from META_PATH.
with open(META_PATH, mode='r', encoding='utf-8') as meta_file:
    data = json.loads(meta_file.read())

# 2. Count problems per difficulty level (normalised to upper case).
# `data` is iterated via .values() and each value is queried with .get(), so
# it is expected to be a mapping of per-problem dicts.
# dict.get() only falls back to its default when the key is missing; an
# explicit JSON null comes back as None, so `or ''` guards the .upper() call.
difficulties = [
    (info.get('difficulty') or '').upper() for info in data.values()
]
diff_counter = Counter(difficulties)

# 3. Count how many times each topic tag occurs across all problems.
# `or []` likewise treats a null 'topicTags' value the same as a missing key.
# NOTE(review): tags must be hashable to be counted (presumably plain
# strings) -- confirm against the metadata schema.
tag_counter = Counter(
    tag
    for info in data.values()
    for tag in (info.get('topicTags') or [])
)

# 4. Report both tallies on stdout.
print("=== Difficulty Distribution ===")
for level in ('EASY', 'MEDIUM', 'HARD'):
    # Levels that never occur are still listed, with a count of 0.
    level_total = diff_counter.get(level, 0)
    print(f"{level:6s}: {level_total}")

print("\n=== Tag Counts ===")
# most_common() with no argument yields every tag, most frequent first.
for entry in tag_counter.most_common():
    tag, cnt = entry
    print(f"{tag:20s}: {cnt}")

# 5. Draw the difficulty pie chart and save it to disk.
# Levels with a zero count are left out so the chart has no empty slices.
present_levels = [
    (level_name, diff_counter.get(level_name, 0))
    for level_name in ('EASY', 'MEDIUM', 'HARD')
    if diff_counter.get(level_name, 0) > 0
]
labels = [level_name for level_name, _ in present_levels]
sizes = [level_count for _, level_count in present_levels]

diff_fig, diff_ax = plt.subplots(figsize=(6, 6))
diff_ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
diff_ax.set_title("Problem Difficulty Distribution")
# Equal aspect ratio keeps the pie circular.
diff_ax.axis('equal')
diff_fig.savefig(diff_output_path, dpi=300, bbox_inches='tight')
plt.close(diff_fig)
print(f'✅ 难度分布饼图已保存到：{diff_output_path}')

# 6. Prepare the tag distribution data (top N tags plus an aggregated "Other").
most_common = tag_counter.most_common(TOP_N_TAGS)
# Split the (tag, count) pairs with comprehensions rather than
# `zip(*most_common)`: unpacking an empty zip into two names raises
# ValueError when no tags were collected.
top_labels = tuple(tag_name for tag_name, _ in most_common)
top_counts = tuple(tag_count for _, tag_count in most_common)
other_count = sum(tag_counter.values()) - sum(top_counts)

pie_labels = list(top_labels)
pie_sizes = list(top_counts)
if other_count > 0:
    # Everything outside the top N is merged into a single slice.
    pie_labels.append('Other')
    pie_sizes.append(other_count)

if pie_sizes:
    # matplotlib.cm.get_cmap was deprecated and then removed in
    # Matplotlib 3.9; pyplot.get_cmap is the equivalent that remains.
    cmap = plt.get_cmap('tab20')
    # Wrap the index so more slices than palette entries reuse colours
    # instead of all falling on the last one.
    colors = [cmap(idx % cmap.N) for idx in range(len(pie_sizes))]

    # 7. Draw the tag pie chart and save it to disk.
    plt.figure(figsize=(8, 8))
    plt.pie(pie_sizes, labels=pie_labels, autopct='%1.1f%%', startangle=90, colors=colors)
    plt.title(f"Top {TOP_N_TAGS} Tags Distribution")
    # Equal aspect ratio keeps the pie circular.
    plt.axis('equal')
    plt.savefig(tag_output_path, dpi=300, bbox_inches='tight')
    plt.close()
    print(f'✅ 标签分布饼图已保存到：{tag_output_path}')
else:
    # No tags at all: there is nothing to plot, so skip the chart instead of
    # crashing or writing an empty image.
    print('⚠️ 未找到任何标签，已跳过标签分布饼图')
