{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "47b25cef",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "==== 16 种 MBTI 类型统计 ====\n",
      "ENTP: 210 (6.2%)\n",
      "ESTP: 215 (6.3%)\n",
      "INFP: 224 (6.6%)\n",
      "ENFP: 212 (6.2%)\n",
      "ENFJ: 213 (6.3%)\n",
      "ISFJ: 210 (6.2%)\n",
      "ESFP: 207 (6.1%)\n",
      "ISTJ: 206 (6.1%)\n",
      "ESFJ: 218 (6.4%)\n",
      "ESTJ: 212 (6.2%)\n",
      "ISFP: 219 (6.4%)\n",
      "ISTP: 210 (6.2%)\n",
      "INFJ: 212 (6.2%)\n",
      "INTP: 214 (6.3%)\n",
      "INTJ: 212 (6.2%)\n",
      "ENTJ: 210 (6.2%)\n",
      "\n",
      "==== 四个维度统计 ====\n",
      "E: 1697 (49.9%)\n",
      "I: 1707 (50.1%)\n",
      "S: 1697 (49.9%)\n",
      "N: 1707 (50.1%)\n",
      "T: 1689 (49.6%)\n",
      "F: 1715 (50.4%)\n",
      "J: 1693 (49.7%)\n",
      "P: 1711 (50.3%)\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "from collections import Counter\n",
    "\n",
    "# 读取 JSON 文件\n",
    "with open(\"test.json\", \"r\", encoding=\"utf-8\") as f:\n",
    "    data = json.load(f)\n",
    "\n",
    "# 提取 type 字段\n",
    "types = [item[\"type\"].upper() for item in data if \"type\" in item]\n",
    "\n",
    "# 1. 统计每种 MBTI 类型数量\n",
    "type_count = Counter(types)\n",
    "total_types = sum(type_count.values())\n",
    "\n",
    "# 2. 分维度统计\n",
    "dimension_count = {\n",
    "    \"E\": 0, \"I\": 0,\n",
    "    \"S\": 0, \"N\": 0,\n",
    "    \"T\": 0, \"F\": 0,\n",
    "    \"J\": 0, \"P\": 0\n",
    "}\n",
    "\n",
    "for t in types:\n",
    "    dimension_count[t[0]] += 1  # E/I\n",
    "    dimension_count[t[1]] += 1  # S/N\n",
    "    dimension_count[t[2]] += 1  # T/F\n",
    "    dimension_count[t[3]] += 1  # J/P\n",
    "\n",
    "# ========== 输出结果 ==========\n",
    "print(\"==== 16 种 MBTI 类型统计 ====\")\n",
    "for mbti, count in type_count.items():\n",
    "    print(f\"{mbti}: {count} ({count/total_types:.1%})\")\n",
    "\n",
    "print(\"\\n==== 四个维度统计 ====\")\n",
    "for dim, count in dimension_count.items():\n",
    "    print(f\"{dim}: {count} ({count/total_types:.1%})\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "8bfca2a6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "==== 16 种 MBTI 类型统计 ====\n",
      "INTP: 332458 (26.4%)\n",
      "ENTP: 108536 (8.6%)\n",
      "ENTJ: 50234 (4.0%)\n",
      "INFP: 111786 (8.9%)\n",
      "INTJ: 311165 (24.7%)\n",
      "INFJ: 119530 (9.5%)\n",
      "ENFJ: 17575 (1.4%)\n",
      "ISFJ: 11517 (0.9%)\n",
      "ISTP: 57552 (4.6%)\n",
      "ISTJ: 41010 (3.3%)\n",
      "ESTP: 8893 (0.7%)\n",
      "ENFP: 60466 (4.8%)\n",
      "ESFJ: 3461 (0.3%)\n",
      "ESTJ: 4856 (0.4%)\n",
      "ESFP: 7343 (0.6%)\n",
      "ISFP: 14853 (1.2%)\n",
      "\n",
      "==== 四个维度统计 ====\n",
      "E: 261364 (20.7%)\n",
      "I: 999871 (79.3%)\n",
      "S: 149485 (11.9%)\n",
      "N: 1111750 (88.1%)\n",
      "T: 914704 (72.5%)\n",
      "F: 346531 (27.5%)\n",
      "J: 559348 (44.3%)\n",
      "P: 701887 (55.7%)\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "from collections import Counter\n",
    "\n",
    "# 读取 JSON 文件\n",
    "with open(\"filtered_processed_comments_300.json\", \"r\", encoding=\"utf-8\") as f:\n",
    "    data = json.load(f)\n",
    "\n",
    "# 提取 type 字段\n",
    "types = [item[\"type\"].upper() for item in data if \"type\" in item]\n",
    "\n",
    "# 1. 统计每种 MBTI 类型数量\n",
    "type_count = Counter(types)\n",
    "total_types = sum(type_count.values())\n",
    "\n",
    "# 2. 分维度统计\n",
    "dimension_count = {\n",
    "    \"E\": 0, \"I\": 0,\n",
    "    \"S\": 0, \"N\": 0,\n",
    "    \"T\": 0, \"F\": 0,\n",
    "    \"J\": 0, \"P\": 0\n",
    "}\n",
    "\n",
    "for t in types:\n",
    "    dimension_count[t[0]] += 1  # E/I\n",
    "    dimension_count[t[1]] += 1  # S/N\n",
    "    dimension_count[t[2]] += 1  # T/F\n",
    "    dimension_count[t[3]] += 1  # J/P\n",
    "\n",
    "# ========== 输出结果 ==========\n",
    "print(\"==== 16 种 MBTI 类型统计 ====\")\n",
    "for mbti, count in type_count.items():\n",
    "    print(f\"{mbti}: {count} ({count/total_types:.1%})\")\n",
    "\n",
    "print(\"\\n==== 四个维度统计 ====\")\n",
    "for dim, count in dimension_count.items():\n",
    "    print(f\"{dim}: {count} ({count/total_types:.1%})\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "e0d2225d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Traits      Initial      Modified   0   1   2 MBTI Type\n",
      "   I/E 6676 (77.0%) 17069 (50.2%) NaN NaN NaN       NaN\n",
      "       1999 (23.0%) 16963 (49.8%) NaN NaN NaN       NaN\n",
      "   S/N 1197 (13.8%) 16968 (49.9%) NaN NaN NaN       NaN\n",
      "       7478 (86.2%) 17064 (50.1%) NaN NaN NaN       NaN\n",
      "   T/F 3981 (45.9%) 16898 (49.7%) NaN NaN NaN       NaN\n",
      "       4694 (54.1%) 17134 (50.3%) NaN NaN NaN       NaN\n",
      "   P/J 5241 (60.4%) 17104 (50.3%) NaN NaN NaN       NaN\n",
      "       3434 (39.6%) 16928 (49.7%) NaN NaN NaN       NaN\n",
      "   NaN          NaN           NaN                   NaN\n",
      "   NaN   190 (2.2%)   2126 (6.2%) NaN NaN NaN      ENFJ\n",
      "   NaN   675 (7.8%)   2117 (6.2%) NaN NaN NaN      ENFP\n",
      "   NaN   231 (2.7%)   2105 (6.2%) NaN NaN NaN      ENTJ\n",
      "   NaN   685 (7.9%)   2102 (6.2%) NaN NaN NaN      ENTP\n",
      "   NaN    42 (0.5%)   2177 (6.4%) NaN NaN NaN      ESFJ\n",
      "   NaN    48 (0.6%)   2068 (6.1%) NaN NaN NaN      ESFP\n",
      "   NaN    39 (0.4%)   2120 (6.2%) NaN NaN NaN      ESTJ\n",
      "   NaN    89 (1.0%)   2148 (6.3%) NaN NaN NaN      ESTP\n",
      "   NaN 1470 (16.9%)   2120 (6.2%) NaN NaN NaN      INFJ\n",
      "   NaN 1832 (21.1%)   2235 (6.6%) NaN NaN NaN      INFP\n",
      "   NaN 1091 (12.6%)   2115 (6.2%) NaN NaN NaN      INTJ\n",
      "   NaN 1304 (15.0%)   2144 (6.3%) NaN NaN NaN      INTP\n",
      "   NaN   166 (1.9%)   2103 (6.2%) NaN NaN NaN      ISFJ\n",
      "   NaN   271 (3.1%)   2188 (6.4%) NaN NaN NaN      ISFP\n",
      "   NaN   205 (2.4%)   2062 (6.1%) NaN NaN NaN      ISTJ\n",
      "   NaN   337 (3.9%)   2102 (6.2%) NaN NaN NaN      ISTP\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# ================== 原始数据 ==================\n",
    "initial_type_count = {\n",
    "    'INFP': 1832, 'INFJ': 1470, 'INTP': 1304, 'INTJ': 1091,\n",
    "    'ENTP': 685, 'ENFP': 675, 'ISTP': 337, 'ISFP': 271,\n",
    "    'ENTJ': 231, 'ISTJ': 205, 'ENFJ': 190, 'ISFJ': 166,\n",
    "    'ESTP': 89, 'ESFP': 48, 'ESFJ': 42, 'ESTJ': 39\n",
    "}\n",
    "initial_dimension_count = {'E': 1999, 'I': 6676, 'S': 1197, 'N': 7478,\n",
    "                           'T': 3981, 'F': 4694, 'J': 3434, 'P': 5241}\n",
    "\n",
    "# ================== 增强数据 ==================\n",
    "modified_type_count = {\n",
    "    'INFP': 2235, 'ISFP': 2188, 'ESFJ': 2177, 'ESTP': 2148,\n",
    "    'INTP': 2144, 'ENFJ': 2126, 'INFJ': 2120, 'ESTJ': 2120,\n",
    "    'ENFP': 2117, 'INTJ': 2115, 'ENTJ': 2105, 'ISFJ': 2103,\n",
    "    'ENTP': 2102, 'ISTP': 2102, 'ESFP': 2068, 'ISTJ': 2062\n",
    "}\n",
    "modified_dimension_count = {'E': 16963, 'I': 17069, 'S': 16968, 'N': 17064,\n",
    "                            'T': 16898, 'F': 17134, 'J': 16928, 'P': 17104}\n",
    "\n",
    "# ================== 总数 ==================\n",
    "initial_total = sum(initial_type_count.values())\n",
    "modified_total = sum(modified_type_count.values())\n",
    "\n",
    "# ================== Traits 表 ==================\n",
    "traits_data = [\n",
    "    [\"I/E\", f\"{initial_dimension_count['I']} ({initial_dimension_count['I']/initial_total:.1%})\",\n",
    "            f\"{modified_dimension_count['I']} ({modified_dimension_count['I']/modified_total:.1%})\"],\n",
    "    [\"\",    f\"{initial_dimension_count['E']} ({initial_dimension_count['E']/initial_total:.1%})\",\n",
    "            f\"{modified_dimension_count['E']} ({modified_dimension_count['E']/modified_total:.1%})\"],\n",
    "    [\"S/N\", f\"{initial_dimension_count['S']} ({initial_dimension_count['S']/initial_total:.1%})\",\n",
    "            f\"{modified_dimension_count['S']} ({modified_dimension_count['S']/modified_total:.1%})\"],\n",
    "    [\"\",    f\"{initial_dimension_count['N']} ({initial_dimension_count['N']/initial_total:.1%})\",\n",
    "            f\"{modified_dimension_count['N']} ({modified_dimension_count['N']/modified_total:.1%})\"],\n",
    "    [\"T/F\", f\"{initial_dimension_count['T']} ({initial_dimension_count['T']/initial_total:.1%})\",\n",
    "            f\"{modified_dimension_count['T']} ({modified_dimension_count['T']/modified_total:.1%})\"],\n",
    "    [\"\",    f\"{initial_dimension_count['F']} ({initial_dimension_count['F']/initial_total:.1%})\",\n",
    "            f\"{modified_dimension_count['F']} ({modified_dimension_count['F']/modified_total:.1%})\"],\n",
    "    [\"P/J\", f\"{initial_dimension_count['P']} ({initial_dimension_count['P']/initial_total:.1%})\",\n",
    "            f\"{modified_dimension_count['P']} ({modified_dimension_count['P']/modified_total:.1%})\"],\n",
    "    [\"\",    f\"{initial_dimension_count['J']} ({initial_dimension_count['J']/initial_total:.1%})\",\n",
    "            f\"{modified_dimension_count['J']} ({modified_dimension_count['J']/modified_total:.1%})\"]\n",
    "]\n",
    "traits_df = pd.DataFrame(traits_data, columns=[\"Traits\", \"Initial\", \"Modified\"])\n",
    "\n",
    "# ================== Types 表 ==================\n",
    "types_data = []\n",
    "for t in sorted(initial_type_count.keys()):\n",
    "    init_val = initial_type_count[t]\n",
    "    mod_val = modified_type_count[t]\n",
    "    types_data.append([\n",
    "        t,\n",
    "        f\"{init_val} ({init_val/initial_total:.1%})\",\n",
    "        f\"{mod_val} ({mod_val/modified_total:.1%})\"\n",
    "    ])\n",
    "types_df = pd.DataFrame(types_data, columns=[\"MBTI Type\", \"Initial\", \"Modified\"])\n",
    "\n",
    "# ================== 合并成一个大表 ==================\n",
    "# 上半部分 Traits，下半部分 Types\n",
    "final_table = pd.concat([\n",
    "    traits_df,\n",
    "    pd.DataFrame([[\"\", \"\", \"\"]]),   # 空行\n",
    "    types_df\n",
    "], ignore_index=True)\n",
    "\n",
    "# ================== 显示 ==================\n",
    "print(final_table.to_string(index=False))\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
