{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7cabc15d-63a2-40c5-968b-19e1eb7707d7",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import re\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import random"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9b8a0cb8-2141-4210-9937-4bfdeb2670db",
   "metadata": {},
   "source": [
    "# Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "5fc90e55-3cf8-4196-a45e-40619bb39df4",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "--2024-09-23 10:40:01--  https://raw.githubusercontent.com/flageval-baai/FlagEval/master/csem/SLPWC_v1.csv\n",
      "正在解析主机 raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...\n",
      "正在连接 raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... 已连接。\n",
      "已发出 HTTP 请求，正在等待回应... 200 OK\n",
      "长度： 87711 (86K) [text/plain]\n",
      "正在保存至: “./Data/SLPWC_v1.csv”\n",
      "\n",
      "     0K .......... .......... .......... .......... .......... 58%  435K 0s\n",
      "    50K .......... .......... .......... .....                100% 1.72M=0.1s\n",
      "\n",
      "2024-09-23 10:40:02 (634 KB/s) - 已保存 “./Data/SLPWC_v1.csv” [87711/87711])\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fetch the SLPWC_v1.csv file into ./Data; skipped when the file is already there.\n",
    "slpwc_url = \"https://raw.githubusercontent.com/flageval-baai/FlagEval/master/csem/SLPWC_v1.csv\"\n",
    "slpwc_path = os.path.join('Data', 'SLPWC_v1.csv')\n",
    "\n",
    "os.makedirs('Data', exist_ok=True)  # no-op when the folder already exists\n",
    "if not os.path.exists(slpwc_path):\n",
    "    # Without this guard every re-run makes wget save another copy (SLPWC_v1.csv.1, .2, ...).\n",
    "    status = os.system(\"wget -P ./Data \" + slpwc_url)\n",
    "    if status != 0:\n",
    "        # Remove a partial download so the next run retries instead of skipping.\n",
    "        if os.path.exists(slpwc_path):\n",
    "            os.remove(slpwc_path)\n",
    "        raise RuntimeError(\"wget failed (os.system returned %s) for %s\" % (status, slpwc_url))\n",
    "slpwc_path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "2bb0fd9a-3210-4ace-8b9f-24c504f2f153",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>问题</th>\n",
       "      <th>选项1</th>\n",
       "      <th>选项2</th>\n",
       "      <th>选项3</th>\n",
       "      <th>选项4</th>\n",
       "      <th>答案</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>以下哪句话中“配方”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>通过配方，方程变成了完全平方式。</td>\n",
       "      <td>还没有把握是否能找到一个配方。</td>\n",
       "      <td>还是原来的配方，还是原来的味道，客人们都吃的非常的开心。</td>\n",
       "      <td>我们自己生产护发产品，都使用草药配方。</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>以下哪句话中“装饰”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>装饰的华丽可以显示出一个人的富有</td>\n",
       "      <td>这位装修工用极其现代化的陈设装饰这家旅馆。</td>\n",
       "      <td>极具艺术气息的装饰画，如摩登女郎般跃然于空中，打破传统限制</td>\n",
       "      <td>商店的橱窗里陈列着许多漂亮的装饰品。</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>以下哪句话中“陆”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>上课铃声响了，正在游戏的同学们陆续地回教室准备上课。</td>\n",
       "      <td>图上部的横线段表示陆地。</td>\n",
       "      <td>陆上侵蚀常夷平成准平原。</td>\n",
       "      <td>经济萧条正在大陆上蔓延。</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>以下哪句话中“黄牛”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>我去年就被迫向黄牛党买票。</td>\n",
       "      <td>如果年检费涨价了，年检站肯定会为客户提供一站式的年检服务，让车主不用再找“黄牛”代办。</td>\n",
       "      <td>黄牛们坚持漫天讨价。</td>\n",
       "      <td>黄牛虽然没有荷兰牧场的黑白花牛漂亮，但是它却很勤劳，和家乡里的人一样。</td>\n",
       "      <td>D</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>以下哪句话中“骨干”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>哥哥参加工作不久，就成了研究所的业务骨干。</td>\n",
       "      <td>加强对国有骨干企业领导人员的管理</td>\n",
       "      <td>将每棵树作为一个树冠很矮的骨干枝来处理</td>\n",
       "      <td>我们应该把这些教学骨干大胆地提拔起来。</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>295</th>\n",
       "      <td>以下哪句话中“点子”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>我们需要一些新的点子来创新我们的产品</td>\n",
       "      <td>在头脑风暴会议中，大家纷纷贡献自己的点子。</td>\n",
       "      <td>我的衣服上溅上了几滴油点子</td>\n",
       "      <td>他总是能够迅速提出独特的点子解决问题</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>296</th>\n",
       "      <td>以下哪句话中“相机”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>同时，军区大院南门外的胜利路两端也各放了一个连，准备相机行事。</td>\n",
       "      <td>可是我等了你两天你也没出来，我刚把相机放回去你就出来了！</td>\n",
       "      <td>照相要取好景物的范围，选好照相的位置及照相机的光圈和拍摄的速度等。</td>\n",
       "      <td>目前使用扫描仪、数码摄像机和数码照相机作为图片和影像输入的也越来越多。</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>297</th>\n",
       "      <td>以下哪句话中“划分”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>在划分文章段落层次时，我对承上启下的语句的位置总是划不到位。</td>\n",
       "      <td>划分人民内部矛盾和敌我矛盾。</td>\n",
       "      <td>双方激烈地辩论着如何划分词类范围的问题。</td>\n",
       "      <td>老师给各个小组都划分好了卫生区</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>298</th>\n",
       "      <td>以下哪句话中“红领巾”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>鲜艳的红领巾在少先队员的胸前飘动着。</td>\n",
       "      <td>红领巾代表着少年先锋队员。</td>\n",
       "      <td>回想我刚戴上红领巾的时候，多么自豪！</td>\n",
       "      <td>升国旗的时候，少先队员要佩带红领巾。</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>299</th>\n",
       "      <td>以下哪句话中“贯通”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>一篇好的文章应该前后贯通，条理分明。</td>\n",
       "      <td>洞中泉流跟冰壶、双龙上下相贯通。</td>\n",
       "      <td>他把学习和实践融会贯通了。</td>\n",
       "      <td>在学习中要学会融会贯通，不要只是生吞活剥一些知识。</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>300 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                              问题                              选项1  \\\n",
       "0     以下哪句话中“配方”的意思(或用法)与其他句子不同。                 通过配方，方程变成了完全平方式。   \n",
       "1     以下哪句话中“装饰”的意思(或用法)与其他句子不同。                 装饰的华丽可以显示出一个人的富有   \n",
       "2      以下哪句话中“陆”的意思(或用法)与其他句子不同。       上课铃声响了，正在游戏的同学们陆续地回教室准备上课。   \n",
       "3     以下哪句话中“黄牛”的意思(或用法)与其他句子不同。                    我去年就被迫向黄牛党买票。   \n",
       "4     以下哪句话中“骨干”的意思(或用法)与其他句子不同。            哥哥参加工作不久，就成了研究所的业务骨干。   \n",
       "..                           ...                              ...   \n",
       "295   以下哪句话中“点子”的意思(或用法)与其他句子不同。               我们需要一些新的点子来创新我们的产品   \n",
       "296   以下哪句话中“相机”的意思(或用法)与其他句子不同。  同时，军区大院南门外的胜利路两端也各放了一个连，准备相机行事。   \n",
       "297   以下哪句话中“划分”的意思(或用法)与其他句子不同。   在划分文章段落层次时，我对承上启下的语句的位置总是划不到位。   \n",
       "298  以下哪句话中“红领巾”的意思(或用法)与其他句子不同。               鲜艳的红领巾在少先队员的胸前飘动着。   \n",
       "299   以下哪句话中“贯通”的意思(或用法)与其他句子不同。               一篇好的文章应该前后贯通，条理分明。   \n",
       "\n",
       "                                             选项2  \\\n",
       "0                                还没有把握是否能找到一个配方。   \n",
       "1                          这位装修工用极其现代化的陈设装饰这家旅馆。   \n",
       "2                                   图上部的横线段表示陆地。   \n",
       "3    如果年检费涨价了，年检站肯定会为客户提供一站式的年检服务，让车主不用再找“黄牛”代办。   \n",
       "4                               加强对国有骨干企业领导人员的管理   \n",
       "..                                           ...   \n",
       "295                        在头脑风暴会议中，大家纷纷贡献自己的点子。   \n",
       "296                 可是我等了你两天你也没出来，我刚把相机放回去你就出来了！   \n",
       "297                               划分人民内部矛盾和敌我矛盾。   \n",
       "298                                红领巾代表着少年先锋队员。   \n",
       "299                             洞中泉流跟冰壶、双龙上下相贯通。   \n",
       "\n",
       "                                   选项3                                  选项4 答案  \n",
       "0         还是原来的配方，还是原来的味道，客人们都吃的非常的开心。                  我们自己生产护发产品，都使用草药配方。  A  \n",
       "1        极具艺术气息的装饰画，如摩登女郎般跃然于空中，打破传统限制                   商店的橱窗里陈列着许多漂亮的装饰品。  B  \n",
       "2                         陆上侵蚀常夷平成准平原。                         经济萧条正在大陆上蔓延。  A  \n",
       "3                           黄牛们坚持漫天讨价。  黄牛虽然没有荷兰牧场的黑白花牛漂亮，但是它却很勤劳，和家乡里的人一样。  D  \n",
       "4                  将每棵树作为一个树冠很矮的骨干枝来处理                  我们应该把这些教学骨干大胆地提拔起来。  C  \n",
       "..                                 ...                                  ... ..  \n",
       "295                      我的衣服上溅上了几滴油点子                   他总是能够迅速提出独特的点子解决问题  C  \n",
       "296  照相要取好景物的范围，选好照相的位置及照相机的光圈和拍摄的速度等。  目前使用扫描仪、数码摄像机和数码照相机作为图片和影像输入的也越来越多。  A  \n",
       "297               双方激烈地辩论着如何划分词类范围的问题。                      老师给各个小组都划分好了卫生区  B  \n",
       "298                 回想我刚戴上红领巾的时候，多么自豪！                   升国旗的时候，少先队员要佩带红领巾。  B  \n",
       "299                      他把学习和实践融会贯通了。            在学习中要学会融会贯通，不要只是生吞活剥一些知识。  B  \n",
       "\n",
       "[300 rows x 6 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the downloaded SLPWC_v1.csv file and show it as the cell output.\n",
    "slpwc_csv = \"Data/SLPWC_v1.csv\"\n",
    "df = pd.read_csv(filepath_or_buffer=slpwc_csv)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "7108bde1-4909-4d41-bf90-36bb3e4deaa2",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "--2024-09-23 10:46:42--  https://raw.githubusercontent.com/SUDA-LA/Modern-Chinese-Word-Sense-Annotated-dataset/main/wsd_single_sense_sentence_final.json\n",
      "正在解析主机 raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.110.133, 185.199.109.133, ...\n",
      "正在连接 raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... 已连接。\n",
      "已发出 HTTP 请求，正在等待回应... 200 OK\n",
      "长度： 3456317 (3.3M) [application/octet-stream]\n",
      "正在保存至: “./Data/wsd_single_sense_sentence_final.json”\n",
      "\n",
      "     0K .......... .......... .......... .......... ..........  1%  694K 5s\n",
      "    50K .......... .......... .......... .......... ..........  2%  951K 4s\n",
      "   100K .......... .......... .......... .......... ..........  4% 3.35M 3s\n",
      "   150K .......... .......... .......... .......... ..........  5% 4.38M 2s\n",
      "   200K .......... .......... .......... .......... ..........  7% 1.45M 2s\n",
      "   250K .......... .......... .......... .......... ..........  8% 5.40M 2s\n",
      "   300K .......... .......... .......... .......... .......... 10% 5.58M 2s\n",
      "   350K .......... .......... .......... .......... .......... 11% 7.43M 2s\n",
      "   400K .......... .......... .......... .......... .......... 13% 7.91M 1s\n",
      "   450K .......... .......... .......... .......... .......... 14% 9.04M 1s\n",
      "   500K .......... .......... .......... .......... .......... 16% 1.79M 1s\n",
      "   550K .......... .......... .......... .......... .......... 17% 10.4M 1s\n",
      "   600K .......... .......... .......... .......... .......... 19% 8.40M 1s\n",
      "   650K .......... .......... .......... .......... .......... 20% 12.1M 1s\n",
      "   700K .......... .......... .......... .......... .......... 22% 12.0M 1s\n",
      "   750K .......... .......... .......... .......... .......... 23% 12.0M 1s\n",
      "   800K .......... .......... .......... .......... .......... 25% 11.9M 1s\n",
      "   850K .......... .......... .......... .......... .......... 26% 12.1M 1s\n",
      "   900K .......... .......... .......... .......... .......... 28% 12.1M 1s\n",
      "   950K .......... .......... .......... .......... .......... 29%  734K 1s\n",
      "  1000K .......... .......... .......... .......... .......... 31%  750K 1s\n",
      "  1050K .......... .......... .......... .......... .......... 32% 79.8M 1s\n",
      "  1100K .......... .......... .......... .......... .......... 34% 83.2M 1s\n",
      "  1150K .......... .......... .......... .......... .......... 35% 89.9M 1s\n",
      "  1200K .......... .......... .......... .......... .......... 37% 78.2M 1s\n",
      "  1250K .......... .......... .......... .......... .......... 38% 88.5M 1s\n",
      "  1300K .......... .......... .......... .......... .......... 39% 89.8M 1s\n",
      "  1350K .......... .......... .......... .......... .......... 41% 88.8M 1s\n",
      "  1400K .......... .......... .......... .......... .......... 42% 76.2M 1s\n",
      "  1450K .......... .......... .......... .......... .......... 44% 87.6M 1s\n",
      "  1500K .......... .......... .......... .......... .......... 45% 89.0M 0s\n",
      "  1550K .......... .......... .......... .......... .......... 47% 1.21M 1s\n",
      "  1600K .......... .......... .......... .......... .......... 48% 69.1M 0s\n",
      "  1650K .......... .......... .......... .......... .......... 50% 13.5M 0s\n",
      "  1700K .......... .......... .......... .......... .......... 51% 29.8M 0s\n",
      "  1750K .......... .......... .......... .......... .......... 53% 40.2M 0s\n",
      "  1800K .......... .......... .......... .......... .......... 54% 27.8M 0s\n",
      "  1850K .......... .......... .......... .......... .......... 56% 17.2M 0s\n",
      "  1900K .......... .......... .......... .......... .......... 57% 34.9M 0s\n",
      "  1950K .......... .......... .......... .......... .......... 59%  130M 0s\n",
      "  2000K .......... .......... .......... .......... .......... 60%  116M 0s\n",
      "  2050K .......... .......... .......... .......... .......... 62%  132M 0s\n",
      "  2100K .......... .......... .......... .......... .......... 63%  126M 0s\n",
      "  2150K .......... .......... .......... .......... .......... 65%  128M 0s\n",
      "  2200K .......... .......... .......... .......... .......... 66%  112M 0s\n",
      "  2250K .......... .......... .......... .......... .......... 68%  126M 0s\n",
      "  2300K .......... .......... .......... .......... .......... 69%  131M 0s\n",
      "  2350K .......... .......... .......... .......... .......... 71%  134M 0s\n",
      "  2400K .......... .......... .......... .......... .......... 72%  113M 0s\n",
      "  2450K .......... .......... .......... .......... .......... 74%  132M 0s\n",
      "  2500K .......... .......... .......... .......... .......... 75% 3.43M 0s\n",
      "  2550K .......... .......... .......... .......... .......... 77% 6.75M 0s\n",
      "  2600K .......... .......... .......... .......... .......... 78% 9.02M 0s\n",
      "  2650K .......... .......... .......... .......... .......... 79% 12.0M 0s\n",
      "  2700K .......... .......... .......... .......... .......... 81% 9.32M 0s\n",
      "  2750K .......... .......... .......... .......... .......... 82% 7.54M 0s\n",
      "  2800K .......... .......... .......... .......... .......... 84% 11.9M 0s\n",
      "  2850K .......... .......... .......... .......... .......... 85% 11.0M 0s\n",
      "  2900K .......... .......... .......... .......... .......... 87% 12.0M 0s\n",
      "  2950K .......... .......... .......... .......... .......... 88% 12.1M 0s\n",
      "  3000K .......... .......... .......... .......... .......... 90% 8.98M 0s\n",
      "  3050K .......... .......... .......... .......... .......... 91%  782K 0s\n",
      "  3100K .......... .......... .......... .......... .......... 93% 11.7M 0s\n",
      "  3150K .......... .......... .......... .......... .......... 94% 13.8M 0s\n",
      "  3200K .......... .......... .......... .......... .......... 96% 16.0M 0s\n",
      "  3250K .......... .......... .......... .......... .......... 97% 17.0M 0s\n",
      "  3300K .......... .......... .......... .......... .......... 99% 95.5M 0s\n",
      "  3350K .......... .......... .....                           100% 81.3M=0.6s\n",
      "\n",
      "2024-09-23 10:46:44 (5.29 MB/s) - 已保存 “./Data/wsd_single_sense_sentence_final.json” [3456317/3456317])\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fetch the word-sense-annotated sentence file into ./Data; skipped when the file is\n",
    "# already there (for example after it has been placed there manually, see the note below).\n",
    "wsd_url = 'https://raw.githubusercontent.com/SUDA-LA/Modern-Chinese-Word-Sense-Annotated-dataset/main/wsd_single_sense_sentence_final.json'\n",
    "wsd_path = os.path.join('Data', 'wsd_single_sense_sentence_final.json')\n",
    "\n",
    "os.makedirs('Data', exist_ok=True)  # no-op when the folder already exists\n",
    "if not os.path.exists(wsd_path):\n",
    "    # Without this guard every re-run makes wget save another copy (<name>.1, <name>.2, ...).\n",
    "    status = os.system('wget -P ./Data ' + wsd_url)\n",
    "    if status != 0:\n",
    "        # Remove a partial download so the next run retries instead of skipping.\n",
    "        if os.path.exists(wsd_path):\n",
    "            os.remove(wsd_path)\n",
    "        raise RuntimeError('wget failed (os.system returned %s) for %s' % (status, wsd_url))\n",
    "# Note: the file downloaded from the command line raised errors in the later code,\n",
    "# possibly because of a file-encoding problem.\n",
    "# Workaround: download the file manually and put it in the Data folder.\n",
    "# https://github.com/SUDA-LA/Modern-Chinese-Word-Sense-Annotated-dataset/blob/main/wsd_single_sense_sentence_final.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "565714e4-ceb1-4772-a6f9-b17064d040c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "file_name = 'Data/wsd_single_sense_sentence_final.json'\n",
    "\n",
    "# Decode explicitly as UTF-8: a bare open() falls back to the platform's default\n",
    "# encoding, which is not UTF-8 everywhere and then fails on the Chinese text.\n",
    "with open(file_name, 'r', encoding='utf-8') as file:\n",
    "    data = json.load(file)\n",
    "\n",
    "# Drop, in place, every sense that has no example sentence attached to it.\n",
    "# (Layout as used by the code below: data[word][sense] -> collection of sentences.)\n",
    "for word, senses in data.items():\n",
    "    empty_senses = [meaning for meaning, sentences in senses.items() if len(sentences) == 0]\n",
    "    for meaning in empty_senses:\n",
    "        # print(word,meaning,'is none')\n",
    "        senses.pop(meaning)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "5a24b110-e063-4617-b06c-70b18dafe1f3",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "def sample_one(data,word,method):\n",
    "    \"\"\"Build one 'odd sentence out' multiple-choice item for `word`.\n",
    "\n",
    "    Three options are sentences using one sense of the word, the fourth uses a\n",
    "    different sense; the four are shuffled and the answer letter (A-D) records\n",
    "    where the odd sentence ended up.\n",
    "\n",
    "    Args:\n",
    "        data: dict mapping word -> {sense: list of example sentences}.\n",
    "        word: key of `data` to build the item for.\n",
    "        method: selection strategy; only 'v1' (deterministic) is implemented.\n",
    "\n",
    "    Returns:\n",
    "        A dict with the keys '问题', '选项1'..'选项4' and '答案', or [] when the word\n",
    "        has fewer than two non-empty senses or no sense with three sentences.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if `method` is not an implemented strategy (checked only\n",
    "            once the word has passed the eligibility test).\n",
    "    \"\"\"\n",
    "    # A sense without sentences cannot supply an option, so leave it out here\n",
    "    # instead of relying on the caller having pruned it beforehand.\n",
    "    dw = {sense: sents for sense, sents in data[word].items() if len(sents) > 0}\n",
    "    senses = list(dw.keys())\n",
    "    nums = [len(dw[sense]) for sense in senses]\n",
    "\n",
    "    # Need at least two senses, one of which has at least three sentences.\n",
    "    if len(nums) < 2 or max(nums) < 3: return []\n",
    "\n",
    "    # Pick the senses and the sentences.\n",
    "    if method == 'v1': # v1: deterministic selection\n",
    "        # First three sentences of the best-covered sense plus the first\n",
    "        # sentence of the least-covered sense (the odd one, stored at index 3).\n",
    "        ids = np.argsort(nums)\n",
    "        sentences = dw[senses[ids[-1]]][0:3] + dw[senses[ids[0]]][0:1]\n",
    "    else:\n",
    "        raise ValueError(\"unknown sampling method: %r\" % (method,))\n",
    "\n",
    "    # Shuffle the option order so the answer position is random.\n",
    "    cid = [0,1,2,3]\n",
    "    random.shuffle(cid)\n",
    "    sample = {'问题': \"以下哪句话中“%s”的意思(或用法)与其他句子不同。\" % word,\n",
    "              '选项1': sentences[cid[0]],\n",
    "              '选项2': sentences[cid[1]],\n",
    "              '选项3': sentences[cid[2]],\n",
    "              '选项4': sentences[cid[3]],\n",
    "              # the odd sentence is sentences[3]; its slot in cid is the answer\n",
    "              '答案': chr(ord('A') + cid.index(3))\n",
    "             }\n",
    "    return sample\n",
    "\n",
    "def construct_dataset(data,method='v1'):\n",
    "    \"\"\"Build the multiple-choice dataset, one row per eligible word of `data`.\n",
    "\n",
    "    Args:\n",
    "        data: dict mapping word -> {sense: list of example sentences}.\n",
    "        method: selection strategy passed through to sample_one.\n",
    "\n",
    "    Returns:\n",
    "        A DataFrame with the columns '问题', '选项1'..'选项4' and '答案'; words for\n",
    "        which sample_one returns [] are skipped. The frame is empty (but still\n",
    "        has those columns) when no word is eligible.\n",
    "    \"\"\"\n",
    "    samples = [sample_one(data,word,method) for word in data.keys()]\n",
    "    # Keep only real items; do not assume the first word is an eligible one.\n",
    "    rows = [sample for sample in samples if sample != []]\n",
    "    if not rows:\n",
    "        return pd.DataFrame(columns=['问题', '选项1', '选项2', '选项3', '选项4', '答案'])\n",
    "    # One record per row; column order follows the key order of the items.\n",
    "    df = pd.DataFrame(rows)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "3b5bfd25-02f0-4a6c-abb9-b63a3e22f0b9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>问题</th>\n",
       "      <th>选项1</th>\n",
       "      <th>选项2</th>\n",
       "      <th>选项3</th>\n",
       "      <th>选项4</th>\n",
       "      <th>答案</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>以下哪句话中“不论”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>我从来没有埋怨人民，不论是现在或任何时刻。叶</td>\n",
       "      <td>天空收容每一片云彩，不论其美丑，所以天空宽阔无边。大地拥抱每一寸土地，不论其贫富，所以大地广...</td>\n",
       "      <td>幸福的斗争不论它是如何的艰难，它并不是一种痛苦，而是快乐，而只是喜剧。车尔尼雪夫斯</td>\n",
       "      <td>存而不论</td>\n",
       "      <td>D</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>以下哪句话中“不断”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>人生就像战场，不是你死便是我活。社会不断发展，跟不上节奏的都将被淘汰，只有不断地汲取教训，不...</td>\n",
       "      <td>脚步怎样才能不断前时？把脚印留在身后</td>\n",
       "      <td>她的声音异常柔和，像软软的扯不断的丝，唱到悲壮的地方，她的声音又是十分凄厉，像深夜里战场上的号角</td>\n",
       "      <td>所谓高质量人生，其实就是平衡不断遭到破坏和重建。赵鑫</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>以下哪句话中“不错”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>那剑舞得果然不错，剑过处，习习生风，吹动丁香树上一片片白花瓣飘落下来</td>\n",
       "      <td>天天通知自己一次，我真的很不错</td>\n",
       "      <td>抽象概念作为一种见解倒是不错的，但应用到人们身上，就不那么行得通了。泰戈</td>\n",
       "      <td>事业并不错，错的只是它的低劣的实行者。屠格涅</td>\n",
       "      <td>D</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>以下哪句话中“布局”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>这组高山建筑，布局周密，结构严谨，铜铁铸造，玲珑精巧，造价极高</td>\n",
       "      <td>故宫的建筑富丽堂皇，气魄雄伟。布局严整，达到我国建筑史上的艺术高峰</td>\n",
       "      <td>他把文章的布局稍稍改动了一些</td>\n",
       "      <td>整个建筑宏伟，布局严谨。楼亭仓舍，左右对称，贴金彩画，装饰细腻</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>以下哪句话中“起步”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>奋斗者的幸福是从痛苦起步的，享乐者的痛苦是从“幸福”开始的</td>\n",
       "      <td>改变，从现在开始；成功，由今天起步</td>\n",
       "      <td>黄河从冰川万丈的巴颜喀拉山北麓起步，一路上接纳着千溪百川，浩浩荡荡，曲波折折，奔向巨浪滔天的...</td>\n",
       "      <td>虽然起步迟，只要不畏挫折，坚持到底，照样能超越他人</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>857</th>\n",
       "      <td>以下哪句话中“追求”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>他狂热地追求这位姑娘</td>\n",
       "      <td>追求真理</td>\n",
       "      <td>他一生都在追求着光明</td>\n",
       "      <td>追求名利地位</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>858</th>\n",
       "      <td>以下哪句话中“设施”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>孔明如此设施，其中有计</td>\n",
       "      <td>设施完善</td>\n",
       "      <td>工程设施</td>\n",
       "      <td>军事设施</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>859</th>\n",
       "      <td>以下哪句话中“特征”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>宜蒙特征</td>\n",
       "      <td>性格特征</td>\n",
       "      <td>显著特征</td>\n",
       "      <td>主要特征</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>860</th>\n",
       "      <td>以下哪句话中“掌握”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>掌握主动</td>\n",
       "      <td>掌握规律</td>\n",
       "      <td>掌握知识</td>\n",
       "      <td>掌握技术</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>861</th>\n",
       "      <td>以下哪句话中“高层”的意思(或用法)与其他句子不同。</td>\n",
       "      <td>高层岗位</td>\n",
       "      <td>高层住宅</td>\n",
       "      <td>高层领导</td>\n",
       "      <td>高层人物</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>862 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                             问题  \\\n",
       "0    以下哪句话中“不论”的意思(或用法)与其他句子不同。   \n",
       "1    以下哪句话中“不断”的意思(或用法)与其他句子不同。   \n",
       "2    以下哪句话中“不错”的意思(或用法)与其他句子不同。   \n",
       "3    以下哪句话中“布局”的意思(或用法)与其他句子不同。   \n",
       "4    以下哪句话中“起步”的意思(或用法)与其他句子不同。   \n",
       "..                          ...   \n",
       "857  以下哪句话中“追求”的意思(或用法)与其他句子不同。   \n",
       "858  以下哪句话中“设施”的意思(或用法)与其他句子不同。   \n",
       "859  以下哪句话中“特征”的意思(或用法)与其他句子不同。   \n",
       "860  以下哪句话中“掌握”的意思(或用法)与其他句子不同。   \n",
       "861  以下哪句话中“高层”的意思(或用法)与其他句子不同。   \n",
       "\n",
       "                                                   选项1  \\\n",
       "0                               我从来没有埋怨人民，不论是现在或任何时刻。叶   \n",
       "1    人生就像战场，不是你死便是我活。社会不断发展，跟不上节奏的都将被淘汰，只有不断地汲取教训，不...   \n",
       "2                   那剑舞得果然不错，剑过处，习习生风，吹动丁香树上一片片白花瓣飘落下来   \n",
       "3                      这组高山建筑，布局周密，结构严谨，铜铁铸造，玲珑精巧，造价极高   \n",
       "4                        奋斗者的幸福是从痛苦起步的，享乐者的痛苦是从“幸福”开始的   \n",
       "..                                                 ...   \n",
       "857                                         他狂热地追求这位姑娘   \n",
       "858                                        孔明如此设施，其中有计   \n",
       "859                                               宜蒙特征   \n",
       "860                                               掌握主动   \n",
       "861                                               高层岗位   \n",
       "\n",
       "                                                   选项2  \\\n",
       "0    天空收容每一片云彩，不论其美丑，所以天空宽阔无边。大地拥抱每一寸土地，不论其贫富，所以大地广...   \n",
       "1                                   脚步怎样才能不断前时？把脚印留在身后   \n",
       "2                                      天天通知自己一次，我真的很不错   \n",
       "3                    故宫的建筑富丽堂皇，气魄雄伟。布局严整，达到我国建筑史上的艺术高峰   \n",
       "4                                    改变，从现在开始；成功，由今天起步   \n",
       "..                                                 ...   \n",
       "857                                               追求真理   \n",
       "858                                               设施完善   \n",
       "859                                               性格特征   \n",
       "860                                               掌握规律   \n",
       "861                                               高层住宅   \n",
       "\n",
       "                                                   选项3  \\\n",
       "0            幸福的斗争不论它是如何的艰难，它并不是一种痛苦，而是快乐，而只是喜剧。车尔尼雪夫斯   \n",
       "1     她的声音异常柔和，像软软的扯不断的丝，唱到悲壮的地方，她的声音又是十分凄厉，像深夜里战场上的号角   \n",
       "2                 抽象概念作为一种见解倒是不错的，但应用到人们身上，就不那么行得通了。泰戈   \n",
       "3                                       他把文章的布局稍稍改动了一些   \n",
       "4    黄河从冰川万丈的巴颜喀拉山北麓起步，一路上接纳着千溪百川，浩浩荡荡，曲波折折，奔向巨浪滔天的...   \n",
       "..                                                 ...   \n",
       "857                                         他一生都在追求着光明   \n",
       "858                                               工程设施   \n",
       "859                                               显著特征   \n",
       "860                                               掌握知识   \n",
       "861                                               高层领导   \n",
       "\n",
       "                                 选项4 答案  \n",
       "0                               存而不论  D  \n",
       "1         所谓高质量人生，其实就是平衡不断遭到破坏和重建。赵鑫  C  \n",
       "2             事业并不错，错的只是它的低劣的实行者。屠格涅  D  \n",
       "3    整个建筑宏伟，布局严谨。楼亭仓舍，左右对称，贴金彩画，装饰细腻  C  \n",
       "4          虽然起步迟，只要不畏挫折，坚持到底，照样能超越他人  C  \n",
       "..                               ... ..  \n",
       "857                           追求名利地位  A  \n",
       "858                             军事设施  A  \n",
       "859                             主要特征  A  \n",
       "860                             掌握技术  A  \n",
       "861                             高层人物  B  \n",
       "\n",
       "[862 rows x 6 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Seed the RNG first so the shuffled option order (and answer letters) is reproducible,\n",
    "# then build the dataset, write it to CSV and show it as the cell output.\n",
    "random.seed(0)\n",
    "df = construct_dataset(data, method='v1')\n",
    "df.to_csv(path_or_buf='Data/WSD_v1.csv')\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "97955c4c-aacc-4150-93d1-3fee3df0cf7b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same construction as the previous cell, preview only (the CSV export is disabled).\n",
    "random.seed(0)\n",
    "df = construct_dataset(data, method='v1')\n",
    "# df.to_csv('Data/WSD_v1.csv')\n",
    "df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
