{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "fully_known.json has 34426 questions\n",
      "highly_known.json has 36897 questions\n",
      "weakly_known.json has 25986 questions\n",
      "unknown.json has 71855 questions\n"
     ]
    }
   ],
   "source": [
    "# Print how many top-level entries each listed JSON file contains.\n",
    "base=''\n",
    "file_list=[\n",
    "    'fully_known.json',\n",
    "    'highly_known.json',\n",
    "    'weakly_known.json',\n",
    "    'unknown.json',\n",
    "]\n",
    "for file in file_list:\n",
    "    path=os.path.join(base,file)  # with base='' this is just the bare file name\n",
    "    with open(path,'r') as f:\n",
    "        data=json.load(f)\n",
    "    # Only len(data) is needed, so the file is already closed when the line is printed.\n",
    "    print(f\"{file} has {len(data)} questions\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "fully_known.json has 30026 questions\n",
      "highly_known.json has 41646 questions\n",
      "weakly_known.json has 19987 questions\n",
      "unknown.json has 77505 questions\n"
     ]
    }
   ],
   "source": [
    "# Per-file entry counts for the JSON files resolved against `base`.\n",
    "base=''\n",
    "file_list=['fully_known.json','highly_known.json','weakly_known.json','unknown.json']\n",
    "# Resolve every path up front, then walk names and paths side by side.\n",
    "paths=[os.path.join(base,name) for name in file_list]\n",
    "for file,path in zip(file_list,paths):\n",
    "    with open(path,'r') as f:\n",
    "        data=json.load(f)\n",
    "    print(f\"{file} has {len(data)} questions\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "fully_known.json has 34739 questions\n",
      "highly_known.json has 42400 questions\n",
      "weakly_known.json has 24701 questions\n",
      "unknown.json has 67324 questions\n"
     ]
    }
   ],
   "source": [
    "# Load each JSON file in turn and print one summary line with its entry count.\n",
    "file_list=['fully_known.json','highly_known.json','weakly_known.json','unknown.json']\n",
    "base=''\n",
    "for file in file_list:\n",
    "    f=open(os.path.join(base,file),mode='r')\n",
    "    with f:  # the file object is its own context manager and closes on exit\n",
    "        data=json.load(f)\n",
    "    print(f\"{file} has {len(data)} questions\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "recheck_fully_known_epoch_8.json has 34426 questions, fully_known:27282, highly_known:3952, other:3192\n",
      "recheck_highly_known_epoch_8.json has 36897 questions, fully_known:19059, highly_known:9892, other:7946\n",
      "recheck_weakly_known_epoch_8.json has 25986 questions, fully_known:3091, highly_known:4889, other:18006\n",
      "recheck_unknown_epoch_8.json has 71855 questions, fully_known:527, highly_known:1932, other:69396\n"
     ]
    }
   ],
   "source": [
    "# For each recheck file, split its records into three groups by 'P_without_sample'\n",
    "# and print the group sizes next to the total record count.\n",
    "base=''\n",
    "file_list=['recheck_fully_known_epoch_8.json','recheck_highly_known_epoch_8.json','recheck_weakly_known_epoch_8.json','recheck_unknown_epoch_8.json']\n",
    "\n",
    "for file in file_list:\n",
    "    with open(os.path.join(base,file),'r') as f:\n",
    "        data=json.load(f)\n",
    "    fully_num=highly_num=other_num=0\n",
    "    for data_point in data:\n",
    "        p=data_point['P_without_sample']\n",
    "        if p==1:      # counted as fully_known\n",
    "            fully_num+=1\n",
    "        elif p>0:     # counted as highly_known\n",
    "            highly_num+=1\n",
    "        else:         # everything else\n",
    "            other_num+=1\n",
    "    print(f\"{file} has {len(data)} questions, fully_known:{fully_num}, highly_known:{highly_num}, other:{other_num}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "fully_known: 49959, fully_known->fully_known:47335, fully_known->highly_known:2527, fully_known->other:97\n",
      "highly_known: 20665, highly_known->fully_known:6131, highly_known->highly_known:13161, highly_known->other:1373\n",
      "other: 98540, other->fully_known:225, other->highly_known:2600, other->other:95715\n"
     ]
    }
   ],
   "source": [
    "# Compare two runs record by record and print how many records moved between the\n",
    "# categories derived from 'P_without_sample' (==1, >0, everything else).\n",
    "base=''\n",
    "file_list=['recheck_fully_known_epoch_8.json','recheck_highly_known_epoch_8.json','recheck_weakly_known_epoch_8.json','recheck_unknown_epoch_8.json']\n",
    "file_list_2=['recheck_fully_known_epoch_8_policy_6_round_1.json','recheck_highly_known_epoch_8_policy_6_round_1.json','recheck_weakly_known_epoch_8_policy_6_round_1.json','recheck_unknown_epoch_8_policy_6_round_1.json']\n",
    "\n",
    "CATEGORIES=('fully_known','highly_known','other')\n",
    "\n",
    "def classify(p_without_sample):\n",
    "    \"\"\"Return the category name for one 'P_without_sample' value.\n",
    "\n",
    "    A value equal to 1 is 'fully_known', any other value above 0 is\n",
    "    'highly_known', and everything else is 'other'.\n",
    "    \"\"\"\n",
    "    if p_without_sample==1:\n",
    "        return 'fully_known'\n",
    "    if p_without_sample>0:\n",
    "        return 'highly_known'\n",
    "    return 'other'\n",
    "\n",
    "def load_categories(files):\n",
    "    \"\"\"Classify every record of `files`, in file order, into one flat list of category names.\"\"\"\n",
    "    categories=[]\n",
    "    for file in files:\n",
    "        with open(os.path.join(base,file),'r') as f:\n",
    "            data=json.load(f)\n",
    "        categories.extend(classify(data_point['P_without_sample']) for data_point in data)\n",
    "    return categories\n",
    "\n",
    "# Descriptive names are used here; binding a variable called `list` would shadow the\n",
    "# builtin for every cell executed afterwards in the same kernel.\n",
    "origin=load_categories(file_list)\n",
    "changed=load_categories(file_list_2)\n",
    "\n",
    "# Records are paired purely by position across the concatenated files, so both runs\n",
    "# must contain the same number of records; fail loudly instead of mis-pairing.\n",
    "if len(origin)!=len(changed):\n",
    "    raise ValueError(f'cannot pair records: {len(origin)} original vs {len(changed)} rechecked')\n",
    "\n",
    "# transitions[a][b] = number of records that went from category a to category b.\n",
    "transitions={a:{b:0 for b in CATEGORIES} for a in CATEGORIES}\n",
    "for before,after in zip(origin,changed):\n",
    "    transitions[before][after]+=1\n",
    "\n",
    "for name in CATEGORIES:\n",
    "    row=transitions[name]\n",
    "    total=sum(row.values())\n",
    "    details=', '.join(f'{name}->{target}:{row[target]}' for target in CATEGORIES)\n",
    "    print(f'{name}: {total}, {details}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compare same-named JSON files from two directories record by record and print how\n",
    "# many records moved between the 'P_without_sample' buckets (==1, >0, everything else).\n",
    "base=''\n",
    "base_1=''\n",
    "# os.listdir('') raises FileNotFoundError, so an empty base falls back to the current\n",
    "# directory (os.path.join('', name) resolves against it as well). Only *.json entries\n",
    "# are compared, and they are sorted because os.listdir returns names in arbitrary order.\n",
    "file_list=sorted(name for name in os.listdir(base or '.') if name.endswith('.json'))\n",
    "\n",
    "def bucket(p_without_sample):\n",
    "    \"\"\"Return the table index for one 'P_without_sample' value: 0 if ==1, 1 if >0, else 2.\"\"\"\n",
    "    if p_without_sample==1:\n",
    "        return 0\n",
    "    if p_without_sample>0:\n",
    "        return 1\n",
    "    return 2\n",
    "\n",
    "fully=[0,0,0]\n",
    "highly=[0,0,0]\n",
    "other=[0,0,0]\n",
    "rows=[fully,highly,other]  # row chosen by the bucket of the record loaded from `base`\n",
    "for file in file_list:\n",
    "    with open(os.path.join(base,file),'r') as f:\n",
    "        data=json.load(f)\n",
    "    with open(os.path.join(base_1,file),'r') as f:\n",
    "        data_1=json.load(f)\n",
    "    # Python lists have no `.length` attribute; len() is the way to get the size.\n",
    "    # Records are matched by position, and zip() alone would silently drop the tail\n",
    "    # of the longer file, so a size mismatch is reported explicitly.\n",
    "    if len(data)!=len(data_1):\n",
    "        raise ValueError(f'{file}: {len(data)} records in base but {len(data_1)} in base_1')\n",
    "    for before,after in zip(data,data_1):\n",
    "        rows[bucket(before['P_without_sample'])][bucket(after['P_without_sample'])]+=1\n",
    "\n",
    "print(f\"fully_known: fully_known->fully_known:{fully[0]}, fully_known->highly_known:{fully[1]}, fully_known->other:{fully[2]}\")\n",
    "print(f\"highly_known: highly_known->fully_known:{highly[0]}, highly_known->highly_known:{highly[1]}, highly_known->other:{highly[2]}\")\n",
    "print(f\"other: other->fully_known:{other[0]}, other->highly_known:{other[1]}, other->other:{other[2]}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "recheck_fully_known.json has 30026 questions, fully_known:25099, highly_known:4165, other:762\n",
      "recheck_highly_known.json has 41646 questions, fully_known:18429, highly_known:17312, other:5905\n",
      "recheck_weakly_known.json has 19987 questions, fully_known:1401, highly_known:7032, other:11554\n",
      "recheck_unknown.json has 77505 questions, fully_known:469, highly_known:5559, other:71477\n"
     ]
    }
   ],
   "source": [
    "# Tally the records of each recheck file by their 'P_without_sample' value and print\n",
    "# the three group sizes together with the total record count.\n",
    "base=''\n",
    "file_list=['recheck_fully_known.json','recheck_highly_known.json','recheck_weakly_known.json','recheck_unknown.json']\n",
    "\n",
    "for file in file_list:\n",
    "    with open(os.path.join(base,file),'r') as f:\n",
    "        data=json.load(f)\n",
    "    # tally[0]: value == 1, tally[1]: any other value > 0, tally[2]: everything else\n",
    "    tally=[0,0,0]\n",
    "    for data_point in data:\n",
    "        value=data_point['P_without_sample']\n",
    "        slot=0 if value==1 else (1 if value>0 else 2)\n",
    "        tally[slot]+=1\n",
    "    fully_num,highly_num,other_num=tally\n",
    "    print(f\"{file} has {len(data)} questions, fully_known:{fully_num}, highly_known:{highly_num}, other:{other_num}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "qwen-sft",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
