{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# exec_rst"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['part-00019-de86e216-cb17-4c7d-93dd-b5a080dea8e1-c000.gzdual_exec_result.pkl']"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "# List the cache directory and show at most its first ten entries.\n",
    "cache_dir = \".../codepo/..._with_time/save/cache/\"\n",
    "os.listdir(cache_dir)[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: pickle.load can execute arbitrary code; only load cache files you trust.\n",
    "exec_rst_path = \".../codepo/..._with_time/save/cache/part-00019-de86e216-cb17-4c7d-93dd-b5a080dea8e1-c000.gzdual_exec_result.pkl\"\n",
    "with open(exec_rst_path, 'rb') as fh:\n",
    "    exec_rst = pickle.load(fh)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "task_id\n",
      "120190\n",
      "test_cases\n",
      "['assert wrapped_print(\"Hello, World!\") == expected_output', 'assert  wrapped_print(\"Hello world\") is None', 'assert  wrapped_print(\"Hello World!\") == None', 'assert  wrapped_print(\"Hello\") == None', 'assert wrapped_print(\"It\\'s alive!\") is None\\nnow = datetime.now()\\nformatted_now = now.strftime(\"%H:%M:%S\")\\nexpected_output = f\"[{formatted_now}] Hello, World!\\\\n\"', 'assert wrapped_print(\"Hello World\") == None', 'assert wrapped_print(\"Hello again\") is None', 'assert wrapped_print(\"Hello, World\") == None', 'assert wrapped_print(\"Test\") == None\\ncurrent_time = datetime.now().strftime(\"%H:%M:%S\")', 'assert wrapped_print(\"World\") == None\\nnow = datetime.now().strftime(\"%H:%M:%S\")', 'assert  wrapped_print(\"Hello world\") == None', 'assert  wrapped_print(\"Hello, World!\") is None', 'assert wrapped_print(\"World\") == None', 'assert wrapped_print(\"Hello\") == None', 'assert wrapped_print(\"World\") == None\\n\\nnow = datetime.now()\\ntime = now.strftime(\"%H:%M:%S\")', 'assert  wrapped_print(\"Hello, World!\") == None', 'assert  wrapped_print(\"Hello\") == None\\ntime = datetime.now().strftime(\"%H:%M:%S\")', 'assert wrapped_print(\"Hello, World!\") == None']\n",
      "completion\n",
      "    current_time = datetime.now().strftime(\"%H:%MM:%SS\")\n",
      "    print(f\"[{current_time}] {message}\\n\")\n",
      "passed\n",
      "True\n",
      "result\n",
      "[False, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False, False, False, False]\n",
      "result_time\n",
      "[100000, 8.821487426757812e-06, 5.4836273193359375e-06, 5.0067901611328125e-06, 1.0013580322265625e-05, 5.245208740234375e-06, 5.0067901611328125e-06, 4.5299530029296875e-06, 9.298324584960938e-06, 8.344650268554688e-06, 4.5299530029296875e-06, 5.0067901611328125e-06, 4.76837158203125e-06, 5.4836273193359375e-06, 100000, 100000, 100000, 100000]\n"
     ]
    }
   ],
   "source": [
    "# Dump every field of one record: the key on one line, its value on the next.\n",
    "idx = 6670\n",
    "record = exec_rst[idx]\n",
    "for field, value in record.items():\n",
    "    print(field, value, sep=\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the 'result' and 'result_time' fields of every record into two parallel lists.\n",
    "all_result, all_time = [], []\n",
    "for rec in exec_rst:\n",
    "    all_result.append(rec['result'])\n",
    "    all_time.append(rec['result_time'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect indices of records flagged as passed whose 'result' and 'result_time' lists\n",
    "# differ in length, skipping any record whose result list equals all_result[34].\n",
    "# NOTE(review): all_result[34] looks like a reference result list - confirm what it holds.\n",
    "err_idx = []\n",
    "for i, results in enumerate(all_result):\n",
    "    length_mismatch = len(results) != len(all_time[i])\n",
    "    if length_mismatch and results != all_result[34] and exec_rst[i]['passed']:\n",
    "        err_idx.append(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[6670, 6674]"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the record indices collected in err_idx.\n",
    "err_idx"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# rank_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3943, ['195083', '19315', '199967', '183109', '181613'])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE: pickle.load can execute arbitrary code; only load cache files you trust.\n",
    "rank_score_path = \"/.../.../oss-instruct/save/cache/part-00019-de86e216-cb17-4c7d-93dd-b5a080dea8e1-c000.gzpage_rank_scores.pkl\"\n",
    "with open(rank_score_path, 'rb') as fh:\n",
    "    rank_score = pickle.load(fh)\n",
    "\n",
    "# Number of entries, plus the first five keys as a quick sanity check.\n",
    "len(rank_score), list(rank_score)[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(['    missing_columns = [col for col in required_columns if col not in columns]\\n    columns = [col for col in columns if col in valid_columns or col in required_columns]\\n    columns.extend(missing_columns)\\n    return columns\\n'],\n",
       " 9.703393660982237e+22)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First entry stored for task '195083'; the recorded output shows a ([code], score) tuple.\n",
    "rank_score['195083'][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1982, ['195083', '199967', '183109', '195030', '19798'], 0.5026629469946741)"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count the tasks whose first and last entries carry different scores.\n",
    "# Tasks with an empty entry list are skipped: indexing [0] / [-1] on them would raise\n",
    "# IndexError, and the *_is_same columns and valid_num computed later in this notebook\n",
    "# apply the same length guard.\n",
    "not_same_num = 0\n",
    "not_same_list = []\n",
    "for task_id, ranked in rank_score.items():\n",
    "    if len(ranked) > 0 and ranked[0][1] != ranked[-1][1]:\n",
    "        not_same_num += 1\n",
    "        not_same_list.append(task_id)\n",
    "\n",
    "# (count, first five differing task ids, fraction of all tasks)\n",
    "not_same_num, not_same_list[:5], not_same_num/len(rank_score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    slug = re.sub(r'\\W+', '-', text).lower().strip('-')\n",
      "    return slug\n",
      "1.9856364836970512e+21\n",
      "    slug = re.sub(r'\\W+','', text).strip().lower()\n",
      "    slug = slug.replace(' ', '-')\n",
      "    return slug\n",
      "2.2362207268992382e+20\n"
     ]
    }
   ],
   "source": [
    "# Print the code and the score of the first and of the last entry stored for one task.\n",
    "not_same_id = \"199967\"\n",
    "for entry in (rank_score[not_same_id][0], rank_score[not_same_id][-1]):\n",
    "    print(entry[0][0])\n",
    "    print(entry[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3943, ['195083', '19315', '199967', '183109', '181613'])"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE: pickle.load can execute arbitrary code; only load cache files you trust.\n",
    "rank_test_score_path = \"/.../.../oss-instruct/save/cache/part-00019-de86e216-cb17-4c7d-93dd-b5a080dea8e1-c000.gzpage_rank_test_scores.pkl\"\n",
    "with open(rank_test_score_path, 'rb') as fh:\n",
    "    rank_test_score = pickle.load(fh)\n",
    "\n",
    "# Number of entries, plus the first five keys as a quick sanity check.\n",
    "len(rank_test_score), list(rank_test_score)[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(['assert make_slug(\"12345\") == \"12345\"'], 8.759550467873127e+19),\n",
       " (['assert make_slug(\"123abc\") == \"123abc\"'], 8.759550467873127e+19),\n",
       " (['assert make_slug(\"123abc\") == \"123abc\"'], 8.759550467873127e+19),\n",
       " (['assert  make_slug(\"This is a simple string.\") == \"this-is-a-simple-string\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This, is a simple string.\") == \"this-is-a-simple-string\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This is a more complex string. With - special characters & spaces\") == \"this-is-a-more-complex-string-with-special-characters-spaces\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"Hello World!\") == \"hello-world\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"Hello World!\") == \"hello-world\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"Hello World!\") == \"hello-world\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"Hello World!\") == \"hello-world\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This is a test\") == \"this-is-a-test\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This is a test\") == \"this-is-a-test\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This is a test\") == \"this-is-a-test\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"This is a simple string.\")  == \\'this-is-a-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This is a simple string!\") == \\'this-is-a-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This is a simple  string\") == \\'this-is-a-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This   is  a simple string\") == \\'this-is-a-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"simple string\") ==\\'simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"simple string.\") ==\\'simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This simple string\") == \\'this-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"Myocardial Infarction\") == \"myocardial-infarction\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"This is a simple string.\") == \\'this-is-a-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"This is a simple string.\") == \\'this-is-a-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"Hello World!\") == \\'hello-world\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"This is a simple string\")   == \\'this-is-a-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"This, is a simple string\") == \\'this-is-a-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"This: is a simple string\") == \\'this-is-a-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This is a simple string\") == \"this-is-a-simple-string\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\" This is a simple string.\") == \\'this-is-a-simple-string\\''],\n",
       "  5.1086316841903915e+19),\n",
       " (['assert  make_slug(\"This is a simple string \") == \\'this-is-a-simple-string\\''],\n",
       "  5.1086316841903915e+19),\n",
       " (['assert  make_slug(\"This is a simple string. \") == \\'this-is-a-simple-string\\''],\n",
       "  5.1086316841903915e+19),\n",
       " (['assert  make_slug(\" This is a simple string. \") == \\'this-is-a-simple-string\\''],\n",
       "  5.1086316841903915e+19),\n",
       " (['assert make_slug(\"!@#$%^&*()_+[]{}|;\\':\\\\\",./<>?\") == \\'\\''],\n",
       "  2.3564187473636655e+19)]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# All entries stored for task '199967'; the recorded output shows ([test], score) tuples.\n",
    "rank_test_score['199967']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# load parquet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Path of the repartitioned parquet shard read by the next cell. It has to be assigned\n",
    "# here so that pd.read_parquet(load_path) works on a fresh kernel (Restart & Run All).\n",
    "load_path = \".../codepo/selfoss_humanevalstyle_code_test_merge_n10/repartition/\" + \"part-00019-de86e216-cb17-4c7d-93dd-b5a080dea8e1-c000.gz.parquet\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the parquet file at load_path into a DataFrame.\n",
    "# NOTE(review): load_path must already be assigned in the kernel before this cell runs.\n",
    "prompt = pd.read_parquet(load_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "def _first_items(ranked):\n",
    "    \"\"\"Return entry[0][0] for every entry of one task's list.\"\"\"\n",
    "    return [entry[0][0] for entry in ranked]\n",
    "\n",
    "\n",
    "def _scores(ranked):\n",
    "    \"\"\"Return entry[1] for every entry of one task's list.\"\"\"\n",
    "    return [entry[1] for entry in ranked]\n",
    "\n",
    "\n",
    "def _ends_match(ranked):\n",
    "    \"\"\"True when the first and last entries have equal entry[1]; False for an empty list.\"\"\"\n",
    "    return ranked[0][1] == ranked[-1][1] if len(ranked) else False\n",
    "\n",
    "\n",
    "task_ids = prompt['task_id']\n",
    "\n",
    "# Columns looked up in rank_score.\n",
    "prompt['ranked_code'] = task_ids.apply(lambda tid: _first_items(rank_score[tid]))\n",
    "prompt['ranked_code_score'] = task_ids.apply(lambda tid: _scores(rank_score[tid]))\n",
    "prompt['ranked_code_score_is_same'] = task_ids.apply(lambda tid: _ends_match(rank_score[tid]))\n",
    "\n",
    "# Columns looked up in rank_test_score.\n",
    "prompt['ranked_test'] = task_ids.apply(lambda tid: _first_items(rank_test_score[tid]))\n",
    "prompt['ranked_test_score'] = task_ids.apply(lambda tid: _scores(rank_test_score[tid]))\n",
    "prompt['ranked_test_score_is_same'] = task_ids.apply(lambda tid: _ends_match(rank_test_score[tid]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop three columns that the later cells do not use.\n",
    "unused_columns = ['test_output', 'prompt', 'code_output']\n",
    "prompt = prompt.drop(columns=unused_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>task_id</th>\n",
       "      <th>prompt_codegen</th>\n",
       "      <th>prompt_testgen</th>\n",
       "      <th>ranked_code</th>\n",
       "      <th>ranked_code_score</th>\n",
       "      <th>ranked_code_score_is_same</th>\n",
       "      <th>ranked_test</th>\n",
       "      <th>ranked_test_score</th>\n",
       "      <th>ranked_test_score_is_same</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>196263</td>\n",
       "      <td>def calculate_fan_in_fan_out(input_shape, outp...</td>\n",
       "      <td>def calculate_fan_in_fan_out(input_shape, outp...</td>\n",
       "      <td>[    if not isinstance(input_shape, tuple) or ...</td>\n",
       "      <td>[14956924994.939896, 14956924994.939896, 14956...</td>\n",
       "      <td>True</td>\n",
       "      <td>[assert calculate_fan_in_fan_out((), ()) == (1...</td>\n",
       "      <td>[17342901303.074974]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>195631</td>\n",
       "      <td>def largest_subarray_sum(nums):\\n    \"\"\"\\n    ...</td>\n",
       "      <td>def largest_subarray_sum(nums):\\n    \"\"\"\\n    ...</td>\n",
       "      <td>[    if not nums:\\n        return 0\\n\\n    max...</td>\n",
       "      <td>[6.0782822045712e+23, 6.0782822045712e+23, 6.0...</td>\n",
       "      <td>True</td>\n",
       "      <td>[assert  largest_subarray_sum([-2, 1, -3, 4, -...</td>\n",
       "      <td>[3.2483759289308444e+22, 3.2483759289308444e+2...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>194295</td>\n",
       "      <td>def foo(s):\\n    \"\"\"\\n    I have a function `f...</td>\n",
       "      <td>def foo(s):\\n    \"\"\"\\n    I have a function `f...</td>\n",
       "      <td>[    return s]</td>\n",
       "      <td>[259.6169195751934]</td>\n",
       "      <td>True</td>\n",
       "      <td>[assert  foo(\"aaa\") == \"aaa\", assert  foo(\"bbb...</td>\n",
       "      <td>[139.4899388813345, 139.4899388813345]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>19315</td>\n",
       "      <td>def unicode_to_tuple(input_string: str) -&gt; tup...</td>\n",
       "      <td>def unicode_to_tuple(input_string: str) -&gt; tup...</td>\n",
       "      <td>[    code_points = input_string.split(',')\\n  ...</td>\n",
       "      <td>[4.7014574644898497e+24, 4.7014574644898497e+2...</td>\n",
       "      <td>True</td>\n",
       "      <td>[assert  unicode_to_tuple('0061, 0062, 0063') ...</td>\n",
       "      <td>[2.0475162768522093e+23, 2.0475162768522093e+2...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>19798</td>\n",
       "      <td>import base64\\n\\ndef encode_binary_to_base64(b...</td>\n",
       "      <td>import base64\\n\\ndef encode_binary_to_base64(b...</td>\n",
       "      <td>[    return base64.b64encode(binary_string),  ...</td>\n",
       "      <td>[1.297960281529166e+16, 1.297960281529166e+16,...</td>\n",
       "      <td>False</td>\n",
       "      <td>[assert True, assert  encode_binary_to_base64(...</td>\n",
       "      <td>[1977026299004316.2, 1654507387782427.8, 16545...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  task_id                                     prompt_codegen  \\\n",
       "0  196263  def calculate_fan_in_fan_out(input_shape, outp...   \n",
       "1  195631  def largest_subarray_sum(nums):\\n    \"\"\"\\n    ...   \n",
       "2  194295  def foo(s):\\n    \"\"\"\\n    I have a function `f...   \n",
       "3   19315  def unicode_to_tuple(input_string: str) -> tup...   \n",
       "4   19798  import base64\\n\\ndef encode_binary_to_base64(b...   \n",
       "\n",
       "                                      prompt_testgen  \\\n",
       "0  def calculate_fan_in_fan_out(input_shape, outp...   \n",
       "1  def largest_subarray_sum(nums):\\n    \"\"\"\\n    ...   \n",
       "2  def foo(s):\\n    \"\"\"\\n    I have a function `f...   \n",
       "3  def unicode_to_tuple(input_string: str) -> tup...   \n",
       "4  import base64\\n\\ndef encode_binary_to_base64(b...   \n",
       "\n",
       "                                         ranked_code  \\\n",
       "0  [    if not isinstance(input_shape, tuple) or ...   \n",
       "1  [    if not nums:\\n        return 0\\n\\n    max...   \n",
       "2                                     [    return s]   \n",
       "3  [    code_points = input_string.split(',')\\n  ...   \n",
       "4  [    return base64.b64encode(binary_string),  ...   \n",
       "\n",
       "                                   ranked_code_score  \\\n",
       "0  [14956924994.939896, 14956924994.939896, 14956...   \n",
       "1  [6.0782822045712e+23, 6.0782822045712e+23, 6.0...   \n",
       "2                                [259.6169195751934]   \n",
       "3  [4.7014574644898497e+24, 4.7014574644898497e+2...   \n",
       "4  [1.297960281529166e+16, 1.297960281529166e+16,...   \n",
       "\n",
       "   ranked_code_score_is_same  \\\n",
       "0                       True   \n",
       "1                       True   \n",
       "2                       True   \n",
       "3                       True   \n",
       "4                      False   \n",
       "\n",
       "                                         ranked_test  \\\n",
       "0  [assert calculate_fan_in_fan_out((), ()) == (1...   \n",
       "1  [assert  largest_subarray_sum([-2, 1, -3, 4, -...   \n",
       "2  [assert  foo(\"aaa\") == \"aaa\", assert  foo(\"bbb...   \n",
       "3  [assert  unicode_to_tuple('0061, 0062, 0063') ...   \n",
       "4  [assert True, assert  encode_binary_to_base64(...   \n",
       "\n",
       "                                   ranked_test_score  \\\n",
       "0                               [17342901303.074974]   \n",
       "1  [3.2483759289308444e+22, 3.2483759289308444e+2...   \n",
       "2             [139.4899388813345, 139.4899388813345]   \n",
       "3  [2.0475162768522093e+23, 2.0475162768522093e+2...   \n",
       "4  [1977026299004316.2, 1654507387782427.8, 16545...   \n",
       "\n",
       "   ranked_test_score_is_same  \n",
       "0                       True  \n",
       "1                       True  \n",
       "2                       True  \n",
       "3                       True  \n",
       "4                      False  "
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first rows of prompt.\n",
    "prompt.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>task_id</th>\n",
       "      <th>prompt_codegen</th>\n",
       "      <th>prompt_testgen</th>\n",
       "      <th>ranked_code</th>\n",
       "      <th>ranked_code_score</th>\n",
       "      <th>ranked_code_score_is_same</th>\n",
       "      <th>ranked_test</th>\n",
       "      <th>ranked_test_score</th>\n",
       "      <th>ranked_test_score_is_same</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>199967</td>\n",
       "      <td>import re\\nimport string\\n\\ndef make_slug(text...</td>\n",
       "      <td>import re\\nimport string\\n\\ndef make_slug(text...</td>\n",
       "      <td>[    slug = re.sub(r'\\W+', '-', text).lower()....</td>\n",
       "      <td>[1.9856364836970512e+21, 1.9856364836970512e+2...</td>\n",
       "      <td>False</td>\n",
       "      <td>[assert make_slug(\"12345\") == \"12345\", assert ...</td>\n",
       "      <td>[8.759550467873127e+19, 8.759550467873127e+19,...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  task_id                                     prompt_codegen  \\\n",
       "6  199967  import re\\nimport string\\n\\ndef make_slug(text...   \n",
       "\n",
       "                                      prompt_testgen  \\\n",
       "6  import re\\nimport string\\n\\ndef make_slug(text...   \n",
       "\n",
       "                                         ranked_code  \\\n",
       "6  [    slug = re.sub(r'\\W+', '-', text).lower()....   \n",
       "\n",
       "                                   ranked_code_score  \\\n",
       "6  [1.9856364836970512e+21, 1.9856364836970512e+2...   \n",
       "\n",
       "   ranked_code_score_is_same  \\\n",
       "6                      False   \n",
       "\n",
       "                                         ranked_test  \\\n",
       "6  [assert make_slug(\"12345\") == \"12345\", assert ...   \n",
       "\n",
       "                                   ranked_test_score  \\\n",
       "6  [8.759550467873127e+19, 8.759550467873127e+19,...   \n",
       "\n",
       "   ranked_test_score_is_same  \n",
       "6                      False  "
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Select the row(s) whose task_id is \"199967\" and display them.\n",
    "is_target_task = prompt['task_id'] == \"199967\"\n",
    "result = prompt.loc[is_target_task]\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    slug = re.sub(r'\\W+', '-', text).lower().strip('-')\n",
      "    return slug\n",
      "    slug = re.sub(r'\\W+','', text).strip().lower()\n",
      "    slug = slug.replace(' ', '-')\n",
      "    return slug\n"
     ]
    }
   ],
   "source": [
    "# Take the first row of result by position (.iloc[0]) instead of the hard-coded row\n",
    "# label 6, so the cell keeps working when the selected row has a different index label.\n",
    "ranked_code = result['ranked_code'].iloc[0]\n",
    "print(ranked_code[0])   # first entry of the list\n",
    "print(ranked_code[-1])  # last entry of the list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.9856364836970512e+21\n",
      "2.2362207268992382e+20\n"
     ]
    }
   ],
   "source": [
    "# Take the first row of result by position (.iloc[0]) instead of the hard-coded row\n",
    "# label 6, so the cell keeps working when the selected row has a different index label.\n",
    "ranked_code_score = result['ranked_code_score'].iloc[0]\n",
    "print(ranked_code_score[0])   # first entry of the list\n",
    "print(ranked_code_score[-1])  # last entry of the list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8.759550467873127e+19\n",
      "2.3564187473636655e+19\n"
     ]
    }
   ],
   "source": [
    "# Take the first row of result by position (.iloc[0]) instead of the hard-coded row\n",
    "# label 6, so the cell keeps working when the selected row has a different index label.\n",
    "ranked_test_score = result['ranked_test_score'].iloc[0]\n",
    "print(ranked_test_score[0])   # first entry of the list\n",
    "print(ranked_test_score[-1])  # last entry of the list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "assert make_slug(\"12345\") == \"12345\"\n",
      "assert  make_slug(\" This is a simple string. \") == 'this-is-a-simple-string'\n"
     ]
    }
   ],
   "source": [
    "# Take the first row of result by position (.iloc[0]) instead of the hard-coded row\n",
    "# label 6, so the cell keeps working when the selected row has a different index label.\n",
    "ranked_test = result['ranked_test'].iloc[0]\n",
    "print(ranked_test[0])   # first entry of the list\n",
    "print(ranked_test[-2])  # second-to-last entry of the list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "import re\n",
      "import string\n",
      "\n",
      "def make_slug(text):\n",
      "    \"\"\"\n",
      "    Construct a Python function to convert a given string into a URL-friendly slug. The function should remove all special characters, convert spaces to hyphens, and convert to lowercase. For example, `make_slug(\"This is a simple string.\")` should return `'this-is-a-simple-string'`.\n",
      "    \"\"\"\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Take the first row of result by position (.iloc[0]) instead of the hard-coded row\n",
    "# label 6, so the cell keeps working when the selected row has a different index label.\n",
    "print(result['prompt_codegen'].iloc[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'this-is-a-simple-string'"
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import re\n",
    "import string\n",
    "\n",
    "# Same body as the first ranked_code entry printed above for task 199967.\n",
    "def make_slug(text):\n",
    "    \"\"\"Lower-case text with every run of non-word characters replaced by one hyphen.\n",
    "\n",
    "    Leading and trailing hyphens are stripped from the result.\n",
    "    \"\"\"\n",
    "    slug = re.sub(r'\\W+', '-', text).lower().strip('-')\n",
    "    return slug\n",
    "# Only the last expression of a cell is displayed, so the first call's value is not shown.\n",
    "make_slug(\"This is a simple string.\")\n",
    "make_slug(\" This is a simple string. \")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'thisisasimplestring'"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import re\n",
    "import string\n",
    "\n",
    "# Same body as the last ranked_code entry printed above for task 199967.\n",
    "# This redefines make_slug, replacing the definition from the previous cell.\n",
    "def make_slug(text):\n",
    "    \"\"\"Lower-case text with every run of non-word characters removed.\n",
    "\n",
    "    Spaces are non-word characters, so the substitution removes them too and the\n",
    "    later replace(' ', '-') has no spaces left to turn into hyphens.\n",
    "    \"\"\"\n",
    "    slug = re.sub(r'\\W+','', text).strip().lower()\n",
    "    slug = slug.replace(' ', '-')\n",
    "    return slug\n",
    "# Only the last expression of a cell is displayed, so the first call's value is not shown.\n",
    "make_slug(\"This is a simple string.\")\n",
    "make_slug(\" This is a simple string. \")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ranked_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "# Read a saved '.save_ranked.parquet' shard into ranked_score.\n",
    "# Note: this reassigns load_path, the same name the earlier pd.read_parquet(load_path)\n",
    "# cell reads, to a different file (part-00016).\n",
    "load_path = \".../codepo/.../save/part-00016-de86e216-cb17-4c7d-93dd-b5a080dea8e1-c000.gz.save_ranked.parquet\"\n",
    "ranked_score = pd.read_parquet(load_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Materialise the 'ranked_code_score' column as a plain Python list, one element per row.\n",
    "ranked_code_scores = [row_scores for row_scores in ranked_score['ranked_code_score']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count the rows whose score sequence is non-empty and starts and ends with different values.\n",
    "valid_num = sum(\n",
    "    1\n",
    "    for scores in ranked_code_scores\n",
    "    if len(scores) > 0 and scores[0] != scores[-1]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.4066246697825645"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fraction of the rows in ranked_code_scores that were counted in valid_num.\n",
    "valid_num / len(ranked_code_scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.9.2 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.2"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
