{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# exec_rst"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\".../codepo/.../save/cache/part-00016-de86e216-cb17-4c7d-93dd-b5a080dea8e1-c000.gzdual_exec_result.pkl\",'rb') as f:\n",
    "    exec_rst = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "task_id\n",
      "151953\n",
      "test_cases\n",
      "[\"assert  find_matching_words('apple', ['ape', 'apple', 'peach', 'puppy'], 'A..E') == ['apple', 'ape']\", \"assert sorted(find_matching_words('hello world', ['hello', 'world', 'hello world'], '??')) == sorted(['hello', 'world'])\", \"assert  find_matching_words('apple', ['apple', 'banana', 'cherry'], 'pple') == ['apple']\", \"assert  find_matching_words('aaa', ['aaa', 'bbb', 'ccc'], '__') == ['aaa', 'bbb', 'ccc']\", 'assert  find_matching_words(\"aba\", [\"cat\", \"aba\", \"xyz\"], \"XX\") == [\"aba\"]', \"assert sorted(find_matching_words('hello world', ['hello', 'world', 'hello world'], 'h?llo')) == sorted(['hello'])\", \"assert  find_matching_words('banana', ['apple', 'banana', 'cherry'], 'bana') == ['banana']\", \"assert  find_matching_words('aaaacccd', ['aaaa','bbbb','cccc','cccd'], 'a??c') == ['aaaa', 'cccd']\", \"assert  find_matching_words('aaaaa', ['aaa', 'bbb'], 'a') == ['aaa']\", 'assert  find_matching_words(\"abbc\", [\"able\", \"ale\", \"apple\", \"bale\", \"kangaroo\"], \"XXc\") == [\"able\", \"bale\"]', 'assert  find_matching_words(\"abc\", [\"able\", \"ale\", \"apple\", \"bale\", \"kangaroo\"], \"XXc\") == [\"able\", \"bale\"]', 'assert  find_matching_words(\"abb\", [\"able\", \"ale\", \"apple\", \"bale\", \"kangaroo\"], \"XX\") == [\"able\", \"bale\"]', 'assert  find_matching_words(\"hello world\", [\"hello\", \"world\", \"apple\"], \"h%%o w%%d\") == [\"hello\", \"world\"]', \"assert find_matching_words('aaa', ['aaa', 'bbb'], 'a') == ['aaa', 'bbb']\", 'assert  find_matching_words(\"abc\", [\"able\", \"ale\", \"apple\", \"bale\", \"kangaroo\"], \"XXY\") == []', 'assert find_matching_words(\"hello world\", [\"hello\", \"world\", \"apple\"], \"h*o w*d\") == [\"hello\", \"world\"]', \"assert  sorted(find_matching_words('apple', ['aple', 'aaple', 'banana'], '?a??')) == sorted(['aple', 'aaple'])\", \"assert  find_matching_words('aaa', ['aaa', 'bbb', 'ccc'], 'a__') == ['aaa']\"]\n",
      "completion\n",
      "    matching_words = []\n",
      "    for word in words:\n",
      "        if re.match(pattern, word, re.IGNORECASE):\n",
      "            matching_words.append(word)\n",
      "    return matching_words\n",
      "\n",
      "passed\n",
      "True\n",
      "result\n",
      "[False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False]\n"
     ]
    }
   ],
   "source": [
    "idx = 1\n",
    "for k in exec_rst[idx]:\n",
    "    print(k)\n",
    "    print(exec_rst[idx][k])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# rank_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3943, ['195083', '19315', '199967', '183109', '181613'])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "with open(\"/.../.../oss-instruct/save/cache/part-00019-de86e216-cb17-4c7d-93dd-b5a080dea8e1-c000.gzpage_rank_scores.pkl\",'rb') as f:\n",
    "    rank_score = pickle.load(f)\n",
    "len(rank_score), list(rank_score.keys())[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(['    missing_columns = [col for col in required_columns if col not in columns]\\n    columns = [col for col in columns if col in valid_columns or col in required_columns]\\n    columns.extend(missing_columns)\\n    return columns\\n'],\n",
       " 9.703393660982237e+22)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rank_score['195083'][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1982, ['195083', '199967', '183109', '195030', '19798'], 0.5026629469946741)"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "not_same_num = 0\n",
    "not_same_list = []\n",
    "for task_id in rank_score:\n",
    "    if rank_score[task_id][0][1] != rank_score[task_id][-1][1]:\n",
    "        not_same_num += 1\n",
    "        not_same_list.append(task_id)\n",
    "not_same_num, not_same_list[:5], not_same_num/len(rank_score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    slug = re.sub(r'\\W+', '-', text).lower().strip('-')\n",
      "    return slug\n",
      "1.9856364836970512e+21\n",
      "    slug = re.sub(r'\\W+','', text).strip().lower()\n",
      "    slug = slug.replace(' ', '-')\n",
      "    return slug\n",
      "2.2362207268992382e+20\n"
     ]
    }
   ],
   "source": [
    "not_same_id = \"199967\"\n",
    "print(rank_score[not_same_id][0][0][0])\n",
    "print(rank_score[not_same_id][0][1])\n",
    "print(rank_score[not_same_id][-1][0][0])\n",
    "print(rank_score[not_same_id][-1][1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3943, ['195083', '19315', '199967', '183109', '181613'])"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "with open(\"/.../.../oss-instruct/save/cache/part-00019-de86e216-cb17-4c7d-93dd-b5a080dea8e1-c000.gzpage_rank_test_scores.pkl\",'rb') as f:\n",
    "    rank_test_score = pickle.load(f)\n",
    "len(rank_test_score), list(rank_test_score.keys())[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(['assert make_slug(\"12345\") == \"12345\"'], 8.759550467873127e+19),\n",
       " (['assert make_slug(\"123abc\") == \"123abc\"'], 8.759550467873127e+19),\n",
       " (['assert make_slug(\"123abc\") == \"123abc\"'], 8.759550467873127e+19),\n",
       " (['assert  make_slug(\"This is a simple string.\") == \"this-is-a-simple-string\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This, is a simple string.\") == \"this-is-a-simple-string\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This is a more complex string. With - special characters & spaces\") == \"this-is-a-more-complex-string-with-special-characters-spaces\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"Hello World!\") == \"hello-world\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"Hello World!\") == \"hello-world\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"Hello World!\") == \"hello-world\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"Hello World!\") == \"hello-world\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This is a test\") == \"this-is-a-test\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This is a test\") == \"this-is-a-test\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This is a test\") == \"this-is-a-test\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"This is a simple string.\")  == \\'this-is-a-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This is a simple string!\") == \\'this-is-a-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This is a simple  string\") == \\'this-is-a-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This   is  a simple string\") == \\'this-is-a-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"simple string\") ==\\'simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"simple string.\") ==\\'simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This simple string\") == \\'this-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"Myocardial Infarction\") == \"myocardial-infarction\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"This is a simple string.\") == \\'this-is-a-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"This is a simple string.\") == \\'this-is-a-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"Hello World!\") == \\'hello-world\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"This is a simple string\")   == \\'this-is-a-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"This, is a simple string\") == \\'this-is-a-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\"This: is a simple string\") == \\'this-is-a-simple-string\\''],\n",
       "  7.465050431554059e+19),\n",
       " (['assert make_slug(\"This is a simple string\") == \"this-is-a-simple-string\"'],\n",
       "  7.465050431554059e+19),\n",
       " (['assert  make_slug(\" This is a simple string.\") == \\'this-is-a-simple-string\\''],\n",
       "  5.1086316841903915e+19),\n",
       " (['assert  make_slug(\"This is a simple string \") == \\'this-is-a-simple-string\\''],\n",
       "  5.1086316841903915e+19),\n",
       " (['assert  make_slug(\"This is a simple string. \") == \\'this-is-a-simple-string\\''],\n",
       "  5.1086316841903915e+19),\n",
       " (['assert  make_slug(\" This is a simple string. \") == \\'this-is-a-simple-string\\''],\n",
       "  5.1086316841903915e+19),\n",
       " (['assert make_slug(\"!@#$%^&*()_+[]{}|;\\':\\\\\",./<>?\") == \\'\\''],\n",
       "  2.3564187473636655e+19)]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rank_test_score['199967']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# load parquet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load_path = \".../codepo/selfoss_humanevalstyle_code_test_merge_n10/repartition/\" + \"part-00019-de86e216-cb17-4c7d-93dd-b5a080dea8e1-c000.gz.parquet\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = pd.read_parquet(load_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "prompt['ranked_code'] = prompt['task_id'].apply(lambda x: [e[0][0] for e in rank_score[x]])\n",
    "prompt['ranked_code_score'] = prompt['task_id'].apply(lambda x: [e[1] for e in rank_score[x]])\n",
    "prompt['ranked_code_score_is_same'] = prompt['task_id'].apply(lambda x: rank_score[x][0][1] == rank_score[x][-1][1] if len(rank_score[x]) else False)\n",
    "\n",
    "\n",
    "prompt['ranked_test'] = prompt['task_id'].apply(lambda x: [e[0][0] for e in rank_test_score[x]])\n",
    "prompt['ranked_test_score'] = prompt['task_id'].apply(lambda x: [e[1] for e in rank_test_score[x]])\n",
    "prompt['ranked_test_score_is_same'] = prompt['task_id'].apply(lambda x: rank_test_score[x][0][1] == rank_test_score[x][-1][1] if len(rank_test_score[x]) else False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = prompt.drop(['test_output', 'prompt','code_output'], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>task_id</th>\n",
       "      <th>prompt_codegen</th>\n",
       "      <th>prompt_testgen</th>\n",
       "      <th>ranked_code</th>\n",
       "      <th>ranked_code_score</th>\n",
       "      <th>ranked_code_score_is_same</th>\n",
       "      <th>ranked_test</th>\n",
       "      <th>ranked_test_score</th>\n",
       "      <th>ranked_test_score_is_same</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>196263</td>\n",
       "      <td>def calculate_fan_in_fan_out(input_shape, outp...</td>\n",
       "      <td>def calculate_fan_in_fan_out(input_shape, outp...</td>\n",
       "      <td>[    if not isinstance(input_shape, tuple) or ...</td>\n",
       "      <td>[14956924994.939896, 14956924994.939896, 14956...</td>\n",
       "      <td>True</td>\n",
       "      <td>[assert calculate_fan_in_fan_out((), ()) == (1...</td>\n",
       "      <td>[17342901303.074974]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>195631</td>\n",
       "      <td>def largest_subarray_sum(nums):\\n    \"\"\"\\n    ...</td>\n",
       "      <td>def largest_subarray_sum(nums):\\n    \"\"\"\\n    ...</td>\n",
       "      <td>[    if not nums:\\n        return 0\\n\\n    max...</td>\n",
       "      <td>[6.0782822045712e+23, 6.0782822045712e+23, 6.0...</td>\n",
       "      <td>True</td>\n",
       "      <td>[assert  largest_subarray_sum([-2, 1, -3, 4, -...</td>\n",
       "      <td>[3.2483759289308444e+22, 3.2483759289308444e+2...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>194295</td>\n",
       "      <td>def foo(s):\\n    \"\"\"\\n    I have a function `f...</td>\n",
       "      <td>def foo(s):\\n    \"\"\"\\n    I have a function `f...</td>\n",
       "      <td>[    return s]</td>\n",
       "      <td>[259.6169195751934]</td>\n",
       "      <td>True</td>\n",
       "      <td>[assert  foo(\"aaa\") == \"aaa\", assert  foo(\"bbb...</td>\n",
       "      <td>[139.4899388813345, 139.4899388813345]</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>19315</td>\n",
       "      <td>def unicode_to_tuple(input_string: str) -&gt; tup...</td>\n",
       "      <td>def unicode_to_tuple(input_string: str) -&gt; tup...</td>\n",
       "      <td>[    code_points = input_string.split(',')\\n  ...</td>\n",
       "      <td>[4.7014574644898497e+24, 4.7014574644898497e+2...</td>\n",
       "      <td>True</td>\n",
       "      <td>[assert  unicode_to_tuple('0061, 0062, 0063') ...</td>\n",
       "      <td>[2.0475162768522093e+23, 2.0475162768522093e+2...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>19798</td>\n",
       "      <td>import base64\\n\\ndef encode_binary_to_base64(b...</td>\n",
       "      <td>import base64\\n\\ndef encode_binary_to_base64(b...</td>\n",
       "      <td>[    return base64.b64encode(binary_string),  ...</td>\n",
       "      <td>[1.297960281529166e+16, 1.297960281529166e+16,...</td>\n",
       "      <td>False</td>\n",
       "      <td>[assert True, assert  encode_binary_to_base64(...</td>\n",
       "      <td>[1977026299004316.2, 1654507387782427.8, 16545...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  task_id                                     prompt_codegen  \\\n",
       "0  196263  def calculate_fan_in_fan_out(input_shape, outp...   \n",
       "1  195631  def largest_subarray_sum(nums):\\n    \"\"\"\\n    ...   \n",
       "2  194295  def foo(s):\\n    \"\"\"\\n    I have a function `f...   \n",
       "3   19315  def unicode_to_tuple(input_string: str) -> tup...   \n",
       "4   19798  import base64\\n\\ndef encode_binary_to_base64(b...   \n",
       "\n",
       "                                      prompt_testgen  \\\n",
       "0  def calculate_fan_in_fan_out(input_shape, outp...   \n",
       "1  def largest_subarray_sum(nums):\\n    \"\"\"\\n    ...   \n",
       "2  def foo(s):\\n    \"\"\"\\n    I have a function `f...   \n",
       "3  def unicode_to_tuple(input_string: str) -> tup...   \n",
       "4  import base64\\n\\ndef encode_binary_to_base64(b...   \n",
       "\n",
       "                                         ranked_code  \\\n",
       "0  [    if not isinstance(input_shape, tuple) or ...   \n",
       "1  [    if not nums:\\n        return 0\\n\\n    max...   \n",
       "2                                     [    return s]   \n",
       "3  [    code_points = input_string.split(',')\\n  ...   \n",
       "4  [    return base64.b64encode(binary_string),  ...   \n",
       "\n",
       "                                   ranked_code_score  \\\n",
       "0  [14956924994.939896, 14956924994.939896, 14956...   \n",
       "1  [6.0782822045712e+23, 6.0782822045712e+23, 6.0...   \n",
       "2                                [259.6169195751934]   \n",
       "3  [4.7014574644898497e+24, 4.7014574644898497e+2...   \n",
       "4  [1.297960281529166e+16, 1.297960281529166e+16,...   \n",
       "\n",
       "   ranked_code_score_is_same  \\\n",
       "0                       True   \n",
       "1                       True   \n",
       "2                       True   \n",
       "3                       True   \n",
       "4                      False   \n",
       "\n",
       "                                         ranked_test  \\\n",
       "0  [assert calculate_fan_in_fan_out((), ()) == (1...   \n",
       "1  [assert  largest_subarray_sum([-2, 1, -3, 4, -...   \n",
       "2  [assert  foo(\"aaa\") == \"aaa\", assert  foo(\"bbb...   \n",
       "3  [assert  unicode_to_tuple('0061, 0062, 0063') ...   \n",
       "4  [assert True, assert  encode_binary_to_base64(...   \n",
       "\n",
       "                                   ranked_test_score  \\\n",
       "0                               [17342901303.074974]   \n",
       "1  [3.2483759289308444e+22, 3.2483759289308444e+2...   \n",
       "2             [139.4899388813345, 139.4899388813345]   \n",
       "3  [2.0475162768522093e+23, 2.0475162768522093e+2...   \n",
       "4  [1977026299004316.2, 1654507387782427.8, 16545...   \n",
       "\n",
       "   ranked_test_score_is_same  \n",
       "0                       True  \n",
       "1                       True  \n",
       "2                       True  \n",
       "3                       True  \n",
       "4                      False  "
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prompt.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>task_id</th>\n",
       "      <th>prompt_codegen</th>\n",
       "      <th>prompt_testgen</th>\n",
       "      <th>ranked_code</th>\n",
       "      <th>ranked_code_score</th>\n",
       "      <th>ranked_code_score_is_same</th>\n",
       "      <th>ranked_test</th>\n",
       "      <th>ranked_test_score</th>\n",
       "      <th>ranked_test_score_is_same</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>199967</td>\n",
       "      <td>import re\\nimport string\\n\\ndef make_slug(text...</td>\n",
       "      <td>import re\\nimport string\\n\\ndef make_slug(text...</td>\n",
       "      <td>[    slug = re.sub(r'\\W+', '-', text).lower()....</td>\n",
       "      <td>[1.9856364836970512e+21, 1.9856364836970512e+2...</td>\n",
       "      <td>False</td>\n",
       "      <td>[assert make_slug(\"12345\") == \"12345\", assert ...</td>\n",
       "      <td>[8.759550467873127e+19, 8.759550467873127e+19,...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  task_id                                     prompt_codegen  \\\n",
       "6  199967  import re\\nimport string\\n\\ndef make_slug(text...   \n",
       "\n",
       "                                      prompt_testgen  \\\n",
       "6  import re\\nimport string\\n\\ndef make_slug(text...   \n",
       "\n",
       "                                         ranked_code  \\\n",
       "6  [    slug = re.sub(r'\\W+', '-', text).lower()....   \n",
       "\n",
       "                                   ranked_code_score  \\\n",
       "6  [1.9856364836970512e+21, 1.9856364836970512e+2...   \n",
       "\n",
       "   ranked_code_score_is_same  \\\n",
       "6                      False   \n",
       "\n",
       "                                         ranked_test  \\\n",
       "6  [assert make_slug(\"12345\") == \"12345\", assert ...   \n",
       "\n",
       "                                   ranked_test_score  \\\n",
       "6  [8.759550467873127e+19, 8.759550467873127e+19,...   \n",
       "\n",
       "   ranked_test_score_is_same  \n",
       "6                      False  "
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result = prompt.loc[prompt['task_id'] == \"199967\"]\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    slug = re.sub(r'\\W+', '-', text).lower().strip('-')\n",
      "    return slug\n",
      "    slug = re.sub(r'\\W+','', text).strip().lower()\n",
      "    slug = slug.replace(' ', '-')\n",
      "    return slug\n"
     ]
    }
   ],
   "source": [
    "print(result['ranked_code'][6][0])\n",
    "print(result['ranked_code'][6][-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.9856364836970512e+21\n",
      "2.2362207268992382e+20\n"
     ]
    }
   ],
   "source": [
    "print(result['ranked_code_score'][6][0])\n",
    "print(result['ranked_code_score'][6][-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8.759550467873127e+19\n",
      "2.3564187473636655e+19\n"
     ]
    }
   ],
   "source": [
    "print(result['ranked_test_score'][6][0])\n",
    "print(result['ranked_test_score'][6][-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "assert make_slug(\"12345\") == \"12345\"\n",
      "assert  make_slug(\" This is a simple string. \") == 'this-is-a-simple-string'\n"
     ]
    }
   ],
   "source": [
    "print(result['ranked_test'][6][0])\n",
    "print(result['ranked_test'][6][-2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "import re\n",
      "import string\n",
      "\n",
      "def make_slug(text):\n",
      "    \"\"\"\n",
      "    Construct a Python function to convert a given string into a URL-friendly slug. The function should remove all special characters, convert spaces to hyphens, and convert to lowercase. For example, `make_slug(\"This is a simple string.\")` should return `'this-is-a-simple-string'`.\n",
      "    \"\"\"\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(result['prompt_codegen'][6])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'this-is-a-simple-string'"
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import re\n",
    "import string\n",
    "\n",
    "def make_slug(text):\n",
    "    slug = re.sub(r'\\W+', '-', text).lower().strip('-')\n",
    "    return slug\n",
    "make_slug(\"This is a simple string.\")\n",
    "make_slug(\" This is a simple string. \")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'thisisasimplestring'"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import re\n",
    "import string\n",
    "\n",
    "def make_slug(text):\n",
    "    slug = re.sub(r'\\W+','', text).strip().lower()\n",
    "    slug = slug.replace(' ', '-')\n",
    "    return slug\n",
    "make_slug(\"This is a simple string.\")\n",
    "make_slug(\" This is a simple string. \")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# rank_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "load_path = \".../codepo/.../save/part-00016-de86e216-cb17-4c7d-93dd-b5a080dea8e1-c000.gz.save_ranked.parquet\"\n",
    "ranked_score = pd.read_parquet(load_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "ranked_code_scores = list(ranked_score['ranked_code_score'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "valid_num = len([e for e in ranked_code_scores if len(e) > 0 and e[0] != e[-1]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.4066246697825645"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "valid_num / len(ranked_code_scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.9.2 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.2"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
