{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Notebook for scraping example test cases for each question"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import time\n",
    "from utils import test_cases_from_slug\n",
    "import ast\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv('data/with_snippets/leetcode_hard_with_snippets_uncontaminated.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "You are given the root of a binary tree with n nodes. Each node is assigned a unique value from 1 to n. You are also given an array queries of size m.\n",
      "You have to perform m independent queries on the tree where in the ith query you do the following:\n",
      "\n",
      "Remove the subtree rooted at the node with the value queries[i] from the tree. It is guaranteed that queries[i] will not be equal to the value of the root.\n",
      "\n",
      "Return an array answer of size m where answer[i] is the height of the tree after performing the ith query.\n",
      "Note:\n",
      "\n",
      "The queries are independent, so the tree returns to its initial state after each query.\n",
      "The height of a tree is the number of edges in the longest simple path from the root to some node in the tree.\n",
      "\n",
      " \n",
      "Example 1:\n",
      "\n",
      "\n",
      "Input: root = [1,3,4,2,null,6,5,null,null,null,null,null,7], queries = [4]\n",
      "Output: [2]\n",
      "Explanation: The diagram above shows the tree after removing the subtree rooted at node with value 4.\n",
      "The height of the tree is 2 (The path 1 -> 3 -> 2).\n",
      "\n",
      "Example 2:\n",
      "\n",
      "\n",
      "Input: root = [5,8,9,2,1,3,7,4,6], queries = [3,2,4,8]\n",
      "Output: [3,2,3,2]\n",
      "Explanation: We have the following queries:\n",
      "- Removing the subtree rooted at node with value 3. The height of the tree becomes 3 (The path 5 -> 8 -> 2 -> 4).\n",
      "- Removing the subtree rooted at node with value 2. The height of the tree becomes 2 (The path 5 -> 8 -> 1).\n",
      "- Removing the subtree rooted at node with value 4. The height of the tree becomes 3 (The path 5 -> 8 -> 2 -> 6).\n",
      "- Removing the subtree rooted at node with value 8. The height of the tree becomes 2 (The path 5 -> 9 -> 3).\n",
      "\n",
      " \n",
      "Constraints:\n",
      "\n",
      "The number of nodes in the tree is n.\n",
      "2 <= n <= 105\n",
      "1 <= Node.val <= n\n",
      "All the values in the tree are unique.\n",
      "m == queries.length\n",
      "1 <= m <= min(n, 104)\n",
      "1 <= queries[i] <= n\n",
      "queries[i] != root.val\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(data.iloc[34]['description'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "kwargs: {'nums1': '[1,0,1]', 'nums2': '[0,0,0]', 'queries': '[[1,1,1],[2,1,0],[3,0,0]]'}\n",
      "output: [3]\n",
      "\n",
      "kwargs: {'nums1': '[1]', 'nums2': '[5]', 'queries': '[[2,0,0],[3,0,0]]'}\n",
      "output: [5]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import re\n",
    "\n",
    "def extract_examples(description):\n",
    "    inputs = [l for l in description.split('\\n') if l.startswith('Input')]\n",
    "    outputs = [l.strip('Output: ') for l in description.split('\\n') if l.startswith('Output')]\n",
    "\n",
    "    examples = []\n",
    "\n",
    "    for input_str, output_str in zip(inputs, outputs):\n",
    "        kwargs_str = re.sub(r'\\s+', '', input_str)\n",
    "        kwargs_pairs = re.findall(r\"(\\w+)\\s*=\\s*([-\\w\\[\\],]+)(?:,|$)\", kwargs_str)\n",
    "        kwargs = {k: v for k, v in kwargs_pairs}\n",
    "        output = output_str\n",
    "        examples.append((kwargs, output))\n",
    "\n",
    "    return examples\n",
    "\n",
    "# Test the function with the provided problem description\n",
    "examples = extract_examples(data.iloc[4]['description'])\n",
    "for example in examples:\n",
    "    print(\"kwargs:\", example[0])\n",
    "    print(\"output:\", example[1])\n",
    "    print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "1\n",
      "2\n",
      "3\n",
      "4\n",
      "5\n",
      "6\n",
      "7\n",
      "8\n",
      "9\n",
      "10\n",
      "11\n",
      "12\n",
      "13\n",
      "14\n",
      "15\n",
      "16\n",
      "17\n",
      "18\n",
      "19\n",
      "20\n",
      "21\n",
      "22\n",
      "23\n",
      "24\n",
      "25\n",
      "26\n",
      "27\n",
      "28\n",
      "29\n",
      "30\n",
      "31\n",
      "32\n",
      "33\n",
      "34\n",
      "35\n",
      "36\n",
      "37\n",
      "38\n",
      "39\n",
      "40\n"
     ]
    }
   ],
   "source": [
    "data['example_test_cases'] = None\n",
    "for ind, row in data.iterrows():\n",
    "    print(ind)\n",
    "    examples = extract_examples(row['description'])\n",
    "    data.at[ind, 'example_test_cases'] = examples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.to_csv('data/with_snippets/leetcode_hard_with_snippets_uncontaminated_tests.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data['example_test_cases'] = None\n",
    "for ind, row in data.iterrows():\n",
    "    print(ind)\n",
    "    slug = row['question_slug']\n",
    "    test_cases = test_cases_from_slug(slug)\n",
    "    data.at[ind, 'example_test_cases'] = test_cases\n",
    "    time.sleep(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['[[3,4,2,1],[4,2,3,1],[2,1,0,0],[2,4,0,0]]',\n",
       " '[[3,4,2,1],[4,2,1,1],[2,1,1,0],[3,4,1,0]]',\n",
       " '[[2,1,0],[1,0,0]]']"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.iloc[6]['example_test_cases']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "21\n"
     ]
    }
   ],
   "source": [
    "function_name_regex = r\"(?<=def\\s)\\w+\"\n",
    "no_args = 0\n",
    "data['example_test_cases_parsed'] = None\n",
    "for ind, row in data.iterrows():\n",
    "    statements = []\n",
    "    for test in row['example_test_cases']:\n",
    "        args = test.split('\\n')[:-1]\n",
    "        if len(args) == 0:\n",
    "            no_args += 1\n",
    "        expected_val = test.split('\\n')[-1]\n",
    "        entry_point = re.search(function_name_regex, row['python3_snippet']).group(0)\n",
    "        statement = f\"assert {entry_point}({', '.join(args)}) == {expected_val}\"\n",
    "        statements.append(statement)\n",
    "    #print(statements)\n",
    "    data.at[ind, 'example_test_cases_parsed'] = statements\n",
    "print(no_args)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.to_csv('data/with_snippets/leetcode_hard_with_snippets_uncontaminated_cleaned_tests.csv')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.1"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
