{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "6a561849",
   "metadata": {},
   "outputs": [],
   "source": [
    "from LCDP import LCDP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4cc7eacb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generated 3 new test cases, total 6 test cases.\n",
      "Evaluating codes on 6 test cases...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Testing Progress: 100%|##########| 30/30\n",
      "Analyzing codes: 100%|██████████| 5/5 [00:05<00:00,  1.03s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'code_0': {'code': 'def extract_string(input_list, specified_size):\\n    # Initialize an empty list to store strings with specified size\\n    output_list = []\\n    \\n    # Iterate through each string in the input list\\n    for string in input_list:\\n        # Check if the length of the string is equal to the specified size\\n        if len(string) == specified_size:\\n            # If true, add the string to the output list\\n            output_list.append(string)\\n    \\n    return output_list', 'plan': 'The function `extract_string` takes in a list of string values and an integer specifying the size of the strings to be extracted. It initializes an empty list to store the extracted strings that match the specified size. Then, it iterates through each string in the input list, checking if the length of the string is equal to the specified size. If the length matches, the string is added to the output list. Finally, the function returns the list of extracted strings.\\n\\nThe workflow steps are as follows:\\n1. Initialize an empty list `output_list` to store the extracted strings.\\n2. Iterate through each `string` in the `input_list`.\\n3. Check if the length of the `string` is equal to the `specified_size`.\\n4. If the length matches, append the `string` to the `output_list`.\\n5. 
Return the `output_list`.', 'main_function_name': 'extract_string', 'score': 0.8167, 'pass_rate': 1.0, 'pass_rate_score': 1.0, 'prediction_score': 0.0, 'pylint_score': 0.16699999999999998, 'radon_score': 1.0, 'test_case_results': {'test_case_1': {'success': True, 'reason': None, 'message': 'Test passed: Correctly extracted strings with specified size'}, 'test_case_2': {'success': True, 'reason': None, 'message': 'Test passed: Returned empty list for no matching strings'}, 'test_case_1_2': {'success': True, 'reason': None, 'message': 'Test passed'}, 'test_case_2_1': {'success': True, 'reason': None, 'message': 'Test passed'}, 'test_case_1_1': {'success': True, 'reason': None, 'message': 'Test passed'}, 'test_case_2_2': {'success': True, 'reason': None, 'message': 'Test passed'}}, 'test_weights': {'test_case_1': 0.5172413793103449, 'test_case_2': 0.5172413793103449, 'test_case_1_2': 0.5172413793103449, 'test_case_2_1': 0.7894736842105263, 'test_case_1_1': 0.5172413793103449, 'test_case_2_2': 0.7894736842105263}}, 'code_1': {'code': 'def extract_string(strings, size):\\n    extracted_strings = []  # Initialize an empty list to store extracted strings\\n    \\n    # Iterate through each string in the input list\\n    for string in strings:\\n        # Check if the length of the current string is equal to the specified size\\n        if len(string) == size:\\n            extracted_strings.append(string)  # If true, add the string to the output list\\n    \\n    return extracted_strings  # Return the list of extracted strings', 'plan': \"1. Create an empty list called 'extracted_strings' to store the strings that match the specified size.\\n2. Iterate through each string in the input list 'strings'.\\n3. For each string, check if its length is equal to the specified 'size'.\\n4. If the length matches, add the string to the 'extracted_strings' list.\\n5. 
Finally, return the list of extracted strings.\", 'main_function_name': 'extract_string', 'score': 0.8167, 'pass_rate': 1.0, 'pass_rate_score': 1.0, 'prediction_score': 0.0, 'pylint_score': 0.16699999999999998, 'radon_score': 1.0, 'test_case_results': {'test_case_1': {'success': True, 'reason': None, 'message': 'Test passed: Correctly extracted strings with specified size'}, 'test_case_2': {'success': True, 'reason': None, 'message': 'Test passed: Returned empty list for no matching strings'}, 'test_case_1_2': {'success': True, 'reason': None, 'message': 'Test passed'}, 'test_case_2_1': {'success': True, 'reason': None, 'message': 'Test passed'}, 'test_case_1_1': {'success': True, 'reason': None, 'message': 'Test passed'}, 'test_case_2_2': {'success': True, 'reason': None, 'message': 'Test passed'}}, 'test_weights': {'test_case_1': 0.5172413793103449, 'test_case_2': 0.5172413793103449, 'test_case_1_2': 0.5172413793103449, 'test_case_2_1': 0.7894736842105263, 'test_case_1_1': 0.5172413793103449, 'test_case_2_2': 0.7894736842105263}}, 'code_2': {'code': 'def extract_string(string_list, specified_size):\\n    # Initialize an empty list to store the strings that match the specified size\\n    output_list = []\\n    \\n    # Iterate through each string in the input list\\n    for string in string_list:\\n        # Check if the length of the string is equal to the specified size\\n        if len(string) == specified_size:\\n            # If true, add the string to the output list\\n            output_list.append(string)\\n    \\n    # Return the list of strings that match the specified size\\n    return output_list', 'plan': \"1. Define the function 'extract_string' with parameters 'string_list' and 'specified_size'.\\n2. Initialize an empty list 'output_list' to store the strings that match the specified size.\\n3. Iterate through each string in the 'string_list' using a for loop.\\n4. Check if the length of the current string is equal to the 'specified_size'.\\n5. 
If the length matches, add the string to the 'output_list'.\\n6. Return the 'output_list' containing the strings that match the specified size.\", 'main_function_name': 'extract_string', 'score': 0.8167, 'pass_rate': 1.0, 'pass_rate_score': 1.0, 'prediction_score': 0.0, 'pylint_score': 0.16699999999999998, 'radon_score': 1.0, 'test_case_results': {'test_case_1': {'success': True, 'reason': None, 'message': 'Test passed: Correctly extracted strings with specified size'}, 'test_case_2': {'success': True, 'reason': None, 'message': 'Test passed: Returned empty list for no matching strings'}, 'test_case_1_2': {'success': True, 'reason': None, 'message': 'Test passed'}, 'test_case_2_1': {'success': True, 'reason': None, 'message': 'Test passed'}, 'test_case_1_1': {'success': True, 'reason': None, 'message': 'Test passed'}, 'test_case_2_2': {'success': True, 'reason': None, 'message': 'Test passed'}}, 'test_weights': {'test_case_1': 0.5172413793103449, 'test_case_2': 0.5172413793103449, 'test_case_1_2': 0.5172413793103449, 'test_case_2_1': 0.7894736842105263, 'test_case_1_1': 0.5172413793103449, 'test_case_2_2': 0.7894736842105263}}}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "test_task_description = \"\"\"Write a function to extract specified size of strings from a given list of string values.\\n\\nYour goal is to implement the Python function described above. The function should be named `extract_string`.\\n\\nHere are some example assertions the function should satisfy:\\n- `assert extract_string(['Python', 'list', 'exercises', 'practice', 'solution'] ,8)==['practice', 'solution']`\\n- `assert extract_string(['Python', 'list', 'exercises', 'practice', 'solution'] ,6)==['Python']`\\n- `assert extract_string(['Python', 'list', 'exercises', 'practice', 'solution'] ,9)==['exercises']`\\n\\nPlease provide the complete Python code for this function.\"\"\"\n",
    "\n",
    "# Build the pipeline. No API key is passed explicitly (api_key=None).\n",
    "# NOTE(review): presumably LCDP resolves the key itself (e.g. from the environment) - confirm.\n",
    "lcdp = LCDP(\n",
    "    api_key=None,\n",
    "    model=\"gpt-3.5-turbo\",\n",
    "    max_workers=50,\n",
    "    ignore_advice=True,\n",
    "    use_pr_predictor=False,\n",
    ")\n",
    "\n",
    "# Top-level `await` is valid here because IPython runs cells inside an event loop.\n",
    "best_codes = await lcdp.run(\n",
    "    task_description=test_task_description,\n",
    "    max_iterations=2,\n",
    "    num_plans=3,\n",
    "    num_tests=3,\n",
    "    num_codes=5,\n",
    "    refine_rounds=3,\n",
    "    use_pass_rate_for_train=False,\n",
    "    test_timeout=10,\n",
    "    # use_example=True,\n",
    "    # example_dataset=example_codes,\n",
    "    use_async_generation=False,\n",
    "    best_only=True,\n",
    ")\n",
    "\n",
    "# Print a compact per-candidate summary instead of the whole nested dict;\n",
    "# the code, plan and per-test results are inspected in the cells below.\n",
    "for code_id, code_info in best_codes.items():\n",
    "    print(f\"{code_id}: score={code_info['score']}, pass_rate={code_info['pass_rate']}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "5b687b9e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'test_case_1': {'test_type': 'correctness', 'purpose': 'This test case will validate the function with a list of strings where the specified size is present in multiple strings. This is to verify if the function correctly extracts all the strings with the specified size.', 'test_function': 'def test_extract_string_multiple_strings(func):\\n    input_list = [\\'Python\\', \\'list\\', \\'exercises\\', \\'practice\\', \\'solution\\']\\n    specified_size = 8\\n    expected_output = [\\'practice\\', \\'solution\\']\\n    \\n    output = func(input_list, specified_size)\\n    \\n    if output == expected_output:\\n        return True, \"Test passed: Correctly extracted strings with specified size\"\\n    else:\\n        return False, f\"Test failed: Expected {expected_output} but got {output}\"', 'all_pass_times': 1, 'all_fail_times': 0}, 'test_case_2': {'test_type': 'correctness', 'purpose': \"This test case will check the function behavior when the specified size doesn't match any string in the input list. The expected output in this case should be an empty list.\", 'test_function': 'def test_extract_string_no_matching_string(func):\\n    input_list = [\\'Python\\', \\'list\\', \\'exercises\\', \\'practice\\', \\'solution\\']\\n    specified_size = 10\\n    expected_output = []\\n    \\n    output = func(input_list, specified_size)\\n    \\n    if output == expected_output:\\n        return True, \"Test passed: Returned empty list for no matching strings\"\\n    else:\\n        return False, f\"Test failed: Expected {expected_output} but got {output}\"', 'all_pass_times': 1, 'all_fail_times': 0}, 'test_case_1_2': {'test_type': 'correctness', 'purpose': \"This test case aims to validate the function's correctness by providing a list of strings with different lengths and extracting strings based on the specified size. 
The expected output for this test case is a list of strings that match the specified size criterion.\", 'test_function': 'def test_extract_string_correctness(func):\\n    input_list = [\\'Python\\', \\'list\\', \\'exercises\\', \\'practice\\', \\'solution\\']\\n    size_1 = 8\\n    expected_output_1 = [\\'practice\\', \\'solution\\']\\n    \\n    size_2 = 6\\n    expected_output_2 = [\\'Python\\']\\n    \\n    size_3 = 9\\n    expected_output_3 = [\\'exercises\\']\\n    \\n    # Test case 1\\n    output_1 = func(input_list, size_1)\\n    if output_1 == expected_output_1:\\n        return True, \"Test passed\"\\n    else:\\n        return False, f\"Test failed: expected {expected_output_1} but got {output_1}\"\\n    \\n    # Test case 2\\n    output_2 = func(input_list, size_2)\\n    if output_2 == expected_output_2:\\n        return True, \"Test passed\"\\n    else:\\n        return False, f\"Test failed: expected {expected_output_2} but got {output_2}\"\\n    \\n    # Test case 3\\n    output_3 = func(input_list, size_3)\\n    if output_3 == expected_output_3:\\n        return True, \"Test passed\"\\n    else:\\n        return False, f\"Test failed: expected {expected_output_3} but got {output_3}\"', 'all_pass_times': 1, 'all_fail_times': 0}, 'test_case_2_1': {'test_type': 'edge_case', 'purpose': 'This test case aims to test for an edge case where the specified size is not present in any of the input strings. 
The expected output for this scenario is an empty list since there is no string of the desired size.', 'test_function': 'def test_extract_string_edge_case(func):\\n    input_list = [\\'Python\\', \\'list\\', \\'exercises\\', \\'practice\\', \\'solution\\']\\n    size = 10\\n    expected_output = []\\n    \\n    output = func(input_list, size)\\n    if output == expected_output:\\n        return True, \"Test passed\"\\n    else:\\n        return False, f\"Test failed: expected {expected_output} but got {output}\"', 'all_pass_times': 1, 'all_fail_times': 0}, 'test_case_1_1': {'test_type': 'correctness', 'purpose': \"This test case aims to validate the function's correctness when the specified size is present in one of the input strings. We will use input where the specified size appears only once in an input string list.\", 'test_function': 'def test_case(func):\\n    input_list = [\\'Python\\', \\'list\\', \\'exercises\\', \\'practice\\', \\'solution\\']\\n    size = 8\\n    expected_output = [\\'practice\\', \\'solution\\']\\n    output = func(input_list, size)\\n    if output == expected_output:\\n        return True, \"Test passed\"\\n    else:\\n        return False, f\"Test failed: expected {expected_output} but got {output}\"', 'all_pass_times': 1, 'all_fail_times': 0}, 'test_case_2_2': {'test_type': 'edge_case', 'purpose': 'This test case is designed to handle an edge case scenario where the input list is empty. The function should return an empty list since there are no strings to extract from.', 'test_function': 'def test_case(func):\\n    input_list = []\\n    size = 5\\n    expected_output = []\\n    output = func(input_list, size)\\n    if output == expected_output:\\n        return True, \"Test passed\"\\n    else:\\n        return False, f\"Test failed: expected {expected_output} but got {output}\"', 'all_pass_times': 1, 'all_fail_times': 0}}\n",
      "6\n",
      "Test ID: test_case_1\n",
      "Test Case:\n",
      "def test_extract_string_multiple_strings(func):\n",
      "    input_list = ['Python', 'list', 'exercises', 'practice', 'solution']\n",
      "    specified_size = 8\n",
      "    expected_output = ['practice', 'solution']\n",
      "    \n",
      "    output = func(input_list, specified_size)\n",
      "    \n",
      "    if output == expected_output:\n",
      "        return True, \"Test passed: Correctly extracted strings with specified size\"\n",
      "    else:\n",
      "        return False, f\"Test failed: Expected {expected_output} but got {output}\"\n",
      "Test Type: correctness\n",
      "----------------------------------------\n",
      "Test ID: test_case_2\n",
      "Test Case:\n",
      "def test_extract_string_no_matching_string(func):\n",
      "    input_list = ['Python', 'list', 'exercises', 'practice', 'solution']\n",
      "    specified_size = 10\n",
      "    expected_output = []\n",
      "    \n",
      "    output = func(input_list, specified_size)\n",
      "    \n",
      "    if output == expected_output:\n",
      "        return True, \"Test passed: Returned empty list for no matching strings\"\n",
      "    else:\n",
      "        return False, f\"Test failed: Expected {expected_output} but got {output}\"\n",
      "Test Type: correctness\n",
      "----------------------------------------\n",
      "Test ID: test_case_1_2\n",
      "Test Case:\n",
      "def test_extract_string_correctness(func):\n",
      "    input_list = ['Python', 'list', 'exercises', 'practice', 'solution']\n",
      "    size_1 = 8\n",
      "    expected_output_1 = ['practice', 'solution']\n",
      "    \n",
      "    size_2 = 6\n",
      "    expected_output_2 = ['Python']\n",
      "    \n",
      "    size_3 = 9\n",
      "    expected_output_3 = ['exercises']\n",
      "    \n",
      "    # Test case 1\n",
      "    output_1 = func(input_list, size_1)\n",
      "    if output_1 == expected_output_1:\n",
      "        return True, \"Test passed\"\n",
      "    else:\n",
      "        return False, f\"Test failed: expected {expected_output_1} but got {output_1}\"\n",
      "    \n",
      "    # Test case 2\n",
      "    output_2 = func(input_list, size_2)\n",
      "    if output_2 == expected_output_2:\n",
      "        return True, \"Test passed\"\n",
      "    else:\n",
      "        return False, f\"Test failed: expected {expected_output_2} but got {output_2}\"\n",
      "    \n",
      "    # Test case 3\n",
      "    output_3 = func(input_list, size_3)\n",
      "    if output_3 == expected_output_3:\n",
      "        return True, \"Test passed\"\n",
      "    else:\n",
      "        return False, f\"Test failed: expected {expected_output_3} but got {output_3}\"\n",
      "Test Type: correctness\n",
      "----------------------------------------\n",
      "Test ID: test_case_2_1\n",
      "Test Case:\n",
      "def test_extract_string_edge_case(func):\n",
      "    input_list = ['Python', 'list', 'exercises', 'practice', 'solution']\n",
      "    size = 10\n",
      "    expected_output = []\n",
      "    \n",
      "    output = func(input_list, size)\n",
      "    if output == expected_output:\n",
      "        return True, \"Test passed\"\n",
      "    else:\n",
      "        return False, f\"Test failed: expected {expected_output} but got {output}\"\n",
      "Test Type: edge_case\n",
      "----------------------------------------\n",
      "Test ID: test_case_1_1\n",
      "Test Case:\n",
      "def test_case(func):\n",
      "    input_list = ['Python', 'list', 'exercises', 'practice', 'solution']\n",
      "    size = 8\n",
      "    expected_output = ['practice', 'solution']\n",
      "    output = func(input_list, size)\n",
      "    if output == expected_output:\n",
      "        return True, \"Test passed\"\n",
      "    else:\n",
      "        return False, f\"Test failed: expected {expected_output} but got {output}\"\n",
      "Test Type: correctness\n",
      "----------------------------------------\n",
      "Test ID: test_case_2_2\n",
      "Test Case:\n",
      "def test_case(func):\n",
      "    input_list = []\n",
      "    size = 5\n",
      "    expected_output = []\n",
      "    output = func(input_list, size)\n",
      "    if output == expected_output:\n",
      "        return True, \"Test passed\"\n",
      "    else:\n",
      "        return False, f\"Test failed: expected {expected_output} but got {output}\"\n",
      "Test Type: edge_case\n",
      "----------------------------------------\n"
     ]
    }
   ],
   "source": [
    "test_cases = lcdp.test_cases\n",
    "print(len(test_cases))\n",
    "\n",
    "# One readable record per test case. The raw dict repr is not printed because it\n",
    "# repeats the same information as a single unreadable line.\n",
    "for test_id, test_case_dict in test_cases.items():\n",
    "    print(f\"Test ID: {test_id}\")\n",
    "    print(f\"Purpose: {test_case_dict.get('purpose', 'n/a')}\")\n",
    "    print(f\"Test Case:\\n{test_case_dict['test_function']}\")\n",
    "    print(f\"Test Type: {test_case_dict['test_type']}\")\n",
    "    print(\"-\" * 40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "473c5ed8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dict_keys(['code_0', 'code_1', 'code_2'])\n",
      "dict_keys(['code', 'plan', 'main_function_name', 'score', 'pass_rate', 'pass_rate_score', 'prediction_score', 'pylint_score', 'radon_score', 'test_case_results', 'test_weights'])\n",
      "Scores:\n",
      "\n",
      "Overall score:\n",
      "0.8167\n",
      "\n",
      "Pass rate score:\n",
      "1.0\n",
      "\n",
      "Prediction score:\n",
      "0.0\n",
      "\n",
      "Pylint score:\n",
      "0.16699999999999998\n",
      "\n",
      "Radon score:\n",
      "1.0\n",
      "\n",
      "Test case results:\n",
      "{'test_case_1': {'success': True, 'reason': None, 'message': 'Test passed: Correctly extracted strings with specified size'}, 'test_case_2': {'success': True, 'reason': None, 'message': 'Test passed: Returned empty list for no matching strings'}, 'test_case_1_2': {'success': True, 'reason': None, 'message': 'Test passed'}, 'test_case_2_1': {'success': True, 'reason': None, 'message': 'Test passed'}, 'test_case_1_1': {'success': True, 'reason': None, 'message': 'Test passed'}, 'test_case_2_2': {'success': True, 'reason': None, 'message': 'Test passed'}}\n",
      "6\n",
      "6\n"
     ]
    }
   ],
   "source": [
    "checking_code = \"code_0\"\n",
    "print(best_codes.keys())\n",
    "\n",
    "selected = best_codes[checking_code]\n",
    "print(selected.keys())\n",
    "\n",
    "# Heading / result-key pairs, printed in the same order as before.\n",
    "score_sections = [\n",
    "    (\"\\nOverall score:\", \"score\"),\n",
    "    (\"\\nPass rate score:\", \"pass_rate_score\"),\n",
    "    (\"\\nPrediction score:\", \"prediction_score\"),\n",
    "    (\"\\nPylint score:\", \"pylint_score\"),\n",
    "    (\"\\nRadon score:\", \"radon_score\"),\n",
    "]\n",
    "\n",
    "print(\"Scores:\")\n",
    "for heading, score_key in score_sections:\n",
    "    print(heading)\n",
    "    print(selected[score_key])\n",
    "\n",
    "print(\"\\nTest case results:\")\n",
    "case_results = selected[\"test_case_results\"]\n",
    "print(case_results)\n",
    "# Number of passed tests, then the total number of tests.\n",
    "print(sum(outcome[\"success\"] for outcome in case_results.values()))\n",
    "print(len(case_results))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "74ae6168",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "def extract_string(input_list, specified_size):\n",
      "    # Initialize an empty list to store strings with specified size\n",
      "    output_list = []\n",
      "    \n",
      "    # Iterate through each string in the input list\n",
      "    for string in input_list:\n",
      "        # Check if the length of the string is equal to the specified size\n",
      "        if len(string) == specified_size:\n",
      "            # If true, add the string to the output list\n",
      "            output_list.append(string)\n",
      "    \n",
      "    return output_list\n"
     ]
    }
   ],
   "source": [
    "# Display the source of the candidate chosen via `checking_code`.\n",
    "candidate_source = best_codes[checking_code][\"code\"]\n",
    "print(candidate_source)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "c3fab755",
   "metadata": {},
   "outputs": [],
   "source": [
    "from itertools import islice\n",
    "\n",
    "from datasets import load_dataset\n",
    "\n",
    "# NOTE(review): trust_remote_code=True lets the dataset's loading script run locally;\n",
    "# keep it only for sources you trust.\n",
    "mbpp_dataset = load_dataset(\"mbpp\", \"sanitized\", split=\"test\", trust_remote_code=True)\n",
    "\n",
    "# Take the first 10 problems without materialising the whole split into a list first.\n",
    "problems = list(islice(mbpp_dataset, 10))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "6ac75fb7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'source_file': 'Benchmark Questions Verification V2.ipynb', 'task_id': 11, 'prompt': 'Write a python function to remove first and last occurrence of a given character from the string.', 'code': 'def remove_Occ(s,ch): \\n    for i in range(len(s)): \\n        if (s[i] == ch): \\n            s = s[0 : i] + s[i + 1:] \\n            break\\n    for i in range(len(s) - 1,-1,-1):  \\n        if (s[i] == ch): \\n            s = s[0 : i] + s[i + 1:] \\n            break\\n    return s ', 'test_imports': [], 'test_list': ['assert remove_Occ(\"hello\",\"l\") == \"heo\"', 'assert remove_Occ(\"abcda\",\"a\") == \"bcd\"', 'assert remove_Occ(\"PHP\",\"P\") == \"H\"']}\n"
     ]
    }
   ],
   "source": [
    "# Peek at one record to see which fields each problem carries.\n",
    "first_problem = problems[0]\n",
    "print(first_problem)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "4e4aa3b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_prompt = \"\"\"You are a test case generation agent. Your task is to create Python test functions to validate a code generation task based on the provided specifications. Follow these instructions carefully:\n",
    "\n",
    "### Input Specifications:\n",
    "- **Task Description**:\n",
    "You are given an integer array nums and two integers, k and limit. Your task is to find a non-empty subsequence of nums that:\n",
    "\n",
    "Has an alternating sum equal to k.\n",
    "Maximizes the product of all its numbers without the product exceeding limit.\n",
    "Return the product of the numbers in such a subsequence. If no subsequence satisfies the requirements, return -1.\n",
    "\n",
    "The alternating sum of a 0-indexed array is defined as the sum of the elements at even indices minus the sum of the elements at odd indices.\n",
    "\n",
    "Example 1:\n",
    "\n",
    "Input: nums = [1,2,3], k = 2, limit = 10\n",
    "\n",
    "Output: 6\n",
    "\n",
    "Explanation:\n",
    "\n",
    "The subsequences with an alternating sum of 2 are:\n",
    "\n",
    "[1, 2, 3]\n",
    "Alternating Sum: 1 - 2 + 3 = 2\n",
    "Product: 1 * 2 * 3 = 6\n",
    "[2]\n",
    "Alternating Sum: 2\n",
    "Product: 2\n",
    "The maximum product within the limit is 6.\n",
    "\n",
    "Example 2:\n",
    "\n",
    "Input: nums = [0,2,3], k = -5, limit = 12\n",
    "\n",
    "Output: -1\n",
    "\n",
    "Explanation:\n",
    "\n",
    "A subsequence with an alternating sum of exactly -5 does not exist.\n",
    "\n",
    "Example 3:\n",
    "\n",
    "Input: nums = [2,2,3,3], k = 0, limit = 9\n",
    "\n",
    "Output: 9\n",
    "\n",
    "Explanation:\n",
    "\n",
    "The subsequences with an alternating sum of 0 are:\n",
    "\n",
    "[2, 2]\n",
    "Alternating Sum: 2 - 2 = 0\n",
    "Product: 2 * 2 = 4\n",
    "[3, 3]\n",
    "Alternating Sum: 3 - 3 = 0\n",
    "Product: 3 * 3 = 9\n",
    "[2, 2, 3, 3]\n",
    "Alternating Sum: 2 - 2 + 3 - 3 = 0\n",
    "Product: 2 * 2 * 3 * 3 = 36\n",
    "The subsequence [2, 2, 3, 3] has the greatest product with an alternating sum equal to k, but 36 > 9. The next greatest product is 9, which is within the limit.\n",
    "- **Input Format**: \n",
    "- Argument 1: list (no fixed shape)\n",
    "- Argument 2: int (no fixed shape)\n",
    "- Argument 3: int (no fixed shape)\n",
    "- **Output Format**: \n",
    "- Output 1: int (no fixed shape)\n",
    "- **Components Used**: find_subsequence\n",
    "- **Plan**: \n",
    "Invoke the find_subsequence component with the given inputs (nums, k, limit).\n",
    "Return the result from find_subsequence as the final output.\n",
    "- **Test Case Advise**: \n",
    "- Include variations in input values like negative numbers, zeros, and large positive integers.\n",
    "- Test with k and limit values close to the limits to assess boundary conditions.\n",
    "- Include cases where the alternating sum is negative, positive, or zero.\n",
    "\n",
    "### Requirements:\n",
    "1. **Test Function Structure**:\n",
    "   - Each test function must accept **only the function under test** as its parameter (e.g., `def test_case(func):...`).\n",
    "   - Return `True` if the test passes, `False` otherwise. Do not use assertions, please return a boolean value.\n",
    "   - Include input generation, runtime checks, code inspection, or result validation within the function.\n",
    "\n",
    "2. **Test Types** (use one of these for indicating the test_type):\n",
    "   - `correctness`: Validate output against expected results for specific inputs.\n",
    "   - `edge_case`: Test inputs like empty lists, extreme values, or invalid data.\n",
    "   - `runtime`: Measure execution time (e.g., ensure it's below a threshold).\n",
    "   - `component_check`: Verify the function's code uses specified components (e.g., via string inspection).\n",
    "   - `error_handling`: Check if errors are raised for invalid inputs.\n",
    "\n",
    "3. **Test Case Diversity**:\n",
    "   - Cover all provided advisories.\n",
    "   - Include at least one test per advisory and one for each test type where applicable.\n",
    "\n",
    "### Output Format:\n",
    "For each test case, you need to firstly define the Test Types to indicate what type of test case you are going to create and then give the reasoning and explanation of the test case. After that, generate the test function based on the your reasoning.\n",
    "\n",
    "For each test function, return with following structure:\n",
    "\n",
    "<Type>\n",
    "Pick one of correctness|edge_case|runtime|component_check|error_handling\n",
    "</Type>\n",
    "<Planning>\n",
    "Introduce how would you design the test function. Specify the purpose of the test function and the reasoning behind it. Explain step by step why your test case is correct and what is the expected output.\n",
    "</Planning>\n",
    "<Code>\n",
    "def test_case(func):\n",
    "    # Your test function code here\n",
    "</Code>\n",
    "\n",
    "If you are going to create multiple test cases, please separate them with <separator> tag.\n",
    "\n",
    "\n",
    "Generate test cases that rigorously validate the function's behavior, code structure, and performance.\n",
    "You MUST strictly follow the output format and structure. The generated test functions MUST be runnable function that use another python function as its parameter.\"\"\"\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4a9d6706",
   "metadata": {},
   "outputs": [],
   "source": [
    "from LLM_call import LLMModel\n",
    "\n",
    "# Client used by the direct prompt experiments below; no explicit key is passed here.\n",
    "llm = LLMModel(\n",
    "    api_key=None,\n",
    "    model=\"gpt-3.5-turbo\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "8042266e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "``` \n",
      "<Planning> \n",
      "Introduce how would you design the test function. Specify the purpose of the test function and the reasoning behind it. Explain step by step why your test case is correct and what is the expected output. \n",
      "</Planning> \n",
      "<Code> \n",
      "def test_case(func): \n",
      "    # Your test function code here \n",
      "</Code> \n",
      "``` \n",
      "\n",
      "If you are going to create multiple test cases, please separate them with `<separator>` tag. \n",
      "\n",
      "Generate test cases that rigorously validate the function's behavior, code structure, and performance. \n",
      "You MUST strictly follow the output format and structure. The generated test functions MUST be runnable functions that use another python function as its parameter.\n"
     ]
    }
   ],
   "source": [
    "# Asynchronous request; top-level `await` works because IPython runs cells in an event loop.\n",
    "output = await llm.LLM_response_async(\n",
    "    test_prompt,\n",
    "    model=\"gpt-3.5-turbo\",\n",
    ")\n",
    "print(output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "ed3a87b4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<Type>\n",
      "correctness\n",
      "</Type>\n",
      "<Planning>\n",
      "Test the function with a simple input where a valid subsequence exists and the product is within the limit.\n",
      "</Planning>\n",
      "<Code>\n",
      "def test_case(func):\n",
      "    nums = [1, 2, 3]\n",
      "    k = 2\n",
      "    limit = 10\n",
      "    expected_output = 6\n",
      "    result = func(nums, k, limit)\n",
      "    return result == expected_output\n",
      "</Code>\n",
      "\n",
      "<separator>\n",
      "\n",
      "<Type>\n",
      "correctness\n",
      "</Type>\n",
      "<Planning>\n",
      "Test the function with an input where no valid subsequence exists.\n",
      "</Planning>\n",
      "<Code>\n",
      "def test_case(func):\n",
      "    nums = [0, 2, 3]\n",
      "    k = -5\n",
      "    limit = 12\n",
      "    expected_output = -1\n",
      "    result = func(nums, k, limit)\n",
      "    return result == expected_output\n",
      "</Code>\n",
      "\n",
      "<separator>\n",
      "\n",
      "<Type>\n",
      "correctness\n",
      "</Type>\n",
      "<Planning>\n",
      "Test the function with an input where multiple valid subsequences exist.\n",
      "</Planning>\n",
      "<Code>\n",
      "def test_case(func):\n",
      "    nums = [2, 2, 3, 3]\n",
      "    k = 0\n",
      "    limit = 9\n",
      "    expected_output = 9\n",
      "    result = func(nums, k, limit)\n",
      "    return result == expected_output\n",
      "</Code>\n"
     ]
    }
   ],
   "source": [
    "# Plain (non-awaited) call with `test_prompt`; the reply is printed unparsed.\n",
    "output = llm.LLM_response(test_prompt, model=\"gpt-3.5-turbo\")\n",
    "print(output)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "42127fc5",
   "metadata": {},
   "source": [
    "### Extraction test: parsing tagged LLM replies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "5ca4dc70",
   "metadata": {},
   "outputs": [],
   "source": [
    "text_temp = \"\"\"<Type>\n",
    "correctness\n",
    "</Type>\n",
    "<Planning>\n",
    "This test case will cover the scenario where a subsequence with an alternating sum equal to k exists and the product does not exceed the limit.\n",
    "\n",
    "I will create a test case with nums = [1, 2, 3], k = 2, limit = 10.\n",
    "The expected output is 6 because the subsequence [1, 2, 3] has an alternating sum of 2 and the product of these numbers is 6, which is within the limit.\n",
    "</Planning>\n",
    "<Code>\n",
    "def test_case(func):\n",
    "    nums = [1, 2, 3]\n",
    "    k = 2\n",
    "    limit = 10\n",
    "    expected_output = 6\n",
    "\n",
    "    result = func(nums, k, limit)\n",
    "    \n",
    "    return result == expected_output\n",
    "</Code>\n",
    "\n",
    "<separator>\n",
    "\n",
    "<Type>\n",
    "edge_case\n",
    "</Type>\n",
    "<Planning>\n",
    "This test case will cover the scenario where no subsequence with an alternating sum equal to k exists.\n",
    "\n",
    "I will create a test case with nums = [0, 2, 3], k = -5, limit = 12.\n",
    "The expected output is -1 because there is no subsequence with an alternating sum of -5.\n",
    "</Planning>\n",
    "<Code>\n",
    "def test_case(func):\n",
    "    nums = [0, 2, 3]\n",
    "    k = -5\n",
    "    limit = 12\n",
    "    expected_output = -1\n",
    "\n",
    "    result = func(nums, k, limit)\n",
    "    \n",
    "    return result == expected_output\n",
    "</Code>\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "04a4ed0c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'test_case_1': {'test_type': 'correctness', 'purpose': 'This test case will cover the scenario where a subsequence with an alternating sum equal to k exists and the product does not exceed the limit.\\n\\nI will create a test case with nums = [1, 2, 3], k = 2, limit = 10.\\nThe expected output is 6 because the subsequence [1, 2, 3] has an alternating sum of 2 and the product of these numbers is 6, which is within the limit.', 'test_function': 'def test_case(func):\\n    nums = [1, 2, 3]\\n    k = 2\\n    limit = 10\\n    expected_output = 6\\n\\n    result = func(nums, k, limit)\\n    \\n    return result == expected_output'}, 'test_case_2': {'test_type': 'edge_case', 'purpose': 'This test case will cover the scenario where no subsequence with an alternating sum equal to k exists.\\n\\nI will create a test case with nums = [0, 2, 3], k = -5, limit = 12.\\nThe expected output is -1 because there is no subsequence with an alternating sum of -5.', 'test_function': 'def test_case(func):\\n    nums = [0, 2, 3]\\n    k = -5\\n    limit = 12\\n    expected_output = -1\\n\\n    result = func(nums, k, limit)\\n    \\n    return result == expected_output'}}\n"
     ]
    }
   ],
   "source": [
    "# NOTE(review): `extracted_cases` is not assigned in this cell or the neighbouring ones - confirm which\n",
    "# (possibly removed) cell produced it, otherwise Restart & Run All stops here with a NameError.\n",
    "print(extracted_cases)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "bb401448",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "'<Think>\n",
      "The last code failed to pass the test function because the calculate_xor_pairs_sum function is not calculating the sum of all pairs of numbers correctly. It is only calculating the xor of pairs within the first k elements of the list. To fix this, we need to iterate over all pairs of numbers in the given list and calculate the xor sum accordingly.\n",
      "</Think>\n",
      "\n",
      "<Code>\n",
      "```python\n",
      "def pair_xor_Sum(nums, k):\n",
      "    # Function to calculate the sum of xor of all pairs of numbers in the given list\n",
      "    def calculate_xor_pairs_sum(nums):\n",
      "        sum_xor_pairs = 0\n",
      "        # Iterate through all pairs of numbers in the list\n",
      "        for i in range(len(nums)):\n",
      "            for j in range(i+1, len(nums)):\n",
      "                # Calculate the xor of each pair and add it to sum_xor_pairs\n",
      "                sum_xor_pairs += nums[i] ^ nums[j]\n",
      "        return sum_xor_pairs\n",
      "    \n",
      "    return calculate_xor_pairs_sum(nums)\n",
      "```\n",
      "</Code>\n",
      "\n",
      "<Planning>\n",
      "1. Define a function `pair_xor_Sum` that takes a list of numbers `nums` and an integer `k` as input.\n",
      "2. Define a nested function `calculate_xor_pairs_sum` within `pair_xor_Sum` to calculate the sum of xor of all pairs of numbers in the given list.\n",
      "3. Initialize `sum_xor_pairs` to 0.\n",
      "4. Iterate through all pairs of numbers in the `nums` list using nested loops.\n",
      "5. Calculate the xor of each pair of numbers and add it to `sum_xor_pairs`.\n",
      "6. Return the final `sum_xor_pairs`.\n",
      "7. Return the result from `calculate_xor_pairs_sum` as the final output.\n",
      "</Planning>\n",
      "\n",
      "<Main Function Name>\n",
      "pair_xor_Sum\n",
      "</Main Function Name>'\n"
     ]
    }
   ],
   "source": [
    "temp_text = \"\"\"'<Think>\\nThe last code failed to pass the test function because the calculate_xor_pairs_sum function is not calculating the sum of all pairs of numbers correctly. It is only calculating the xor of pairs within the first k elements of the list. To fix this, we need to iterate over all pairs of numbers in the given list and calculate the xor sum accordingly.\\n</Think>\\n\\n<Code>\\n```python\\ndef pair_xor_Sum(nums, k):\\n    # Function to calculate the sum of xor of all pairs of numbers in the given list\\n    def calculate_xor_pairs_sum(nums):\\n        sum_xor_pairs = 0\\n        # Iterate through all pairs of numbers in the list\\n        for i in range(len(nums)):\\n            for j in range(i+1, len(nums)):\\n                # Calculate the xor of each pair and add it to sum_xor_pairs\\n                sum_xor_pairs += nums[i] ^ nums[j]\\n        return sum_xor_pairs\\n    \\n    return calculate_xor_pairs_sum(nums)\\n```\\n</Code>\\n\\n<Planning>\\n1. Define a function `pair_xor_Sum` that takes a list of numbers `nums` and an integer `k` as input.\\n2. Define a nested function `calculate_xor_pairs_sum` within `pair_xor_Sum` to calculate the sum of xor of all pairs of numbers in the given list.\\n3. Initialize `sum_xor_pairs` to 0.\\n4. Iterate through all pairs of numbers in the `nums` list using nested loops.\\n5. Calculate the xor of each pair of numbers and add it to `sum_xor_pairs`.\\n6. Return the final `sum_xor_pairs`.\\n7. Return the result from `calculate_xor_pairs_sum` as the final output.\\n</Planning>\\n\\n<Main Function Name>\\npair_xor_Sum\\n</Main Function Name>'\"\"\"\n",
    "print(temp_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1f5ebb86",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n"
     ]
    }
   ],
   "source": [
    "def extract_string(input_list, specified_size):\n",
    "    \"\"\"Return the strings from input_list whose length is exactly specified_size.\n",
    "\n",
    "    Args:\n",
    "        input_list: Iterable of strings to filter.\n",
    "        specified_size: Required string length.\n",
    "\n",
    "    Returns:\n",
    "        A new list with the matching strings, in their original order.\n",
    "    \"\"\"\n",
    "    # Only exact-length matches are kept; a second branch used to let shorter strings through as well.\n",
    "    return [string for string in input_list if len(string) == specified_size]\n",
    "\n",
    "# NOTE(review): `tuple_intersection` is not defined in this cell (only `extract_string` is) -\n",
    "# presumably left over from another task; confirm where it comes from before re-running.\n",
    "print(tuple_intersection([(3, 4), (5, 6), (9, 10), (4, 5)] , [(5, 4), (3, 4), (6, 5), (9, 11)]) == {(4, 5), (3, 4), (5, 6)})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5b062dbe",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'code': 'import pandas as pd\\nimport numpy as np\\nfrom sklearn.model_selection import train_test_split\\nfrom sklearn.preprocessing import StandardScaler, OneHotEncoder\\nfrom sklearn.impute import SimpleImputer\\nimport torch\\nimport torch.nn as nn\\nimport torch.optim as optim\\n\\ndef load_data(train_file_path, test_file_path):\\n    try:\\n        train_df = pd.read_excel(train_file_path)\\n        test_df = pd.read_excel(test_file_path)\\n        return train_df, test_df\\n    except FileNotFoundError as fnf_error:\\n        raise FileNotFoundError(f\"File not found: {fnf_error}\")\\n    except Exception as e:\\n        raise Exception(f\"An error occurred while loading data: {e}\")\\n\\ndef preprocess_data(df):\\n    num_imputer = SimpleImputer(strategy=\\'mean\\')\\n    num_cols = df.select_dtypes(include=[np.number]).columns\\n    df[num_cols] = num_imputer.fit_transform(df[num_cols])\\n    \\n    enc = OneHotEncoder(sparse=False)\\n    if \\'主波向\\' in df.columns:\\n        encoded_features = enc.fit_transform(df[[\\'主波向\\']])\\n        encoder_df = pd.DataFrame(encoded_features, columns=enc.get_feature_names_out([\\'主波向\\']))\\n        df = pd.concat([df, encoder_df], axis=1).drop(\\'主波向\\', axis=1)\\n\\n    scaler_x = StandardScaler()\\n    scaler_y = StandardScaler()\\n    df[\\'x\\'] = scaler_x.fit_transform(df[[\\'x\\']])\\n    df[\\'y\\'] = scaler_y.fit_transform(df[[\\'y\\']])\\n    \\n    return df, scaler_x, scaler_y, {\\'主波向\\': enc}\\n\\ndef select_features(df):\\n    X = df.drop(\\'y\\', axis=1).values\\n    y = df[\\'y\\'].values\\n    return X, y\\n\\nclass BeachProfileNN(nn.Module):\\n    def __init__(self, input_dim, output_dim):\\n        super(BeachProfileNN, self).__init__()\\n        self.fc1 = nn.Linear(input_dim, 64)\\n        self.fc2 = nn.Linear(64, 32)\\n        self.fc3 = nn.Linear(32, output_dim)\\n        self.relu = nn.ReLU()\\n\\n    def forward(self, x):\\n        x = self.relu(self.fc1(x))\\n        x = 
self.relu(self.fc2(x))\\n        x = self.fc3(x)\\n        return x\\n\\ndef build_neural_network(input_dim, output_dim):\\n    model = BeachProfileNN(input_dim, output_dim)\\n    return model\\n\\ndef train_evaluate_model(model, X_train, y_train, X_test, y_test):\\n    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)\\n    y_train_tensor = torch.tensor(y_train, dtype=torch.float32)\\n    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)\\n    y_test_tensor = torch.tensor(y_test, dtype=torch.float32)\\n\\n    criterion = nn.MSELoss()\\n    optimizer = optim.Adam(model.parameters(), lr=0.01)\\n    for epoch in range(100):\\n        model.train()\\n        optimizer.zero_grad()\\n        outputs = model(X_train_tensor)\\n        loss = criterion(outputs.view(-1), y_train_tensor)\\n        loss.backward()\\n        optimizer.step()\\n\\n    model.eval()\\n    with torch.no_grad():\\n        predictions = model(X_test_tensor).view(-1)\\n        mse = criterion(predictions, y_test_tensor).item()\\n\\n    return mse\\n\\ndef evaluate_framework_performance(train_file_path, test_file_path):\\n    train_df, test_df = load_data(train_file_path, test_file_path)\\n    train_sampled_df = train_df.sample(frac=0.2, random_state=42)\\n    test_sampled_df = test_df.sample(frac=0.2, random_state=42)\\n\\n    train_processed_df, scaler_x, scaler_y, encoders = preprocess_data(train_sampled_df)\\n    test_processed_df, _, _, _ = preprocess_data(test_sampled_df)\\n\\n    X_train_sampled, y_train_sampled = select_features(train_processed_df)\\n    X_test_sampled, y_test_sampled = select_features(test_processed_df)\\n\\n    input_dim = X_train_sampled.shape[1]\\n    model = build_neural_network(input_dim, 1)\\n\\n    mse_sampled = train_evaluate_model(model, X_train_sampled, y_train_sampled, X_test_sampled, y_test_sampled)\\n    \\n    return mse_sampled\\n\\ndef predict_y(model, input_x, other_profile_features, scaler_x, scaler_y, feature_encoders_dict, 
device=\\'cpu\\'):\\n    input_x_scaled = scaler_x.transform(input_x)\\n    all_features_scaled = np.concatenate([input_x_scaled, np.array(other_profile_features)], axis=1)\\n\\n    input_tensor = torch.tensor(all_features_scaled, dtype=torch.float32).to(device)\\n\\n    model.to(device)\\n    model.eval()\\n    with torch.no_grad():\\n        pred_scaled = model(input_tensor).cpu().numpy()\\n\\n    predictions = scaler_y.inverse_transform(pred_scaled)\\n    return predictions', 'plan': \"1. **Load Data:** This function reads Excel files containing the training and test datasets. It handles exceptions to manage errors such as missing files, thus preventing the script from failing unexpectedly.\\n2. **Preprocess Data:** This function includes steps to fill missing numerical values with mean values, one-hot encode the '主波向' categorical field, and scale any numerical fields ('x' and 'y') necessary for neural network training.\\n3. **Select Features:** This extracts input features and target variable 'y' from the preprocessed data for model training.\\n4. **Build Neural Network:** This component defines a simple feedforward neural network suitable for regression to model the relationship between features and target variable.\\n5. **Train and Evaluate Model:** The neural network is trained using the preprocessed feature set and validation set, and its performance is quantified based on the mean squared error (MSE).\\n6. **Evaluate Framework Performance:** This orchestrates the evaluation, using sampled data to assess framework performance quickly.\\n7. **Predict 'y':** Utilizes the trained model to make predictions for new inputs by scaling them, applying the model, and inverse-scaling the outputs to original values.\", 'main_function_name': 'evaluate_framework_performance', 'think': 'The code from the last generation failed to pass the test function because it did not handle file-related errors, specifically the FileNotFoundError, which is required in the test case. 
The code inherently fails due to a TypeError as it attempts to access non-existent data paths and does not provide any catch block for handling such exceptions.\\n\\nTo fix this, the code can be improved by adding error handling to the `load_data` function. This function should have a try-except block that specifically catches FileNotFoundError and possibly other related exceptions, ensuring that the function behaves as expected when provided with invalid file paths.'}\n",
      "['code', 'plan', 'main_function_name', 'think']\n"
     ]
    }
   ],
   "source": [
    "text_temp = \"\"\"<Think>\n",
    "The code from the last generation failed to pass the test function because it did not handle file-related errors, specifically the FileNotFoundError, which is required in the test case. The code inherently fails due to a TypeError as it attempts to access non-existent data paths and does not provide any catch block for handling such exceptions.\n",
    "\n",
    "To fix this, the code can be improved by adding error handling to the `load_data` function. This function should have a try-except block that specifically catches FileNotFoundError and possibly other related exceptions, ensuring that the function behaves as expected when provided with invalid file paths.\n",
    "</Think>\n",
    "\n",
    "<Code>\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
    "from sklearn.impute import SimpleImputer\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "\n",
    "def load_data(train_file_path, test_file_path):\n",
    "    try:\n",
    "        train_df = pd.read_excel(train_file_path)\n",
    "        test_df = pd.read_excel(test_file_path)\n",
    "        return train_df, test_df\n",
    "    except FileNotFoundError as fnf_error:\n",
    "        raise FileNotFoundError(f\"File not found: {fnf_error}\")\n",
    "    except Exception as e:\n",
    "        raise Exception(f\"An error occurred while loading data: {e}\")\n",
    "\n",
    "def preprocess_data(df):\n",
    "    num_imputer = SimpleImputer(strategy='mean')\n",
    "    num_cols = df.select_dtypes(include=[np.number]).columns\n",
    "    df[num_cols] = num_imputer.fit_transform(df[num_cols])\n",
    "    \n",
    "    enc = OneHotEncoder(sparse=False)\n",
    "    if '主波向' in df.columns:\n",
    "        encoded_features = enc.fit_transform(df[['主波向']])\n",
    "        encoder_df = pd.DataFrame(encoded_features, columns=enc.get_feature_names_out(['主波向']))\n",
    "        df = pd.concat([df, encoder_df], axis=1).drop('主波向', axis=1)\n",
    "\n",
    "    scaler_x = StandardScaler()\n",
    "    scaler_y = StandardScaler()\n",
    "    df['x'] = scaler_x.fit_transform(df[['x']])\n",
    "    df['y'] = scaler_y.fit_transform(df[['y']])\n",
    "    \n",
    "    return df, scaler_x, scaler_y, {'主波向': enc}\n",
    "\n",
    "def select_features(df):\n",
    "    X = df.drop('y', axis=1).values\n",
    "    y = df['y'].values\n",
    "    return X, y\n",
    "\n",
    "class BeachProfileNN(nn.Module):\n",
    "    def __init__(self, input_dim, output_dim):\n",
    "        super(BeachProfileNN, self).__init__()\n",
    "        self.fc1 = nn.Linear(input_dim, 64)\n",
    "        self.fc2 = nn.Linear(64, 32)\n",
    "        self.fc3 = nn.Linear(32, output_dim)\n",
    "        self.relu = nn.ReLU()\n",
    "\n",
    "    def forward(self, x):\n",
    "        x = self.relu(self.fc1(x))\n",
    "        x = self.relu(self.fc2(x))\n",
    "        x = self.fc3(x)\n",
    "        return x\n",
    "\n",
    "def build_neural_network(input_dim, output_dim):\n",
    "    model = BeachProfileNN(input_dim, output_dim)\n",
    "    return model\n",
    "\n",
    "def train_evaluate_model(model, X_train, y_train, X_test, y_test):\n",
    "    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)\n",
    "    y_train_tensor = torch.tensor(y_train, dtype=torch.float32)\n",
    "    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)\n",
    "    y_test_tensor = torch.tensor(y_test, dtype=torch.float32)\n",
    "\n",
    "    criterion = nn.MSELoss()\n",
    "    optimizer = optim.Adam(model.parameters(), lr=0.01)\n",
    "    for epoch in range(100):\n",
    "        model.train()\n",
    "        optimizer.zero_grad()\n",
    "        outputs = model(X_train_tensor)\n",
    "        loss = criterion(outputs.view(-1), y_train_tensor)\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "\n",
    "    model.eval()\n",
    "    with torch.no_grad():\n",
    "        predictions = model(X_test_tensor).view(-1)\n",
    "        mse = criterion(predictions, y_test_tensor).item()\n",
    "\n",
    "    return mse\n",
    "\n",
    "def evaluate_framework_performance(train_file_path, test_file_path):\n",
    "    train_df, test_df = load_data(train_file_path, test_file_path)\n",
    "    train_sampled_df = train_df.sample(frac=0.2, random_state=42)\n",
    "    test_sampled_df = test_df.sample(frac=0.2, random_state=42)\n",
    "\n",
    "    train_processed_df, scaler_x, scaler_y, encoders = preprocess_data(train_sampled_df)\n",
    "    test_processed_df, _, _, _ = preprocess_data(test_sampled_df)\n",
    "\n",
    "    X_train_sampled, y_train_sampled = select_features(train_processed_df)\n",
    "    X_test_sampled, y_test_sampled = select_features(test_processed_df)\n",
    "\n",
    "    input_dim = X_train_sampled.shape[1]\n",
    "    model = build_neural_network(input_dim, 1)\n",
    "\n",
    "    mse_sampled = train_evaluate_model(model, X_train_sampled, y_train_sampled, X_test_sampled, y_test_sampled)\n",
    "    \n",
    "    return mse_sampled\n",
    "\n",
    "def predict_y(model, input_x, other_profile_features, scaler_x, scaler_y, feature_encoders_dict, device='cpu'):\n",
    "    input_x_scaled = scaler_x.transform(input_x)\n",
    "    all_features_scaled = np.concatenate([input_x_scaled, np.array(other_profile_features)], axis=1)\n",
    "\n",
    "    input_tensor = torch.tensor(all_features_scaled, dtype=torch.float32).to(device)\n",
    "\n",
    "    model.to(device)\n",
    "    model.eval()\n",
    "    with torch.no_grad():\n",
    "        pred_scaled = model(input_tensor).cpu().numpy()\n",
    "\n",
    "    predictions = scaler_y.inverse_transform(pred_scaled)\n",
    "    return predictions\n",
    "</Code>\n",
    "\n",
    "<Planning>\n",
    "1. **Load Data:** This function reads Excel files containing the training and test datasets. It handles exceptions to manage errors such as missing files, thus preventing the script from failing unexpectedly.\n",
    "2. **Preprocess Data:** This function includes steps to fill missing numerical values with mean values, one-hot encode the '主波向' categorical field, and scale any numerical fields ('x' and 'y') necessary for neural network training.\n",
    "3. **Select Features:** This extracts input features and target variable 'y' from the preprocessed data for model training.\n",
    "4. **Build Neural Network:** This component defines a simple feedforward neural network suitable for regression to model the relationship between features and target variable.\n",
    "5. **Train and Evaluate Model:** The neural network is trained using the preprocessed feature set and validation set, and its performance is quantified based on the mean squared error (MSE).\n",
    "6. **Evaluate Framework Performance:** This orchestrates the evaluation, using sampled data to assess framework performance quickly.\n",
    "7. **Predict 'y':** Utilizes the trained model to make predictions for new inputs by scaling them, applying the model, and inverse-scaling the outputs to original values.\n",
    "</Planning>\n",
    "\n",
    "<Main Function Name>\n",
    "evaluate_framework_performance\n",
    "</Main Function Name>\"\"\"\n",
    "\n",
    "def extract_code_test(llm_output):\n",
    "    \"\"\"Split a tagged LLM reply into code, plan, main function name and reasoning.\n",
    "\n",
    "    Closed tags (<tag>...</tag>) take priority. Code falls back to the first\n",
    "    fenced python block and then to bare function definitions; plan and main\n",
    "    function name fall back to an unclosed opening tag.\n",
    "\n",
    "    Args:\n",
    "        llm_output: Reply text. Literal backslash-n sequences are converted to\n",
    "            real newlines before parsing.\n",
    "\n",
    "    Returns:\n",
    "        dict with the keys code, plan, main_function_name and think; a value\n",
    "        is None when that part is not found.\n",
    "    \"\"\"\n",
    "    import re\n",
    "\n",
    "    result = {\"code\": None, \"plan\": None, \"main_function_name\": None, \"think\": None}\n",
    "\n",
    "    llm_output = llm_output.replace(\"\\\\n\", \"\\n\")\n",
    "\n",
    "    fence_pattern = r'```python\\s*(.*?)\\s*```'\n",
    "\n",
    "    def preprocess(text):\n",
    "        \"\"\"Replace newlines inside fenced python blocks with a placeholder.\"\"\"\n",
    "        placeholder = \"###NL###\"\n",
    "        protected = re.sub(\n",
    "            r'(```python.*?```)',\n",
    "            lambda m: m.group(0).replace(\"\\n\", placeholder),\n",
    "            text,\n",
    "            flags=re.IGNORECASE | re.DOTALL\n",
    "        )\n",
    "        return protected, placeholder\n",
    "\n",
    "    modified_text, placeholder = preprocess(llm_output)\n",
    "\n",
    "    def extract_paired_tag(tag, text):\n",
    "        \"\"\"Return the stripped content of a closed tag, e.g. <tag>content</tag>, or None.\"\"\"\n",
    "        pattern = re.compile(\n",
    "            r'<\\s*{tag}\\s*>(.*?)<\\s*/\\s*{tag}\\s*>'.format(tag=tag),\n",
    "            re.IGNORECASE | re.DOTALL\n",
    "        )\n",
    "        match = pattern.search(text)\n",
    "        return match.group(1).replace(placeholder, \"\\n\").strip() if match else None\n",
    "\n",
    "    def extract_single_tag(tag, text):\n",
    "        \"\"\"Return the stripped content after an unclosed tag, e.g. <tag>content..., or None.\"\"\"\n",
    "        pattern = re.compile(\n",
    "            r'<\\s*{tag}\\s*>(?!</)(.*?)(?=(<\\s*\\w|```|$))'.format(tag=tag),\n",
    "            re.IGNORECASE | re.DOTALL\n",
    "        )\n",
    "        match = pattern.search(text)\n",
    "        return match.group(1).replace(placeholder, \"\\n\").strip() if match else None\n",
    "\n",
    "    # Code extraction priority: closed tag > fenced block > function definitions\n",
    "    result[\"think\"] = extract_paired_tag('think', modified_text)\n",
    "\n",
    "    code_string = extract_paired_tag('code', modified_text)\n",
    "    if not code_string:\n",
    "        code_blocks = re.findall(fence_pattern, modified_text, re.DOTALL)\n",
    "        if code_blocks:\n",
    "            result[\"code\"] = code_blocks[0].replace(placeholder, \"\\n\").strip()\n",
    "        else:\n",
    "            # Fall back to collecting every function definition\n",
    "            functions = re.findall(r'(def\\s+.+?:\\n(?:\\s*.+\\n)+)', modified_text, re.DOTALL)\n",
    "            if functions:\n",
    "                result[\"code\"] = '\\n\\n'.join([f.replace(placeholder, \"\\n\").strip() for f in functions])\n",
    "    elif \"```python\" in code_string:\n",
    "        # Search inside the <code> section only, so a fenced snippet elsewhere\n",
    "        # in the reply (e.g. in <think>) is never returned in its place.\n",
    "        code_blocks = re.findall(fence_pattern, code_string, re.DOTALL)\n",
    "        if code_blocks:\n",
    "            result[\"code\"] = code_blocks[0].strip()\n",
    "        else:\n",
    "            result[\"code\"] = code_string.replace(\"```python\", \"\").strip()\n",
    "    else:\n",
    "        result[\"code\"] = code_string\n",
    "\n",
    "    # Plan: closed tag first\n",
    "    result[\"plan\"] = extract_paired_tag('planning', modified_text) or \\\n",
    "                    extract_paired_tag('reasoning', modified_text)\n",
    "    if not result[\"plan\"]:  # unclosed-tag fallback\n",
    "        result[\"plan\"] = extract_single_tag('planning', modified_text) or \\\n",
    "                        extract_single_tag('reasoning', modified_text)\n",
    "\n",
    "    # Main function name: closed tag first (raw string keeps \\s a regex escape)\n",
    "    name_tag = r'main\\s*function\\s*name'\n",
    "    main_func = extract_paired_tag(name_tag, modified_text)\n",
    "    if main_func:\n",
    "        words = re.findall(r'\\b\\w+\\b', main_func)\n",
    "        # A tag body without any identifier yields None rather than an IndexError.\n",
    "        result[\"main_function_name\"] = words[-1] if words else None\n",
    "    else:  # unclosed-tag fallback\n",
    "        single_tag_content = extract_single_tag(name_tag, modified_text)\n",
    "        if single_tag_content:\n",
    "            candidates = re.findall(r'\\b([a-zA-Z_]\\w*)\\s*\\(?', single_tag_content)\n",
    "            result[\"main_function_name\"] = candidates[-1] if candidates else None\n",
    "\n",
    "    return result\n",
    "\n",
    "# Run the extractor on the sample reply defined at the top of this cell, then show the parsed dict and its keys.\n",
    "result_temp = extract_code_test(text_temp)\n",
    "print(result_temp)\n",
    "print(list(result_temp.keys()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "bb6189be",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "import pandas as pd\n",
      "import numpy as np\n",
      "from sklearn.model_selection import train_test_split\n",
      "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
      "from sklearn.impute import SimpleImputer\n",
      "import torch\n",
      "import torch.nn as nn\n",
      "import torch.optim as optim\n",
      "\n",
      "def load_data(train_file_path, test_file_path):\n",
      "    try:\n",
      "        train_df = pd.read_excel(train_file_path)\n",
      "        test_df = pd.read_excel(test_file_path)\n",
      "        return train_df, test_df\n",
      "    except FileNotFoundError as fnf_error:\n",
      "        raise FileNotFoundError(f\"File not found: {fnf_error}\")\n",
      "    except Exception as e:\n",
      "        raise Exception(f\"An error occurred while loading data: {e}\")\n",
      "\n",
      "def preprocess_data(df):\n",
      "    num_imputer = SimpleImputer(strategy='mean')\n",
      "    num_cols = df.select_dtypes(include=[np.number]).columns\n",
      "    df[num_cols] = num_imputer.fit_transform(df[num_cols])\n",
      "    \n",
      "    enc = OneHotEncoder(sparse=False)\n",
      "    if '主波向' in df.columns:\n",
      "        encoded_features = enc.fit_transform(df[['主波向']])\n",
      "        encoder_df = pd.DataFrame(encoded_features, columns=enc.get_feature_names_out(['主波向']))\n",
      "        df = pd.concat([df, encoder_df], axis=1).drop('主波向', axis=1)\n",
      "\n",
      "    scaler_x = StandardScaler()\n",
      "    scaler_y = StandardScaler()\n",
      "    df['x'] = scaler_x.fit_transform(df[['x']])\n",
      "    df['y'] = scaler_y.fit_transform(df[['y']])\n",
      "    \n",
      "    return df, scaler_x, scaler_y, {'主波向': enc}\n",
      "\n",
      "def select_features(df):\n",
      "    X = df.drop('y', axis=1).values\n",
      "    y = df['y'].values\n",
      "    return X, y\n",
      "\n",
      "class BeachProfileNN(nn.Module):\n",
      "    def __init__(self, input_dim, output_dim):\n",
      "        super(BeachProfileNN, self).__init__()\n",
      "        self.fc1 = nn.Linear(input_dim, 64)\n",
      "        self.fc2 = nn.Linear(64, 32)\n",
      "        self.fc3 = nn.Linear(32, output_dim)\n",
      "        self.relu = nn.ReLU()\n",
      "\n",
      "    def forward(self, x):\n",
      "        x = self.relu(self.fc1(x))\n",
      "        x = self.relu(self.fc2(x))\n",
      "        x = self.fc3(x)\n",
      "        return x\n",
      "\n",
      "def build_neural_network(input_dim, output_dim):\n",
      "    model = BeachProfileNN(input_dim, output_dim)\n",
      "    return model\n",
      "\n",
      "def train_evaluate_model(model, X_train, y_train, X_test, y_test):\n",
      "    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)\n",
      "    y_train_tensor = torch.tensor(y_train, dtype=torch.float32)\n",
      "    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)\n",
      "    y_test_tensor = torch.tensor(y_test, dtype=torch.float32)\n",
      "\n",
      "    criterion = nn.MSELoss()\n",
      "    optimizer = optim.Adam(model.parameters(), lr=0.01)\n",
      "    for epoch in range(100):\n",
      "        model.train()\n",
      "        optimizer.zero_grad()\n",
      "        outputs = model(X_train_tensor)\n",
      "        loss = criterion(outputs.view(-1), y_train_tensor)\n",
      "        loss.backward()\n",
      "        optimizer.step()\n",
      "\n",
      "    model.eval()\n",
      "    with torch.no_grad():\n",
      "        predictions = model(X_test_tensor).view(-1)\n",
      "        mse = criterion(predictions, y_test_tensor).item()\n",
      "\n",
      "    return mse\n",
      "\n",
      "def evaluate_framework_performance(train_file_path, test_file_path):\n",
      "    train_df, test_df = load_data(train_file_path, test_file_path)\n",
      "    train_sampled_df = train_df.sample(frac=0.2, random_state=42)\n",
      "    test_sampled_df = test_df.sample(frac=0.2, random_state=42)\n",
      "\n",
      "    train_processed_df, scaler_x, scaler_y, encoders = preprocess_data(train_sampled_df)\n",
      "    test_processed_df, _, _, _ = preprocess_data(test_sampled_df)\n",
      "\n",
      "    X_train_sampled, y_train_sampled = select_features(train_processed_df)\n",
      "    X_test_sampled, y_test_sampled = select_features(test_processed_df)\n",
      "\n",
      "    input_dim = X_train_sampled.shape[1]\n",
      "    model = build_neural_network(input_dim, 1)\n",
      "\n",
      "    mse_sampled = train_evaluate_model(model, X_train_sampled, y_train_sampled, X_test_sampled, y_test_sampled)\n",
      "    \n",
      "    return mse_sampled\n",
      "\n",
      "def predict_y(model, input_x, other_profile_features, scaler_x, scaler_y, feature_encoders_dict, device='cpu'):\n",
      "    input_x_scaled = scaler_x.transform(input_x)\n",
      "    all_features_scaled = np.concatenate([input_x_scaled, np.array(other_profile_features)], axis=1)\n",
      "\n",
      "    input_tensor = torch.tensor(all_features_scaled, dtype=torch.float32).to(device)\n",
      "\n",
      "    model.to(device)\n",
      "    model.eval()\n",
      "    with torch.no_grad():\n",
      "        pred_scaled = model(input_tensor).cpu().numpy()\n",
      "\n",
      "    predictions = scaler_y.inverse_transform(pred_scaled)\n",
      "    return predictions\n"
     ]
    }
   ],
   "source": [
    "# Print the code field by itself so its newlines show up as real line breaks.\n",
    "print(result_temp[\"code\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "84344781",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llm",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
