[
    {
        "question": "def below_threshold(l: list, t: int):\n    \"\"\"Return True if all numbers in the list l are below threshold t.\n    >>> below_threshold([1, 2, 4, 10], 100)\n    True\n    >>> below_threshold([1, 20, 4, 10], 5)\n    False\n    \"\"\"",
        "unit_test": "METADATA = {}\n\n\ndef check(candidate):\n    assert candidate([1, 2, 4, 10], 100)\n    assert not candidate([1, 20, 4, 10], 5)\n    assert candidate([1, 20, 4, 10], 21)\n    assert candidate([1, 20, 4, 10], 22)\n    assert candidate([1, 8, 4, 10], 11)\n    assert not candidate([1, 8, 4, 10], 10)",
        "ground_truth_code": "for e in l:\n        if e >= t:\n            return False\n    return True\n",
        "generation": "def below_threshold(l: list, t: int):\n    \"\"\"Return True if all numbers in the list l are below threshold t.\n    >>> below_threshold([1, 2, 4, 10], 100)\n    True\n    >>> below_threshold([1, 20, 4, 10], 5)\n    False\n    \"\"\"\n\n    return all(x < t for x in l)\n\n\ncheck(below_threshold)\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpar8lw1yg.py\", line 28, in <module>\\n    check(below_threshold)\\nNameError: name \\'check\\' is not defined\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "med",
            "llm_name": "vicuna-33b"
        }
    },
    {
        "question": "def will_it_fly(q,w):\n    '''\n    Write a function that returns True if the object q will fly, and False otherwise.\n    The object q will fly if it's balanced (it is a palindromic list) and the sum of its elements is less than or equal the maximum possible weight w.\n\n    Example:\n    will_it_fly([1, 2], 5) ➞ False \n    # 1+2 is less than the maximum possible weight, but it's unbalanced.\n\n    will_it_fly([3, 2, 3], 1) ➞ False\n    # it's balanced, but 3+2+3 is more than the maximum possible weight.\n\n    will_it_fly([3, 2, 3], 9) ➞ True\n    # 3+2+3 is less than the maximum possible weight, and it's balanced.\n\n    will_it_fly([3], 5) ➞ True\n    # 3 is less than the maximum possible weight, and it's balanced.\n    '''",
        "unit_test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([3, 2, 3], 9) is True\n    assert candidate([1, 2], 5) is False\n    assert candidate([3], 5) is True\n    assert candidate([3, 2, 3], 1) is False\n\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([1, 2, 3], 6) is False\n    assert candidate([5], 5) is True",
        "ground_truth_code": "if sum(q) > w:\n        return False\n\n    i, j = 0, len(q)-1\n    while i<j:\n        if q[i] != q[j]:\n            return False\n        i+=1\n        j-=1\n    return True\n",
        "generation": "import itertools\n\ndef will_it_fly(q, w):\n    # Check if the list is balanced\n    for i in range(len(q)):\n        if q[i] != q[-(i + 1)]:\n            return False\n\n    # Check if the sum of the elements is less than or equal to the maximum possible weight\n    max_weight = w - sum(q)\n    for i in range(len(q)):\n        for k in itertools.permutations(q[i + 1:], i + 1):\n            if sum(k) > max_weight:\n                return False\n\n    return True\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpl1v3q09g.py\", line 47, in <module>\\n    check(will_it_fly)\\n  File \"/tmp/tmpl1v3q09g.py\", line 36, in check\\n    assert candidate([3, 2, 3], 9) is True\\nAssertionError\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "med",
            "llm_name": "vicuna-33b"
        }
    },
    {
        "question": "def is_simple_power(x, n):\n    \"\"\"Your task is to write a function that returns true if a number x is a simple\n    power of n and false in other cases.\n    x is a simple power of n if n**int=x\n    For example:\n    is_simple_power(1, 4) => true\n    is_simple_power(2, 2) => true\n    is_simple_power(8, 2) => true\n    is_simple_power(3, 2) => false\n    is_simple_power(3, 1) => false\n    is_simple_power(5, 3) => false\n    \"\"\"",
        "unit_test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(16, 2)== True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(143214, 16)== False, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(4, 2)==True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(9, 3)==True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(16, 4)==True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(24, 2)==False, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(128, 4)==False, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(12, 6)==False, \"This prints if this assert fails 1 (good for debugging!)\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(1, 1)==True, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate(1, 12)==True, \"This prints if this assert fails 2 (also good for debugging!)\"",
        "ground_truth_code": "if (n == 1): \n        return (x == 1) \n    power = 1\n    while (power < x): \n        power = power * n \n    return (power == x) \n",
        "generation": "import math\n\ndef is_simple_power(x, n):\n    \"\"\"\n    Your task is to write a function that returns true if a number x is a simple\n    power of n and false in other cases.\n    x is a simple power of n if n**int=x\n    For example:\n    is_simple_power(1, 4) => true\n    is_simple_power(2, 2) => true\n    is_simple_power(8, 2) => true\n    is_simple_power(3, 2) => false\n    is_simple_power(3, 1) => false\n    is_simple_power(5, 3) => false\n    \"\"\"\n\n    if x < 1 or n < 1:\n        return False\n\n    for i in range(1, int(math.log(x, n)) + 1):\n        if x % (n ** i) == 0:\n            return True\n\n    return False\n\ndef check(candidate):\n\n    # Check some simple cases\n    assert candidate(16, 2)== True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(143214, 16)== False, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(4, 2)==True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(9, 3)==True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(16, 4)==True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(24, 2)==False, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(128, 4)==False, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(12, 6)==False, \"This prints if this assert fails 1 (good for debugging!)\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(1, 1)==True, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate(1, 12)==True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n# Test the function\nprint(is_simple_power(16, 2))  # Should print True\nprint(is_simple_power(143214, 16))  # Should print False\nprint(is_simple_power(4, 2))  # Should print True\nprint(is_simple_power(9, 3))  # Should print True\nprint(is_simple_power(16, 4))  # Should print True\nprint(is_simple_power(24, 2))  # Should print False\nprint(is_simple_power(128, 4))  # Should print False\nprint(is_simple_power(12, 6))  # Should print False\nprint(is_simple_power(1, 1))  # Should print True\nprint(is_simple_power(1, 12))  # Should print True\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmp6i_h99go.py\", line 67, in <module>\\n    print(is_simple_power(1, 1))  # Should print True\\n  File \"/tmp/tmp6i_h99go.py\", line 36, in is_simple_power\\n    for i in range(1, int(math.log(x, n)) + 1):\\nZeroDivisionError: float division by zero\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "high",
            "llm_name": "vicuna-33b"
        }
    },
    {
        "question": "Write a function to find t-nth term of geometric series.",
        "unit_test": "assert tn_gp(1,5,2)==16\nassert tn_gp(1,5,4)==256\nassert tn_gp(2,6,3)==486",
        "ground_truth_code": "import math\r\ndef tn_gp(a,n,r):\r\n  tn = a * (math.pow(r, n - 1))\r\n  return tn",
        "generation": "def tn_gp(a, r, n):\n    if n == 0:\n        return 0\n    else:\n        return (a * (1 - r**n)) / (1 - r)\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmp7snspkfs.py\", line 22, in <module>\\n    assert tn_gp(1,5,2)==16\\nAssertionError\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "med",
            "llm_name": "vicuna-33b"
        }
    },
    {
        "question": "def fizz_buzz(n: int):\n    \"\"\"Return the number of times the digit 7 appears in integers less than n which are divisible by 11 or 13.\n    >>> fizz_buzz(50)\n    0\n    >>> fizz_buzz(78)\n    2\n    >>> fizz_buzz(79)\n    3\n    \"\"\"",
        "unit_test": "METADATA = {}\n\n\ndef check(candidate):\n    assert candidate(50) == 0\n    assert candidate(78) == 2\n    assert candidate(79) == 3\n    assert candidate(100) == 3\n    assert candidate(200) == 6\n    assert candidate(4000) == 192\n    assert candidate(10000) == 639\n    assert candidate(100000) == 8026",
        "ground_truth_code": "ns = []\n    for i in range(n):\n        if i % 11 == 0 or i % 13 == 0:\n            ns.append(i)\n    s = ''.join(list(map(str, ns)))\n    ans = 0\n    for c in s:\n        ans += (c == '7')\n    return ans\n",
        "generation": "def fizz_buzz(n: int):\n    \"\"\"Return the number of times the digit 7 appears in integers less than n which are divisible by 11 or 13.\n    >>> fizz_buzz(50)\n    0\n    >>> fizz_buzz(78)\n    2\n    >>> fizz_buzz(79)\n    3\n    \"\"\"\n\n    count = 0\n    for i in range(1, n):\n        if i % 11 == 0 or i % 13 == 0:\n            count += 1\n    return count\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpps84iz0j.py\", line 48, in <module>\\n    check(fizz_buzz)\\n  File \"/tmp/tmpps84iz0j.py\", line 38, in check\\n    assert candidate(50) == 0\\nAssertionError\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "high",
            "llm_name": "internlm-20b"
        }
    },
    {
        "question": "Write a function to convert the given binary number to its decimal equivalent.",
        "unit_test": "assert binary_to_decimal(100) == 4\nassert binary_to_decimal(1011) == 11\nassert binary_to_decimal(1101101) == 109",
        "ground_truth_code": "def binary_to_decimal(binary): \r\n    binary1 = binary \r\n    decimal, i, n = 0, 0, 0\r\n    while(binary != 0): \r\n        dec = binary % 10\r\n        decimal = decimal + dec * pow(2, i) \r\n        binary = binary//10\r\n        i += 1\r\n    return (decimal)",
        "generation": "def binary_to_decimal(binary):\n    decimal = 0\n    power = 0\n    for digit in binary[::-1]:\n        decimal += int(digit) * 2**power\n        power += 1\n    return decimal\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpktciaaa7.py\", line 24, in <module>\\n    assert binary_to_decimal(100) == 4\\n  File \"/tmp/tmpktciaaa7.py\", line 20, in binary_to_decimal\\n    for digit in binary[::-1]:\\nTypeError: \\'int\\' object is not subscriptable\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "high",
            "llm_name": "internlm-20b"
        }
    },
    {
        "question": "Write a python function to set all odd bits of a given number.",
        "unit_test": "assert odd_bit_set_number(10) == 15\nassert odd_bit_set_number(20) == 21\nassert odd_bit_set_number(30) == 31",
        "ground_truth_code": "def odd_bit_set_number(n):\r\n    count = 0;res = 0;temp = n\r\n    while temp > 0:\r\n        if count % 2 == 0:\r\n            res |= (1 << count)\r\n        count += 1\r\n        temp >>= 1\r\n    return (n | res)",
        "generation": "def odd_bit_set_number(number):\n    \"\"\"\n    Set all odd bits of a given number.\n\n    Args:\n        number: An integer.\n\n    Returns:\n        The integer with all odd bits set to 1.\n    \"\"\"\n    # Calculate the number of odd bits in the number\n    num_bits = len(bin(number)) - 2\n    # Set all odd bits to 1\n    return number | (1 << num_bits)\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpw9u4zq82.py\", line 31, in <module>\\n    assert odd_bit_set_number(10) == 15\\nAssertionError\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "high",
            "llm_name": "internlm-20b"
        }
    },
    {
        "question": "Write a function to zip the two given tuples.",
        "unit_test": "assert zip_tuples((7, 8, 4, 5, 9, 10),(1, 5, 6) ) == [(7, 1), (8, 5), (4, 6), (5, 1), (9, 5), (10, 6)]\nassert zip_tuples((8, 9, 5, 6, 10, 11),(2, 6, 7) ) == [(8, 2), (9, 6), (5, 7), (6, 2), (10, 6), (11, 7)]\nassert zip_tuples((9, 10, 6, 7, 11, 12),(3, 7, 8) ) == [(9, 3), (10, 7), (6, 8), (7, 3), (11, 7), (12, 8)]",
        "ground_truth_code": "def zip_tuples(test_tup1, test_tup2):\r\n  res = []\r\n  for i, j in enumerate(test_tup1):\r\n    res.append((j, test_tup2[i % len(test_tup2)])) \r\n  return (res) ",
        "generation": "def zip_tuples(test_tuples):\n    result = []\n    for t1, t2 in zip(test_tuples[0], test_tuples[1]):\n        result.append((t1, t2))\n    return result\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpeftv10ct.py\", line 22, in <module>\\n    assert zip_tuples((7, 8, 4, 5, 9, 10),(1, 5, 6) ) == [(7, 1), (8, 5), (4, 6), (5, 1), (9, 5), (10, 6)]\\nTypeError: zip_tuples() takes 1 positional argument but 2 were given\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "med",
            "llm_name": "internlm-20b"
        }
    },
    {
        "question": "Write a function to merge three dictionaries into a single expression.",
        "unit_test": "assert merge_dictionaries_three({ \"R\": \"Red\", \"B\": \"Black\", \"P\": \"Pink\" }, { \"G\": \"Green\", \"W\": \"White\" },{ \"O\": \"Orange\", \"W\": \"White\", \"B\": \"Black\" })=={'B': 'Black', 'R': 'Red', 'P': 'Pink', 'G': 'Green', 'W': 'White', 'O': 'Orange'}\nassert merge_dictionaries_three({ \"R\": \"Red\", \"B\": \"Black\", \"P\": \"Pink\" }, { \"G\": \"Green\", \"W\": \"White\" },{\"L\":\"lavender\",\"B\":\"Blue\"})=={'W': 'White', 'P': 'Pink', 'B': 'Black', 'R': 'Red', 'G': 'Green', 'L': 'lavender'}\nassert merge_dictionaries_three({ \"R\": \"Red\", \"B\": \"Black\", \"P\": \"Pink\" },{\"L\":\"lavender\",\"B\":\"Blue\"},{ \"G\": \"Green\", \"W\": \"White\" })=={'B': 'Black', 'P': 'Pink', 'R': 'Red', 'G': 'Green', 'L': 'lavender', 'W': 'White'}",
        "ground_truth_code": "import collections as ct\r\ndef merge_dictionaries_three(dict1,dict2, dict3):\r\n    merged_dict = dict(ct.ChainMap({},dict1,dict2,dict3))\r\n    return merged_dict",
        "generation": "def merge_dictionaries_three(dict1, dict2, dict3):\n    merged_dict = dict1.copy()\n    merged_dict.update(dict2)\n    merged_dict.update(dict3)\n    return merged_dict\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmp_l7cimxg.py\", line 23, in <module>\\n    assert merge_dictionaries_three({ \"R\": \"Red\", \"B\": \"Black\", \"P\": \"Pink\" }, { \"G\": \"Green\", \"W\": \"White\" },{\"L\":\"lavender\",\"B\":\"Blue\"})=={\\'W\\': \\'White\\', \\'P\\': \\'Pink\\', \\'B\\': \\'Black\\', \\'R\\': \\'Red\\', \\'G\\': \\'Green\\', \\'L\\': \\'lavender\\'}\\nAssertionError\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "high",
            "llm_name": "internlm-20b"
        }
    },
    {
        "question": "from typing import List\n\n\ndef separate_paren_groups(paren_string: str) -> List[str]:\n    \"\"\" Input to this function is a string containing multiple groups of nested parentheses. Your goal is to\n    separate those group into separate strings and return the list of those.\n    Separate groups are balanced (each open brace is properly closed) and not nested within each other\n    Ignore any spaces in the input string.\n    >>> separate_paren_groups('( ) (( )) (( )( ))')\n    ['()', '(())', '(()())']\n    \"\"\"",
        "unit_test": "METADATA = {\n    'author': 'jt',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate('(()()) ((())) () ((())()())') == [\n        '(()())', '((()))', '()', '((())()())'\n    ]\n    assert candidate('() (()) ((())) (((())))') == [\n        '()', '(())', '((()))', '(((())))'\n    ]\n    assert candidate('(()(())((())))') == [\n        '(()(())((())))'\n    ]\n    assert candidate('( ) (( )) (( )( ))') == ['()', '(())', '(()())']",
        "ground_truth_code": "result = []\n    current_string = []\n    current_depth = 0\n\n    for c in paren_string:\n        if c == '(':\n            current_depth += 1\n            current_string.append(c)\n        elif c == ')':\n            current_depth -= 1\n            current_string.append(c)\n\n            if current_depth == 0:\n                result.append(''.join(current_string))\n                current_string.clear()\n\n    return result\n",
        "generation": "from typing import List\n\ndef separate_paren_groups(paren_string: str) -> List[str]:\n    \"\"\"\n    Input to this function is a string containing multiple groups of nested parentheses. Your goal is to\n    separate those group into separate strings and return the list of those.\n    Separate groups are balanced (each open brace is properly closed) and not nested within each other\n    Ignore any spaces in the input string.\n    >>> separate_paren_groups('( ) (( )) (( )( ))')\n    ['()', '(())', '(()())']\n    \"\"\"\n    # Add your code here\n    # HINT: You can use stack data structure to solve this problem\n    # Stack is a good choice because it can easily detect if an opening parenthesis is balanced with a closing one\n    grouped_strings = []\n    paren_stack = []\n\n    for char in paren_string:\n        if char == '(':\n            paren_stack.append(char)\n        elif char == ')':\n            while paren_stack and paren_stack[-1] != '(':\n                paren_stack.pop()\n            if paren_stack:\n                paren_stack.pop()\n            else:\n                break\n\n    while paren_stack:\n        grouped_strings.append(''.join(paren_stack))\n        paren_stack.pop()\n\n    return grouped_strings\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpb2s_t735.py\", line 70, in <module>\\n    check(separate_paren_groups)\\n  File \"/tmp/tmpb2s_t735.py\", line 59, in check\\n    assert candidate(\\'(()()) ((())) () ((())()())\\') == [\\nAssertionError\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "med",
            "llm_name": "baichuan2-13b"
        }
    },
    {
        "question": "def sort_third(l: list):\n    \"\"\"This function takes a list l and returns a list l' such that\n    l' is identical to l in the indicies that are not divisible by three, while its values at the indicies that are divisible by three are equal\n    to the values of the corresponding indicies of l, but sorted.\n    >>> sort_third([1, 2, 3])\n    [1, 2, 3]\n    >>> sort_third([5, 6, 3, 4, 8, 9, 2])\n    [2, 6, 3, 4, 8, 9, 5]\n    \"\"\"",
        "unit_test": "METADATA = {}\n\n\ndef check(candidate):\n    assert tuple(candidate([1, 2, 3])) == tuple(sort_third([1, 2, 3]))\n    assert tuple(candidate([5, 3, -5, 2, -3, 3, 9, 0, 123, 1, -10])) == tuple(sort_third([5, 3, -5, 2, -3, 3, 9, 0, 123, 1, -10]))\n    assert tuple(candidate([5, 8, -12, 4, 23, 2, 3, 11, 12, -10])) == tuple(sort_third([5, 8, -12, 4, 23, 2, 3, 11, 12, -10]))\n    assert tuple(candidate([5, 6, 3, 4, 8, 9, 2])) == tuple([2, 6, 3, 4, 8, 9, 5])\n    assert tuple(candidate([5, 8, 3, 4, 6, 9, 2])) == tuple([2, 8, 3, 4, 6, 9, 5])\n    assert tuple(candidate([5, 6, 9, 4, 8, 3, 2])) == tuple([2, 6, 9, 4, 8, 3, 5])\n    assert tuple(candidate([5, 6, 3, 4, 8, 9, 2, 1])) == tuple([2, 6, 3, 4, 8, 9, 5, 1])",
        "ground_truth_code": "l = list(l)\n    l[::3] = sorted(l[::3])\n    return l\n",
        "generation": "def sort_third(l: list):\n    \"\"\"This function takes a list l and returns a list l' such that\n    l' is identical to l in the indicies that are not divisible by three, while its values at the indicies that are divisible by three are equal\n    to the values of the corresponding indicies of l, but sorted.\n    >>> sort_third([1, 2, 3])\n    [1, 2, 3]\n    >>> sort_third([5, 6, 3, 4, 8, 9, 2])\n    [2, 6, 3, 4, 8, 9, 5]\n    \"\"\"\n    # Your code here\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmp_fhmua20.py\", line 42, in <module>\\n    check(sort_third)\\n  File \"/tmp/tmp_fhmua20.py\", line 33, in check\\n    assert tuple(candidate([1, 2, 3])) == tuple(sort_third([1, 2, 3]))\\nTypeError: \\'NoneType\\' object is not iterable\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "low",
            "llm_name": "baichuan2-13b"
        }
    },
    {
        "question": "def change_base(x: int, base: int):\n    \"\"\"Change numerical base of input number x to base.\n    return string representation after the conversion.\n    base numbers are less than 10.\n    >>> change_base(8, 3)\n    '22'\n    >>> change_base(8, 2)\n    '1000'\n    >>> change_base(7, 2)\n    '111'\n    \"\"\"",
        "unit_test": "METADATA = {}\n\n\ndef check(candidate):\n    assert candidate(8, 3) == \"22\"\n    assert candidate(9, 3) == \"100\"\n    assert candidate(234, 2) == \"11101010\"\n    assert candidate(16, 2) == \"10000\"\n    assert candidate(8, 2) == \"1000\"\n    assert candidate(7, 2) == \"111\"\n    for x in range(2, 8):\n        assert candidate(x, x + 1) == str(x)",
        "ground_truth_code": "ret = \"\"\n    while x > 0:\n        ret = str(x % base) + ret\n        x //= base\n    return ret\n",
        "generation": "def change_base(x: int, base: int):\n    \"\"\"Change numerical base of input number x to base.\n    return string representation after the conversion.\n    base numbers are less than 10.\n    >>> change_base(8, 3)\n    '22'\n    >>> change_base(8, 2)\n    '1000'\n    >>> change_base(7, 2)\n    '111'\n    \"\"\"\n    if x < 0 or base < 2:\n        raise ValueError(\"Input should be positive integer\")\n    digits = \"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\"\n    result = \"\"\n    while x > 0:\n        remainder = x % base\n        x = x // base\n        result = digits[remainder] + result\n    return result\n\ncheck(change_base)\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpj99uh7id.py\", line 38, in <module>\\n    check(change_base)\\nNameError: name \\'check\\' is not defined\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "high",
            "llm_name": "baichuan2-13b"
        }
    },
    {
        "question": "def monotonic(l: list):\n    \"\"\"Return True is list elements are monotonically increasing or decreasing.\n    >>> monotonic([1, 2, 4, 20])\n    True\n    >>> monotonic([1, 20, 4, 10])\n    False\n    >>> monotonic([4, 1, 0, -10])\n    True\n    \"\"\"",
        "unit_test": "METADATA = {}\n\n\ndef check(candidate):\n    assert candidate([1, 2, 4, 10]) == True\n    assert candidate([1, 2, 4, 20]) == True\n    assert candidate([1, 20, 4, 10]) == False\n    assert candidate([4, 1, 0, -10]) == True\n    assert candidate([4, 1, 1, 0]) == True\n    assert candidate([1, 2, 3, 2, 5, 60]) == False\n    assert candidate([1, 2, 3, 4, 5, 60]) == True\n    assert candidate([9, 9, 9, 9]) == True",
        "ground_truth_code": "if l == sorted(l) or l == sorted(l, reverse=True):\n        return True\n    return False\n",
        "generation": "def monotonic(l: list):\n    \"\"\"Return True is list elements are monotonically increasing or decreasing.\n    >>> monotonic([1, 2, 4, 20])\n    True\n    >>> monotonic([1, 20, 4, 10])\n    False\n    >>> monotonic([4, 1, 0, -10])\n    True\n    >>> monotonic([4, 1, 1, 0])\n    True\n    >>> monotonic([1, 2, 3, 2, 5, 60])\n    False\n    >>> monotonic([1, 2, 3, 4, 5, 60])\n    True\n    >>> monotonic([9, 9, 9, 9])\n    True\n    \"\"\"\n    # Your code here\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpjioeqlcc.py\", line 51, in <module>\\n    check(monotonic)\\n  File \"/tmp/tmpjioeqlcc.py\", line 41, in check\\n    assert candidate([1, 2, 4, 10]) == True\\nAssertionError\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "low",
            "llm_name": "baichuan2-13b"
        }
    },
    {
        "question": "def circular_shift(x, shift):\n    \"\"\"Circular shift the digits of the integer x, shift the digits right by shift\n    and return the result as a string.\n    If shift > number of digits, return digits reversed.\n    >>> circular_shift(12, 1)\n    \"21\"\n    >>> circular_shift(12, 2)\n    \"12\"\n    \"\"\"",
        "unit_test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(100, 2) == \"001\"\n    assert candidate(12, 2) == \"12\"\n    assert candidate(97, 8) == \"79\"\n    assert candidate(12, 1) == \"21\", \"This prints if this assert fails 1 (good for debugging!)\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(11, 101) == \"11\", \"This prints if this assert fails 2 (also good for debugging!)\"",
        "ground_truth_code": "s = str(x)\n    if shift > len(s):\n        return s[::-1]\n    else:\n        return s[len(s) - shift:] + s[:len(s) - shift]\n",
        "generation": "def circular_shift(x, shift):\n    \"\"\"Circular shift the digits of the integer x, shift the digits right by shift\n    and return the result as a string.\n    If shift > number of digits, return digits reversed.\n    >>> circular_shift(12, 1)\n    \"21\"\n    >>> circular_shift(12, 2)\n    \"12\"\n    \"\"\"\n    digits = str(abs(x))\n    if shift > len(digits):\n        shifted_digits = digits[-1::-1]  # reverse the digits\n    else:\n        shifted_digits = digits[shift:] + digits[:shift]\n    return shifted_digits\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmppppe94w7.py\", line 44, in <module>\\n    check(circular_shift)\\n  File \"/tmp/tmppppe94w7.py\", line 35, in check\\n    assert candidate(100, 2) == \"001\"\\nAssertionError\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "high",
            "llm_name": "baichuan2-13b"
        }
    },
    {
        "question": "def strange_sort_list(lst):\n    '''\n    Given list of integers, return list in strange order.\n    Strange sorting, is when you start with the minimum value,\n    then maximum of the remaining integers, then minimum and so on.\n\n    Examples:\n    strange_sort_list([1, 2, 3, 4]) == [1, 4, 2, 3]\n    strange_sort_list([5, 5, 5, 5]) == [5, 5, 5, 5]\n    strange_sort_list([]) == []\n    '''",
        "unit_test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([1, 2, 3, 4]) == [1, 4, 2, 3]\n    assert candidate([5, 6, 7, 8, 9]) == [5, 9, 6, 8, 7]\n    assert candidate([1, 2, 3, 4, 5]) == [1, 5, 2, 4, 3]\n    assert candidate([5, 6, 7, 8, 9, 1]) == [1, 9, 5, 8, 6, 7]\n    assert candidate([5, 5, 5, 5]) == [5, 5, 5, 5]\n    assert candidate([]) == []\n    assert candidate([1,2,3,4,5,6,7,8]) == [1, 8, 2, 7, 3, 6, 4, 5]\n    assert candidate([0,2,2,2,5,5,-5,-5]) == [-5, 5, -5, 5, 0, 2, 2, 2]\n    assert candidate([111111]) == [111111]\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True",
        "ground_truth_code": "res, switch = [], True\n    while lst:\n        res.append(min(lst) if switch else max(lst))\n        lst.remove(res[-1])\n        switch = not switch\n    return res\n",
        "generation": "def strange_sort_list(lst):\n    \"\"\"\n    Given list of integers, return list in strange order. Strange sorting, is when you start with the minimum value, then maximum of the remaining integers, then minimum and so on.\n\n    Examples:\n    strange_sort_list([1, 2, 3, 4]) == [1, 4, 2, 3]\n    strange_sort_list([5, 5, 5, 5]) == [5, 5, 5, 5]\n    strange_sort_list([]) == []\n    \"\"\"\n    return sorted(lst, reverse=True) if min(lst) else lst\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpjlskwqai.py\", line 44, in <module>\\n    check(strange_sort_list)\\n  File \"/tmp/tmpjlskwqai.py\", line 30, in check\\n    assert candidate([1, 2, 3, 4]) == [1, 4, 2, 3]\\nAssertionError\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "med",
            "llm_name": "baichuan2-13b"
        }
    },
    {
        "question": "def will_it_fly(q,w):\n    '''\n    Write a function that returns True if the object q will fly, and False otherwise.\n    The object q will fly if it's balanced (it is a palindromic list) and the sum of its elements is less than or equal the maximum possible weight w.\n\n    Example:\n    will_it_fly([1, 2], 5) ➞ False \n    # 1+2 is less than the maximum possible weight, but it's unbalanced.\n\n    will_it_fly([3, 2, 3], 1) ➞ False\n    # it's balanced, but 3+2+3 is more than the maximum possible weight.\n\n    will_it_fly([3, 2, 3], 9) ➞ True\n    # 3+2+3 is less than the maximum possible weight, and it's balanced.\n\n    will_it_fly([3], 5) ➞ True\n    # 3 is less than the maximum possible weight, and it's balanced.\n    '''",
        "unit_test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([3, 2, 3], 9) is True\n    assert candidate([1, 2], 5) is False\n    assert candidate([3], 5) is True\n    assert candidate([3, 2, 3], 1) is False\n\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([1, 2, 3], 6) is False\n    assert candidate([5], 5) is True",
        "ground_truth_code": "if sum(q) > w:\n        return False\n\n    i, j = 0, len(q)-1\n    while i<j:\n        if q[i] != q[j]:\n            return False\n        i+=1\n        j-=1\n    return True\n",
        "generation": "def will_it_fly(q, w):\n    \"\"\"\n    This function returns true if the object q will fly, and false otherwise. The object q will fly if it's balanced (it is a palindromic list) and the sum of its elements is less than or equal the maximum possible weight w.\n\n    - Check some simple cases in the assertion:\n        - Candidate([3, 2, 3], 9) should return True\n        - Candidate([1, 2], 5) should return False\n        - Candidate([3], 5) should return True\n        - Candidate([3, 2, 3], 1) should return False\n\n    - Check some edge cases in the assertion:\n        - Candidate([1, 2, 3], 6) should return False\n        - Candidate([5], 5) should return True\n    \"\"\"\n    if len(q) > 1:\n        for i in range(len(q) // 2):\n            if q[i] != q[-(i + 1)] or q[i] + q[-(i + 1)] > w:\n                return False\n    else:\n        return q[0] <= w\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmp62ib69hv.py\", line 51, in <module>\\n    check(will_it_fly)\\n  File \"/tmp/tmp62ib69hv.py\", line 40, in check\\n    assert candidate([3, 2, 3], 9) is True\\nAssertionError\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "high",
            "llm_name": "baichuan2-13b"
        }
    },
    {
        "question": "def smallest_change(arr):\n    \"\"\"\n    Given an array arr of integers, find the minimum number of elements that\n    need to be changed to make the array palindromic. A palindromic array is an array that\n    is read the same backwards and forwards. In one change, you can change one element to any other element.\n\n    For example:\n    smallest_change([1,2,3,5,4,7,9,6]) == 4\n    smallest_change([1, 2, 3, 4, 3, 2, 2]) == 1\n    smallest_change([1, 2, 3, 2, 1]) == 0\n    \"\"\"",
        "unit_test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([1,2,3,5,4,7,9,6]) == 4\n    assert candidate([1, 2, 3, 4, 3, 2, 2]) == 1\n    assert candidate([1, 4, 2]) == 1\n    assert candidate([1, 4, 4, 2]) == 1\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate([1, 2, 3, 2, 1]) == 0\n    assert candidate([3, 1, 1, 3]) == 0\n    assert candidate([1]) == 0\n    assert candidate([0, 1]) == 1",
        "ground_truth_code": "ans = 0\n    for i in range(len(arr) // 2):\n        if arr[i] != arr[len(arr) - i - 1]:\n            ans += 1\n    return ans\n",
        "generation": "def smallest_change(arr):\n    \"\"\"\n    Given an array arr of integers, find the minimum number of elements that\n    need to be changed to make the array palindromic. A palindromic array is an array that\n    is read the same backwards and forwards. In one change, you can change one element to any other element.\n\n    For example:\n    smallest_change([1,2,3,5,4,7,9,6]) == 4\n    smallest_change([1, 2, 3, 4, 3, 2, 2]) == 1\n    smallest_change([1, 2, 3, 2, 1]) == 0\n    \"\"\"\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpdigyr5hp.py\", line 43, in <module>\\n    check(smallest_change)\\n  File \"/tmp/tmpdigyr5hp.py\", line 31, in check\\n    assert candidate([1,2,3,5,4,7,9,6]) == 4\\nAssertionError\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "high",
            "llm_name": "baichuan2-13b"
        }
    },
    {
        "question": "Write a python function to find the largest number that can be formed with the given digits.",
        "unit_test": "assert find_Max_Num([1,2,3],3) == 321\nassert find_Max_Num([4,5,6,1],4) == 6541\nassert find_Max_Num([1,2,3,9],4) == 9321",
        "ground_truth_code": "def find_Max_Num(arr,n) : \r\n    arr.sort(reverse = True) \r\n    num = arr[0] \r\n    for i in range(1,n) : \r\n        num = num * 10 + arr[i] \r\n    return num ",
        "generation": "def find_Max_Num(nums, target):\n    digits = str(target)\n    max_num = 0\n    for i in range(len(nums)):\n        start = i * len(digits)\n        end = (i + 1) * len(digits)\n        if end > len(nums):\n            end = len(nums)\n        current_num = int(digits[:start] + nums[start:end])\n        if current_num > max_num:\n            max_num = current_num\n    return max_num\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpdjl4kt44.py\", line 29, in <module>\\n    assert find_Max_Num([1,2,3],3) == 321\\n  File \"/tmp/tmpdjl4kt44.py\", line 25, in find_Max_Num\\n    current_num = int(digits[:start] + nums[start:end])\\nTypeError: can only concatenate str (not \"list\") to str\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "low",
            "llm_name": "baichuan2-13b"
        }
    },
    {
        "question": "from typing import List\n\n\ndef parse_nested_parens(paren_string: str) -> List[int]:\n    \"\"\" Input to this function is a string represented multiple groups for nested parentheses separated by spaces.\n    For each of the group, output the deepest level of nesting of parentheses.\n    E.g. (()()) has maximum two levels of nesting while ((())) has three.\n\n    >>> parse_nested_parens('(()()) ((())) () ((())()())')\n    [2, 3, 1, 3]\n    \"\"\"",
        "unit_test": "METADATA = {\n    'author': 'jt',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate('(()()) ((())) () ((())()())') == [2, 3, 1, 3]\n    assert candidate('() (()) ((())) (((())))') == [1, 2, 3, 4]\n    assert candidate('(()(())((())))') == [4]",
        "ground_truth_code": "def parse_paren_group(s):\n        depth = 0\n        max_depth = 0\n        for c in s:\n            if c == '(':\n                depth += 1\n                max_depth = max(depth, max_depth)\n            else:\n                depth -= 1\n\n        return max_depth\n\n    return [parse_paren_group(x) for x in paren_string.split(' ') if x]\n",
        "generation": "from typing import List\n\ndef parse_nested_parens(paren_string: str) -> List[int]:\n    \"\"\"\n    Input to this function is a string represented multiple groups for nested parentheses separated by spaces.\n    For each of the group, output the deepest level of nesting of parentheses.\n    E.g. (()()) has maximum two levels of nesting while ((())) has three.\n\n    Args:\n        paren_string (str): A string represented multiple groups for nested parentheses separated by spaces.\n\n    Returns:\n        List[int]: A list of integers representing the deepest level of nesting of parentheses for each group.\n    \"\"\"\n    levels = []\n    max_level = 0\n    current_level = 0\n    for char in paren_string:\n        if char == '(':\n            current_level += 1\n            if current_level > max_level:\n                max_level = current_level\n        elif char == ')':\n            current_level -= 1\n    levels.append(max_level)\n    return levels\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpp52knctc.py\", line 56, in <module>\\n    check(parse_nested_parens)\\n  File \"/tmp/tmpp52knctc.py\", line 52, in check\\n    assert candidate(\\'(()()) ((())) () ((())()())\\') == [2, 3, 1, 3]\\nAssertionError\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "med",
            "llm_name": "qwen-14b"
        }
    },
    {
        "question": "from typing import List\n\n\ndef remove_duplicates(numbers: List[int]) -> List[int]:\n    \"\"\" From a list of integers, remove all elements that occur more than once.\n    Keep order of elements left the same as in the input.\n    >>> remove_duplicates([1, 2, 3, 2, 4])\n    [1, 3, 4]\n    \"\"\"",
        "unit_test": "METADATA = {\n    'author': 'jt',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate([]) == []\n    assert candidate([1, 2, 3, 4]) == [1, 2, 3, 4]\n    assert candidate([1, 2, 3, 2, 4, 3, 5]) == [1, 4, 5]",
        "ground_truth_code": "import collections\n    c = collections.Counter(numbers)\n    return [n for n in numbers if c[n] <= 1]\n",
        "generation": "from typing import List\n\ndef remove_duplicates(numbers: List[int]) -> List[int]:\n    \"\"\" From a list of integers, remove all elements that occur more than once.\n    Keep order of elements left the same as in the input.\n    >>> remove_duplicates([1, 2, 3, 2, 4])\n    [1, 3, 4]\n    \"\"\"\n    seen = set()\n    result = []\n    for num in numbers:\n        if num not in seen:\n            seen.add(num)\n            result.append(num)\n    return result\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpexwjrv1j.py\", line 45, in <module>\\n    check(remove_duplicates)\\n  File \"/tmp/tmpexwjrv1j.py\", line 43, in check\\n    assert candidate([1, 2, 3, 2, 4, 3, 5]) == [1, 4, 5]\\nAssertionError\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "high",
            "llm_name": "qwen-14b"
        }
    },
    {
        "question": "import math\n\n\ndef poly(xs: list, x: float):\n    \"\"\"\n    Evaluates polynomial with coefficients xs at point x.\n    return xs[0] + xs[1] * x + xs[1] * x^2 + .... xs[n] * x^n\n    \"\"\"\n    return sum([coeff * math.pow(x, i) for i, coeff in enumerate(xs)])\n\n\ndef find_zero(xs: list):\n    \"\"\" xs are coefficients of a polynomial.\n    find_zero find x such that poly(x) = 0.\n    find_zero returns only only zero point, even if there are many.\n    Moreover, find_zero only takes list xs having even number of coefficients\n    and largest non zero coefficient as it guarantees\n    a solution.\n    >>> round(find_zero([1, 2]), 2) # f(x) = 1 + 2x\n    -0.5\n    >>> round(find_zero([-6, 11, -6, 1]), 2) # (x - 1) * (x - 2) * (x - 3) = -6 + 11x - 6x^2 + x^3\n    1.0\n    \"\"\"",
        "unit_test": "METADATA = {}\n\n\ndef check(candidate):\n    import math\n    import random\n    rng = random.Random(42)\n    import copy\n    for _ in range(100):\n        ncoeff = 2 * rng.randint(1, 4)\n        coeffs = []\n        for _ in range(ncoeff):\n            coeff = rng.randint(-10, 10)\n            if coeff == 0:\n                coeff = 1\n            coeffs.append(coeff)\n        solution = candidate(copy.deepcopy(coeffs))\n        assert math.fabs(poly(coeffs, solution)) < 1e-4",
        "ground_truth_code": "begin, end = -1., 1.\n    while poly(xs, begin) * poly(xs, end) > 0:\n        begin *= 2.0\n        end *= 2.0\n    while end - begin > 1e-10:\n        center = (begin + end) / 2.0\n        if poly(xs, center) * poly(xs, begin) > 0:\n            begin = center\n        else:\n            end = center\n    return begin\n",
        "generation": "import math\nimport random\nrng = random.Random(42)\n\ndef find_zero(xs):\n    \"\"\"\n    xs are coefficients of a polynomial.\n    find_zero find x such that poly(x) = 0.\n    find_zero returns only only zero point, even if there are many.\n    Moreover, find_zero only takes list xs having even number of coefficients\n    and largest non zero coefficient as it guarantees\n    a solution.\n    >>> round(find_zero([1, 2]), 2) # f(x) = 1 + 2x\n    -0.5\n    >>> round(find_zero([-6, 11, -6, 1]), 2) # (x - 1) * (x - 2) * (x - 3) = -6 + 11x - 6x^2 + x^3\n    1.0\n    \"\"\"\n    if len(xs) % 2 != 0:\n        raise ValueError(\"xs should have even number of coefficients\")\n    if max([abs(coeff) for coeff in xs]) == 0:\n        raise ValueError(\"xs should have non-zero coefficients\")\n    ncoeff = len(xs) // 2\n    x = xs[ncoeff]\n    for _ in range(100):\n        x -= poly(xs, x) / poly_diff(xs, x)\n    return x\n\ndef poly_diff(xs, x):\n    \"\"\"\n    Evaluates derivative of polynomial with coefficients xs at point x.\n    return xs[1] + 2 * xs[2] * x + 3 * xs[3] * x^2 + ...\n    \"\"\"\n    return sum([i * coeff * math.pow(x, i - 1) for i, coeff in enumerate(xs, start=2)])\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpx9em510j.py\", line 72, in <module>\\n    check(find_zero)\\n  File \"/tmp/tmpx9em510j.py\", line 68, in check\\n    solution = candidate(copy.deepcopy(coeffs))\\n  File \"/tmp/tmpx9em510j.py\", line 41, in find_zero\\n    x -= poly(xs, x) / poly_diff(xs, x)\\n  File \"/cpfs01/user/lantian/minconda3/envs/critique_inference/lib/python3.9/site-packages/sympy/polys/polytools.py\", line 7380, in poly\\n    if expr.is_Poly:\\nAttributeError: \\'list\\' object has no attribute \\'is_Poly\\'\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "high",
            "llm_name": "qwen-14b"
        }
    },
    {
        "question": "def same_chars(s0: str, s1: str):\n    \"\"\"\n    Check if two words have the same characters.\n    >>> same_chars('eabcdzzzz', 'dddzzzzzzzddeddabc')\n    True\n    >>> same_chars('abcd', 'dddddddabc')\n    True\n    >>> same_chars('dddddddabc', 'abcd')\n    True\n    >>> same_chars('eabcd', 'dddddddabc')\n    False\n    >>> same_chars('abcd', 'dddddddabce')\n    False\n    >>> same_chars('eabcdzzzz', 'dddzzzzzzzddddabc')\n    False\n    \"\"\"",
        "unit_test": "METADATA = {}\n\n\ndef check(candidate):\n    assert candidate('eabcdzzzz', 'dddzzzzzzzddeddabc') == True\n    assert candidate('abcd', 'dddddddabc') == True\n    assert candidate('dddddddabc', 'abcd') == True\n    assert candidate('eabcd', 'dddddddabc') == False\n    assert candidate('abcd', 'dddddddabcf') == False\n    assert candidate('eabcdzzzz', 'dddzzzzzzzddddabc') == False\n    assert candidate('aabb', 'aaccc') == False",
        "ground_truth_code": "return set(s0) == set(s1)\n",
        "generation": "def same_chars(s0: str, s1: str):\n    \"\"\"\n    Check if two words have the same characters.\n    >>> same_chars('eabcdzzzz', 'dddzzzzzzzddeddabc')\n    True\n    >>> same_chars('abcd', 'dddddddabc')\n    True\n    >>> same_chars('dddddddabc', 'abcd')\n    True\n    >>> same_chars('eabcd', 'dddddddabc')\n    False\n    >>> same_chars('abcd', 'dddddddabce')\n    False\n    >>> same_chars('eabcdzzzz', 'dddzzzzzzzddddabc')\n    False\n    \"\"\"\n    return sorted(s0) == sorted(s1)\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmp0tj7kffp.py\", line 49, in <module>\\n    check(same_chars)\\n  File \"/tmp/tmp0tj7kffp.py\", line 40, in check\\n    assert candidate(\\'eabcdzzzz\\', \\'dddzzzzzzzddeddabc\\') == True\\nAssertionError\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "med",
            "llm_name": "qwen-14b"
        }
    },
    {
        "question": "def derivative(xs: list):\n    \"\"\" xs represent coefficients of a polynomial.\n    xs[0] + xs[1] * x + xs[2] * x^2 + ....\n     Return derivative of this polynomial in the same form.\n    >>> derivative([3, 1, 2, 4, 5])\n    [1, 4, 12, 20]\n    >>> derivative([1, 2, 3])\n    [2, 6]\n    \"\"\"",
        "unit_test": "METADATA = {}\n\n\ndef check(candidate):\n    assert candidate([3, 1, 2, 4, 5]) == [1, 4, 12, 20]\n    assert candidate([1, 2, 3]) == [2, 6]\n    assert candidate([3, 2, 1]) == [2, 2]\n    assert candidate([3, 2, 1, 0, 4]) == [2, 2, 0, 16]\n    assert candidate([1]) == []",
        "ground_truth_code": "return [(i * x) for i, x in enumerate(xs)][1:]\n",
        "generation": "def derivative(xs: list):\n    \"\"\" xs represent coefficients of a polynomial.\n    xs[0] + xs[1] * x + xs[2] * x^2 + ....\n     Return derivative of this polynomial in the same form.\n    \"\"\"\n    result = []\n    for i in range(1, len(xs)):\n        result.append(xs[i] * (i+1))\n    return result\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpfxwxucji.py\", line 39, in <module>\\n    check(derivative)\\n  File \"/tmp/tmpfxwxucji.py\", line 32, in check\\n    assert candidate([3, 1, 2, 4, 5]) == [1, 4, 12, 20]\\nAssertionError\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "high",
            "llm_name": "qwen-14b"
        }
    },
    {
        "question": "def strange_sort_list(lst):\n    '''\n    Given list of integers, return list in strange order.\n    Strange sorting, is when you start with the minimum value,\n    then maximum of the remaining integers, then minimum and so on.\n\n    Examples:\n    strange_sort_list([1, 2, 3, 4]) == [1, 4, 2, 3]\n    strange_sort_list([5, 5, 5, 5]) == [5, 5, 5, 5]\n    strange_sort_list([]) == []\n    '''",
        "unit_test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate([1, 2, 3, 4]) == [1, 4, 2, 3]\n    assert candidate([5, 6, 7, 8, 9]) == [5, 9, 6, 8, 7]\n    assert candidate([1, 2, 3, 4, 5]) == [1, 5, 2, 4, 3]\n    assert candidate([5, 6, 7, 8, 9, 1]) == [1, 9, 5, 8, 6, 7]\n    assert candidate([5, 5, 5, 5]) == [5, 5, 5, 5]\n    assert candidate([]) == []\n    assert candidate([1,2,3,4,5,6,7,8]) == [1, 8, 2, 7, 3, 6, 4, 5]\n    assert candidate([0,2,2,2,5,5,-5,-5]) == [-5, 5, -5, 5, 0, 2, 2, 2]\n    assert candidate([111111]) == [111111]\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True",
        "ground_truth_code": "res, switch = [], True\n    while lst:\n        res.append(min(lst) if switch else max(lst))\n        lst.remove(res[-1])\n        switch = not switch\n    return res\n",
        "generation": "def strange_sort_list(lst):\n    \"\"\"\n    Given list of integers, return list in strange order.\n    Strange sorting, is when you start with the minimum value,\n    then maximum of the remaining integers, then minimum and so on.\n\n    Examples:\n    strange_sort_list([1, 2, 3, 4]) == [1, 4, 2, 3]\n    strange_sort_list([5, 5, 5, 5]) == [5, 5, 5, 5]\n    strange_sort_list([]) == []\n    \"\"\"\n    if not lst:\n        return []\n    min_val = min(lst)\n    max_val = max(lst)\n    lst.remove(min_val)\n    lst.remove(max_val)\n    return [min_val] + strange_sort_list(lst) + [max_val]\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmp19kj268h.py\", line 52, in <module>\\n    check(strange_sort_list)\\n  File \"/tmp/tmp19kj268h.py\", line 38, in check\\n    assert candidate([1, 2, 3, 4]) == [1, 4, 2, 3]\\nAssertionError\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "high",
            "llm_name": "qwen-14b"
        }
    },
    {
        "question": "Write a function to check if the given tuple list has all k elements.",
        "unit_test": "assert check_k_elements([(4, 4), (4, 4, 4), (4, 4), (4, 4, 4, 4), (4, )], 4) == True\nassert check_k_elements([(7, 7, 7), (7, 7)], 7) == True\nassert check_k_elements([(9, 9), (9, 9, 9, 9)], 7) == False",
        "ground_truth_code": "def check_k_elements(test_list, K):\r\n  res = True\r\n  for tup in test_list:\r\n    for ele in tup:\r\n      if ele != K:\r\n        res = False\r\n  return (res) ",
        "generation": "def check_k_elements(tpl, k):\n    \"\"\"\n    Check if the given tuple list has all k elements.\n    \"\"\"\n    return len(set([len(t) for t in tpl])) == 1 and len(tpl[0]) == k\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpryode279.py\", line 22, in <module>\\n    assert check_k_elements([(4, 4), (4, 4, 4), (4, 4), (4, 4, 4, 4), (4, )], 4) == True\\nAssertionError\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "low",
            "llm_name": "qwen-14b"
        }
    },
    {
        "question": "Write a python function to count number of substrings with the sum of digits equal to their length.",
        "unit_test": "assert count_Substrings('112112',6) == 6\nassert count_Substrings('111',3) == 6\nassert count_Substrings('1101112',7) == 12",
        "ground_truth_code": "from collections import defaultdict\r\ndef count_Substrings(s,n):\r\n    count,sum = 0,0\r\n    mp = defaultdict(lambda : 0)\r\n    mp[0] += 1\r\n    for i in range(n):\r\n        sum += ord(s[i]) - ord('0')\r\n        count += mp[sum - (i + 1)]\r\n        mp[sum - (i + 1)] += 1\r\n    return count",
        "generation": "def count_Substrings(s, k):\n    count = 0\n    for i in range(len(s)):\n        for j in range(i+k, len(s)+1):\n            if sum(int(digit) for digit in s[i:j]) == j - i:\n                count += 1\n    return count\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmprkjmnzap.py\", line 24, in <module>\\n    assert count_Substrings(\\'112112\\',6) == 6\\nAssertionError\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "high",
            "llm_name": "qwen-14b"
        }
    },
    {
        "question": "Write a function to find the n-th number in newman conway sequence.",
        "unit_test": "assert sequence(10) == 6\nassert sequence(2) == 1\nassert sequence(3) == 2",
        "ground_truth_code": "def sequence(n): \r\n\tif n == 1 or n == 2: \r\n\t\treturn 1\r\n\telse: \r\n\t\treturn sequence(sequence(n-1)) + sequence(n-sequence(n-1))",
        "generation": "def sequence(n):\n    if n == 1:\n        return 1\n    elif n == 2:\n        return 1\n    else:\n        return sequence(sequence(n-1)) + sequence(n-2)\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmp0cfqgaja.py\", line 24, in <module>\\n    assert sequence(10) == 6\\n  File \"/tmp/tmp0cfqgaja.py\", line 23, in sequence\\n    return sequence(sequence(n-1)) + sequence(n-2)\\n  File \"/tmp/tmp0cfqgaja.py\", line 23, in sequence\\n    return sequence(sequence(n-1)) + sequence(n-2)\\n  File \"/tmp/tmp0cfqgaja.py\", line 23, in sequence\\n    return sequence(sequence(n-1)) + sequence(n-2)\\n  [Previous line repeated 995 more times]\\n  File \"/tmp/tmp0cfqgaja.py\", line 18, in sequence\\n    if n == 1:\\nRecursionError: maximum recursion depth exceeded in comparison\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "high",
            "llm_name": "qwen-14b"
        }
    },
    {
        "question": "Write a function to check whether the given number is undulating or not.",
        "unit_test": "assert is_undulating(\"1212121\") == True\nassert is_undulating(\"1991\") == False\nassert is_undulating(\"121\") == True",
        "ground_truth_code": "def is_undulating(n): \r\n\tif (len(n) <= 2): \r\n\t\treturn False\r\n\tfor i in range(2, len(n)): \r\n\t\tif (n[i - 2] != n[i]): \r\n\t\t\treturn False\r\n\treturn True",
        "generation": "def is_undulating(number):\n    \"\"\"\n    Check whether the given number is undulating or not.\n    \"\"\"\n    if len(number) < 3:\n        return False\n    for i in range(len(number) - 2):\n        if number[i] != number[i + 1] != number[i + 2]:\n            return False\n    return True\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmphuv8noay.py\", line 27, in <module>\\n    assert is_undulating(\"1212121\") == True\\nAssertionError\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "high",
            "llm_name": "qwen-14b"
        }
    },
    {
        "question": "import math\n\n\ndef poly(xs: list, x: float):\n    \"\"\"\n    Evaluates polynomial with coefficients xs at point x.\n    return xs[0] + xs[1] * x + xs[1] * x^2 + .... xs[n] * x^n\n    \"\"\"\n    return sum([coeff * math.pow(x, i) for i, coeff in enumerate(xs)])\n\n\ndef find_zero(xs: list):\n    \"\"\" xs are coefficients of a polynomial.\n    find_zero find x such that poly(x) = 0.\n    find_zero returns only only zero point, even if there are many.\n    Moreover, find_zero only takes list xs having even number of coefficients\n    and largest non zero coefficient as it guarantees\n    a solution.\n    >>> round(find_zero([1, 2]), 2) # f(x) = 1 + 2x\n    -0.5\n    >>> round(find_zero([-6, 11, -6, 1]), 2) # (x - 1) * (x - 2) * (x - 3) = -6 + 11x - 6x^2 + x^3\n    1.0\n    \"\"\"",
        "unit_test": "METADATA = {}\n\n\ndef check(candidate):\n    import math\n    import random\n    rng = random.Random(42)\n    import copy\n    for _ in range(100):\n        ncoeff = 2 * rng.randint(1, 4)\n        coeffs = []\n        for _ in range(ncoeff):\n            coeff = rng.randint(-10, 10)\n            if coeff == 0:\n                coeff = 1\n            coeffs.append(coeff)\n        solution = candidate(copy.deepcopy(coeffs))\n        assert math.fabs(poly(coeffs, solution)) < 1e-4",
        "ground_truth_code": "begin, end = -1., 1.\n    while poly(xs, begin) * poly(xs, end) > 0:\n        begin *= 2.0\n        end *= 2.0\n    while end - begin > 1e-10:\n        center = (begin + end) / 2.0\n        if poly(xs, center) * poly(xs, begin) > 0:\n            begin = center\n        else:\n            end = center\n    return begin\n",
        "generation": "import math\nimport random\n\n\ndef check(candidate):\n    import math\n    import random\n    rng = random.Random(42)\n    import copy\n    for _ in range(100):\n        ncoeff = 2 * rng.randint(1, 4)\n        coeffs = []\n        for _ in range(ncoeff):\n            coeff = rng.randint(-10, 10)\n            if coeff == 0:\n                coeff = 1\n            coeffs.append(coeff)\n        solution = candidate(coeffs)\n        assert math.fabs(poly(coeffs, solution)) < 1e-4\n\n\ndef poly(xs: list, x: float):\n    \"\"\"\n    Evaluates polynomial with coefficients xs at point x.\n    return xs[0] + xs[1] * x + xs[1] * x^2 + .... xs[n] * x^n\n    \"\"\"\n    return sum([coeff * math.pow(x, i) for i, coeff in enumerate(xs)])\n\n\ndef find_zero(xs: list):\n    \"\"\" xs are coefficients of a polynomial.\n    find_zero find x such that poly(x) = 0.\n    find_zero returns only only zero point, even if there are many.\n    Moreover, find_zero only takes list xs having even number of coefficients\n    and largest non zero coefficient as it guarantees\n    a solution.\n    >>> round(find_zero([1, 2]), 2) # f(x) = 1 + 2x\n    -0.5\n    >>> round(find_zero([-6, 11, -6, 1]), 2) # (x - 1) * (x - 2) * (x - 3) = -6 + 11x - 6x^2 + x^3\n    1.0\n    \"\"\"\n    if len(xs) % 2 != 0 or len(xs) <2 or xs==[]:\n        return None\n    for i in range(1, len(xs), 2):\n        if xs[i] <= 0:\n            continue\n        else:\n            return -xs[i-1]/(xs[i])\n    return None\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpupe92yoj.py\", line 88, in <module>\\n    check(check)\\n  File \"/tmp/tmpupe92yoj.py\", line 84, in check\\n    solution = candidate(copy.deepcopy(coeffs))\\n  File \"/tmp/tmpupe92yoj.py\", line 84, in check\\n    solution = candidate(copy.deepcopy(coeffs))\\nTypeError: \\'list\\' object is not callable\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "med",
            "llm_name": "wizardlm-70b"
        }
    },
    {
        "question": "def circular_shift(x, shift):\n    \"\"\"Circular shift the digits of the integer x, shift the digits right by shift\n    and return the result as a string.\n    If shift > number of digits, return digits reversed.\n    >>> circular_shift(12, 1)\n    \"21\"\n    >>> circular_shift(12, 2)\n    \"12\"\n    \"\"\"",
        "unit_test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(100, 2) == \"001\"\n    assert candidate(12, 2) == \"12\"\n    assert candidate(97, 8) == \"79\"\n    assert candidate(12, 1) == \"21\", \"This prints if this assert fails 1 (good for debugging!)\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(11, 101) == \"11\", \"This prints if this assert fails 2 (also good for debugging!)\"",
        "ground_truth_code": "s = str(x)\n    if shift > len(s):\n        return s[::-1]\n    else:\n        return s[len(s) - shift:] + s[:len(s) - shift]\n",
        "generation": "def circular_shift(x, shift):\n    \"\"\"\n    Circular shift the digits of the integer x, shift the digits right by shift\n    and return the result as a string.\n    If shift > number of digits, return digits reversed.\n    \"\"\"\n    # Convert the number to a string\n    x_str = str(x)\n    \n    # Get the number of digits\n    num_digits = len(x_str)\n    \n    # Shift the digits if shift is less than or equal to the number of digits\n    if shift <= num_digits:\n        return ''.join([int(digit) for digit in x_str[shift-1: num_digits+1]])\n    else:\n        # If shift is greater than the number of digits, return the digits reversed\n        return ''.join([int(digit) for digit in x_str[::-1]])\n\n# Unit test\ndef check(candidate):\n    # Check some simple cases\n    assert candidate(100, 2) == \"001\"\n    assert candidate(12, 2) == \"12\"\n    assert candidate(97, 8) == \"79\"\n    assert candidate(12, 1) == \"21\", \"This prints if this assert fails 1 (good for debugging!)\"\n    \n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(11, 101) == \"11\", \"This prints if this assert fails 2 (also good for debugging!)\"\n",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpbdcznl08.py\", line 58, in <module>\\n    check(circular_shift)\\n  File \"/tmp/tmpbdcznl08.py\", line 49, in check\\n    assert candidate(100, 2) == \"001\"\\n  File \"/tmp/tmpbdcznl08.py\", line 31, in circular_shift\\n    return \\'\\'.join([int(digit) for digit in x_str[shift-1: num_digits+1]])\\nTypeError: sequence item 0: expected str instance, int found\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "low",
            "llm_name": "wizardlm-70b"
        }
    }
]