[
    {
        "question": "def fizz_buzz(n: int):\n    \"\"\"Return the number of times the digit 7 appears in integers less than n which are divisible by 11 or 13.\n    >>> fizz_buzz(50)\n    0\n    >>> fizz_buzz(78)\n    2\n    >>> fizz_buzz(79)\n    3\n    \"\"\"",
        "unit_test": "METADATA = {}\n\n\ndef check(candidate):\n    assert candidate(50) == 0\n    assert candidate(78) == 2\n    assert candidate(79) == 3\n    assert candidate(100) == 3\n    assert candidate(200) == 6\n    assert candidate(4000) == 192\n    assert candidate(10000) == 639\n    assert candidate(100000) == 8026",
        "ground_truth_code": "ns = []\n    for i in range(n):\n        if i % 11 == 0 or i % 13 == 0:\n            ns.append(i)\n    s = ''.join(list(map(str, ns)))\n    ans = 0\n    for c in s:\n        ans += (c == '7')\n    return ans\n",
        "generation": "def fizz_buzz(n: int):\n    \"\"\"Return the number of times the digit 7 appears in integers less than n which are divisible by 11 or 13.\n    >>> fizz_buzz(50)\n    0\n    >>> fizz_buzz(78)\n    2\n    >>> fizz_buzz(79)\n    3\n    \"\"\"\n\n    count = 0\n    for i in range(1, n):\n        if i % 11 == 0 or i % 13 == 0:\n            count += 1\n    return count",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpps84iz0j.py\", line 48, in <module>\\n    check(fizz_buzz)\\n  File \"/tmp/tmpps84iz0j.py\", line 38, in check\\n    assert candidate(50) == 0\\nAssertionError\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "high",
            "llm_name": "internlm-20b"
        }
    },
    {
        "question": "def encode_cyclic(s: str):\n    \"\"\"\n    returns encoded string by cycling groups of three characters.\n    \"\"\"\n    # split string to groups. Each of length 3.\n    groups = [s[(3 * i):min((3 * i + 3), len(s))] for i in range((len(s) + 2) // 3)]\n    # cycle elements in each group. Unless group has fewer elements than 3.\n    groups = [(group[1:] + group[0]) if len(group) == 3 else group for group in groups]\n    return \"\".join(groups)\n\n\ndef decode_cyclic(s: str):\n    \"\"\"\n    takes as input string encoded with encode_cyclic function. Returns decoded string.\n    \"\"\"",
        "unit_test": "METADATA = {}\n\n\ndef check(candidate):\n    from random import randint, choice\n    import string\n\n    letters = string.ascii_lowercase\n    for _ in range(100):\n        str = ''.join(choice(letters) for i in range(randint(10, 20)))\n        encoded_str = encode_cyclic(str)\n        assert candidate(encoded_str) == str",
        "ground_truth_code": "return encode_cyclic(encode_cyclic(s))\n",
        "generation": "def encode_cyclic(s: str) -> str:\n    \"\"\"\n    returns encoded string by cycling groups of three characters.\n    \"\"\"\n    # split string to groups. Each of length 3.\n    groups = [s[(3 * i):min((3 * i + 3), len(s))] for i in range((len(s) + 2) // 3)]\n    # cycle elements in each group. Unless group has fewer elements than 3.\n    groups = [(group[1:] + group[0]) if len(group) == 3 else group for group in groups]\n    return \"\".join(groups)\n\n\ndef decode_cyclic(s: str) -> str:\n    \"\"\"\n    takes as input string encoded with encode_cyclic function. Returns decoded string.\n    \"\"\"\n    # split string to groups. Each of length 3.\n    groups = [s[(3 * i):min((3 * i + 3), len(s))] for i in range((len(s) + 2) // 3)]\n    # cycle elements in each group. Unless group has fewer elements than 3.\n    groups = [(group[1:] + group[0]) if len(group) == 3 else group for group in groups]\n    # join groups to form the decoded string.\n    return \"\".join(groups)",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpde57gbhr.py\", line 54, in <module>\\n    check(encode_cyclic)\\n  File \"/tmp/tmpde57gbhr.py\", line 51, in check\\n    assert candidate(encoded_str) == str\\nAssertionError\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "high",
            "llm_name": "internlm-20b"
        }
    },
    {
        "question": "def derivative(xs: list):\n    \"\"\" xs represent coefficients of a polynomial.\n    xs[0] + xs[1] * x + xs[2] * x^2 + ....\n     Return derivative of this polynomial in the same form.\n    >>> derivative([3, 1, 2, 4, 5])\n    [1, 4, 12, 20]\n    >>> derivative([1, 2, 3])\n    [2, 6]\n    \"\"\"",
        "unit_test": "METADATA = {}\n\n\ndef check(candidate):\n    assert candidate([3, 1, 2, 4, 5]) == [1, 4, 12, 20]\n    assert candidate([1, 2, 3]) == [2, 6]\n    assert candidate([3, 2, 1]) == [2, 2]\n    assert candidate([3, 2, 1, 0, 4]) == [2, 2, 0, 16]\n    assert candidate([1]) == []",
        "ground_truth_code": "return [(i * x) for i, x in enumerate(xs)][1:]\n",
        "generation": "def derivative(xs: list):\n    \"\"\" xs represent coefficients of a polynomial.\n    xs[0] + xs[1] * x + xs[2] * x^2 +....\n     Return derivative of this polynomial in the same form.\n    >>> derivative([3, 1, 2, 4, 5])\n    [1, 4, 12, 20]\n    >>> derivative([1, 2, 3])\n    [2, 6]\n    \"\"\"\n\n    derivative = []\n    for i in range(len(xs) - 1):\n        derivative.append((i + 1) * xs[i])\n    return derivative",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmppf1k7sfn.py\", line 44, in <module>\\n    check(derivative)\\n  File \"/tmp/tmppf1k7sfn.py\", line 37, in check\\n    assert candidate([3, 1, 2, 4, 5]) == [1, 4, 12, 20]\\nAssertionError\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "med",
            "llm_name": "internlm-20b"
        }
    },
    {
        "question": "def fruit_distribution(s,n):\n    \"\"\"\n    In this task, you will be given a string that represents a number of apples and oranges \n    that are distributed in a basket of fruit this basket contains \n    apples, oranges, and mango fruits. Given the string that represents the total number of \n    the oranges and apples and an integer that represent the total number of the fruits \n    in the basket return the number of the mango fruits in the basket.\n    for examble:\n    fruit_distribution(\"5 apples and 6 oranges\", 19) ->19 - 5 - 6 = 8\n    fruit_distribution(\"0 apples and 1 oranges\",3) -> 3 - 0 - 1 = 2\n    fruit_distribution(\"2 apples and 3 oranges\", 100) -> 100 - 2 - 3 = 95\n    fruit_distribution(\"100 apples and 1 oranges\",120) -> 120 - 100 - 1 = 19\n    \"\"\"",
        "unit_test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"5 apples and 6 oranges\",19) == 8\n    assert candidate(\"5 apples and 6 oranges\",21) == 10\n    assert candidate(\"0 apples and 1 oranges\",3) == 2\n    assert candidate(\"1 apples and 0 oranges\",3) == 2\n    assert candidate(\"2 apples and 3 oranges\",100) == 95\n    assert candidate(\"2 apples and 3 oranges\",5) == 0\n    assert candidate(\"1 apples and 100 oranges\",120) == 19",
        "ground_truth_code": "lis = list()\n    for i in s.split(' '):\n        if i.isdigit():\n            lis.append(int(i))\n    return n - sum(lis)\n",
        "generation": "def fruit_distribution(s,n):\n    \"\"\"\n    In this task, you will be given a string that represents a number of apples and oranges \n    that are distributed in a basket of fruit this basket contains \n    apples, oranges, and mango fruits. Given the string that represents the total number of \n    the oranges and apples and an integer that represent the total number of the fruits \n    in the basket return the number of the mango fruits in the basket.\n    for examble:\n    fruit_distribution(\"5 apples and 6 oranges\", 19) ->19 - 5 - 6 = 8\n    fruit_distribution(\"0 apples and 1 oranges\",3) -> 3 - 0 - 1 = 2\n    fruit_distribution(\"2 apples and 3 oranges\", 100) -> 100 - 2 - 3 = 95\n    fruit_distribution(\"100 apples and 1 oranges\",120) -> 120 - 100 - 1 = 19\n    \"\"\"\n\n    oranges_and_apples = int(s.split()[1])\n    total_fruits = n\n    mango_fruits = total_fruits - oranges_and_apples\n    return mango_fruits",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmp246van2j.py\", line 46, in <module>\\n    check(fruit_distribution)\\n  File \"/tmp/tmp246van2j.py\", line 38, in check\\n    assert candidate(\"5 apples and 6 oranges\",19) == 8\\n  File \"/tmp/tmp246van2j.py\", line 31, in fruit_distribution\\n    oranges_and_apples = int(s.split()[1])\\nValueError: invalid literal for int() with base 10: \\'apples\\'\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "high",
            "llm_name": "internlm-20b"
        }
    },
    {
        "question": "Write a function to find the division of first even and odd number of a given list.",
        "unit_test": "assert div_even_odd([1,3,5,7,4,1,6,8])==4\nassert div_even_odd([1,2,3,4,5,6,7,8,9,10])==2\nassert div_even_odd([1,5,7,9,10])==10",
        "ground_truth_code": "def div_even_odd(list1):\r\n    first_even = next((el for el in list1 if el%2==0),-1)\r\n    first_odd = next((el for el in list1 if el%2!=0),-1)\r\n    return (first_even/first_odd)",
        "generation": "def div_even_odd(nums):\n    even = 0\n    odd = 0\n    for num in nums:\n        if num % 2 == 0:\n            even += num\n        else:\n            odd += num\n    return even // odd",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpc1q1shbk.py\", line 26, in <module>\\n    assert div_even_odd([1,3,5,7,4,1,6,8])==4\\nAssertionError\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "high",
            "llm_name": "internlm-20b"
        }
    },
    {
        "question": "Write a python function to check whether the given number can be represented as difference of two squares or not.",
        "unit_test": "assert dif_Square(5) == True\nassert dif_Square(10) == False\nassert dif_Square(15) == True",
        "ground_truth_code": "def dif_Square(n): \r\n    if (n % 4 != 2): \r\n        return True\r\n    return False",
        "generation": "def dif_Square(n):\n    for i in range(1, n):\n        if n - i*i == 0:\n            return True\n    return False",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmp7bt37s69.py\", line 22, in <module>\\n    assert dif_Square(5) == True\\nAssertionError\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "low",
            "llm_name": "internlm-20b"
        }
    },
    {
        "question": "from typing import List\n\n\ndef parse_nested_parens(paren_string: str) -> List[int]:\n    \"\"\" Input to this function is a string represented multiple groups for nested parentheses separated by spaces.\n    For each of the group, output the deepest level of nesting of parentheses.\n    E.g. (()()) has maximum two levels of nesting while ((())) has three.\n\n    >>> parse_nested_parens('(()()) ((())) () ((())()())')\n    [2, 3, 1, 3]\n    \"\"\"",
        "unit_test": "METADATA = {\n    'author': 'jt',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate('(()()) ((())) () ((())()())') == [2, 3, 1, 3]\n    assert candidate('() (()) ((())) (((())))') == [1, 2, 3, 4]\n    assert candidate('(()(())((())))') == [4]",
        "ground_truth_code": "def parse_paren_group(s):\n        depth = 0\n        max_depth = 0\n        for c in s:\n            if c == '(':\n                depth += 1\n                max_depth = max(depth, max_depth)\n            else:\n                depth -= 1\n\n        return max_depth\n\n    return [parse_paren_group(x) for x in paren_string.split(' ') if x]\n",
        "generation": "def parse_nested_parens(paren_string: str) -> List[int]:\n    \"\"\"\n    Input to this function is a string represented multiple groups for nested parentheses separated by spaces.\n    For each of the group, output the deepest level of nesting of parentheses.\n    E.g. (()()) has maximum two levels of nesting while ((())) has three.\n\n    :param paren_string: a string representing multiple groups for nested parentheses separated by spaces\n    :return: a list of integers representing the deepest level of nesting of parentheses in each group\n    \"\"\"\n    result = []\n    level = 0\n    for char in paren_string:\n        if char == '(':\n            level += 1\n        elif char == ')':\n            level -= 1\n        result.append(level)\n    return result",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpdey4hofr.py\", line 48, in <module>\\n    check(parse_nested_parens)\\n  File \"/tmp/tmpdey4hofr.py\", line 44, in check\\n    assert candidate(\\'(()()) ((())) () ((())()())\\') == [2, 3, 1, 3]\\nAssertionError\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "low",
            "llm_name": "qwen-7b"
        }
    },
    {
        "question": "def is_simple_power(x, n):\n    \"\"\"Your task is to write a function that returns true if a number x is a simple\n    power of n and false in other cases.\n    x is a simple power of n if n**int=x\n    For example:\n    is_simple_power(1, 4) => true\n    is_simple_power(2, 2) => true\n    is_simple_power(8, 2) => true\n    is_simple_power(3, 2) => false\n    is_simple_power(3, 1) => false\n    is_simple_power(5, 3) => false\n    \"\"\"",
        "unit_test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(16, 2)== True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(143214, 16)== False, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(4, 2)==True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(9, 3)==True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(16, 4)==True, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(24, 2)==False, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(128, 4)==False, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(12, 6)==False, \"This prints if this assert fails 1 (good for debugging!)\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(1, 1)==True, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate(1, 12)==True, \"This prints if this assert fails 2 (also good for debugging!)\"",
        "ground_truth_code": "if (n == 1): \n        return (x == 1) \n    power = 1\n    while (power < x): \n        power = power * n \n    return (power == x) \n",
        "generation": "def is_simple_power(x, n):\n    \"\"\"\n    Your task is to write a function that returns true if a number x is a simple\n    power of n and false in other cases.\n    x is a simple power of n if n**int=x\n    For example:\n    is_simple_power(1, 4) => true\n    is_simple_power(2, 2) => true\n    is_simple_power(8, 2) => true\n    is_simple_power(3, 2) => false\n    is_simple_power(3, 1) => false\n    is_simple_power(5, 3) => false\n    \"\"\"\n    if x == 0:\n        return False\n    if x == 1:\n        return True\n    if n == 1:\n        return False\n    if x == n:\n        return True\n    if x < n:\n        return False\n    if x % n == 0:\n        return False\n    return True",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmp41j0sp9h.py\", line 60, in <module>\\n    check(is_simple_power)\\n  File \"/tmp/tmp41j0sp9h.py\", line 46, in check\\n    assert candidate(16, 2)== True, \"This prints if this assert fails 1 (good for debugging!)\"\\nAssertionError: This prints if this assert fails 1 (good for debugging!)\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "med",
            "llm_name": "qwen-7b"
        }
    },
    {
        "question": "def iscube(a):\n    '''\n    Write a function that takes an integer a and returns True \n    if this ingeger is a cube of some integer number.\n    Note: you may assume the input is always valid.\n    Examples:\n    iscube(1) ==> True\n    iscube(2) ==> False\n    iscube(-1) ==> True\n    iscube(64) ==> True\n    iscube(0) ==> True\n    iscube(180) ==> False\n    '''",
        "unit_test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(1) == True, \"First test error: \" + str(candidate(1))\n    assert candidate(2) == False, \"Second test error: \" + str(candidate(2))\n    assert candidate(-1) == True, \"Third test error: \" + str(candidate(-1))\n    assert candidate(64) == True, \"Fourth test error: \" + str(candidate(64))\n    assert candidate(180) == False, \"Fifth test error: \" + str(candidate(180))\n    assert candidate(1000) == True, \"Sixth test error: \" + str(candidate(1000))\n\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(0) == True, \"1st edge test error: \" + str(candidate(0))\n    assert candidate(1729) == False, \"2nd edge test error: \" + str(candidate(1728))",
        "ground_truth_code": "a = abs(a)\n    return int(round(a ** (1. / 3))) ** 3 == a\n",
        "generation": "def iscube(a):\n    \"\"\"\n    Write a function that takes an integer a and returns True if this ingeger is a cube of some integer number.\n    Note: you may assume the input is always valid.\n    Examples:\n    iscube(1) == True\n    iscube(2) == False\n    iscube(-1) == True\n    iscube(64) == True\n    iscube(0) == True\n    iscube(180) == False\n    \"\"\"\n    # Check if a is a perfect cube\n    for i in range(1, int(a ** 0.5) + 1):\n        if a % i == 0:\n            return False\n    return True",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpw3c0wfjb.py\", line 50, in <module>\\n    check(iscube)\\n  File \"/tmp/tmpw3c0wfjb.py\", line 37, in check\\n    assert candidate(1) == True, \"First test error: \" + str(candidate(1))\\nAssertionError: First test error: False\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "med",
            "llm_name": "qwen-7b"
        }
    },
    {
        "question": "Write a function to count the most common words in a dictionary.",
        "unit_test": "assert count_common(['red','green','black','pink','black','white','black','eyes','white','black','orange','pink','pink','red','red','white','orange','white',\"black\",'pink','green','green','pink','green','pink','white','orange',\"orange\",'red']) == [('pink', 6), ('black', 5), ('white', 5), ('red', 4)]\nassert count_common(['one', 'two', 'three', 'four', 'five', 'one', 'two', 'one', 'three', 'one']) == [('one', 4), ('two', 2), ('three', 2), ('four', 1)]\nassert count_common(['Facebook', 'Apple', 'Amazon', 'Netflix', 'Google', 'Apple', 'Netflix', 'Amazon']) == [('Apple', 2), ('Amazon', 2), ('Netflix', 2), ('Facebook', 1)]",
        "ground_truth_code": "from collections import Counter\r\ndef count_common(words):\r\n  word_counts = Counter(words)\r\n  top_four = word_counts.most_common(4)\r\n  return (top_four)\r\n",
        "generation": "def count_common(words):\n    # Create an empty dictionary to store the count of each word\n    count_dict = {}\n    \n    # Iterate through each word in the input list\n    for word in words:\n        # If the word is already in the dictionary, increment its count\n        if word in count_dict:\n            count_dict[word] += 1\n        # Otherwise, add the word to the dictionary with a count of 1\n        else:\n            count_dict[word] = 1\n    \n    # Find the word with the highest count and return it\n    max_count = 0\n    most_common_word = None\n    for word, count in count_dict.items():\n        if count > max_count:\n            max_count = count\n            most_common_word = word\n    \n    return most_common_word, count_dict",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpv29l4wbz.py\", line 39, in <module>\\n    assert count_common([\\'red\\',\\'green\\',\\'black\\',\\'pink\\',\\'black\\',\\'white\\',\\'black\\',\\'eyes\\',\\'white\\',\\'black\\',\\'orange\\',\\'pink\\',\\'pink\\',\\'red\\',\\'red\\',\\'white\\',\\'orange\\',\\'white\\',\"black\",\\'pink\\',\\'green\\',\\'green\\',\\'pink\\',\\'green\\',\\'pink\\',\\'white\\',\\'orange\\',\"orange\",\\'red\\']) == [(\\'pink\\', 6), (\\'black\\', 5), (\\'white\\', 5), (\\'red\\', 4)]\\nAssertionError\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "med",
            "llm_name": "qwen-7b"
        }
    },
    {
        "question": "Write a function to find the top k integers that occur most frequently from given lists of sorted and distinct integers using heap queue algorithm.",
        "unit_test": "assert func([[1, 2, 6], [1, 3, 4, 5, 7, 8], [1, 3, 5, 6, 8, 9], [2, 5, 7, 11], [1, 4, 7, 8, 12]],3)==[5, 7, 1]\nassert func([[1, 2, 6], [1, 3, 4, 5, 7, 8], [1, 3, 5, 6, 8, 9], [2, 5, 7, 11], [1, 4, 7, 8, 12]],1)==[1]\nassert func([[1, 2, 6], [1, 3, 4, 5, 7, 8], [1, 3, 5, 6, 8, 9], [2, 5, 7, 11], [1, 4, 7, 8, 12]],5)==[6, 5, 7, 8, 1]",
        "ground_truth_code": "def func(nums, k):\r\n    import collections\r\n    d = collections.defaultdict(int)\r\n    for row in nums:\r\n        for i in row:\r\n            d[i] += 1\r\n    temp = []\r\n    import heapq\r\n    for key, v in d.items():\r\n        if len(temp) < k:\r\n            temp.append((v, key))\r\n            if len(temp) == k:\r\n                heapq.heapify(temp)\r\n        else:\r\n            if v > temp[0][0]:\r\n                heapq.heappop(temp)\r\n                heapq.heappush(temp, (v, key))\r\n    result = []\r\n    while temp:\r\n        v, key = heapq.heappop(temp)\r\n        result.append(key)\r\n    return result",
        "generation": "import heapq\n\ndef func(l, k):\n    heap = []\n    for num in l:\n        heapq.heappush(heap, num)\n    result = []\n    while heap and k > 0:\n        num = heapq.heappop(heap)\n        result.append(num)\n        k -= 1\n    return result",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpj96re2m9.py\", line 29, in <module>\\n    assert func([[1, 2, 6], [1, 3, 4, 5, 7, 8], [1, 3, 5, 6, 8, 9], [2, 5, 7, 11], [1, 4, 7, 8, 12]],3)==[5, 7, 1]\\nAssertionError\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "med",
            "llm_name": "qwen-7b"
        }
    },
    {
        "question": "Write a python function to find the largest number that can be formed with the given digits.",
        "unit_test": "assert find_Max_Num([1,2,3],3) == 321\nassert find_Max_Num([4,5,6,1],4) == 6541\nassert find_Max_Num([1,2,3,9],4) == 9321",
        "ground_truth_code": "def find_Max_Num(arr,n) : \r\n    arr.sort(reverse = True) \r\n    num = arr[0] \r\n    for i in range(1,n) : \r\n        num = num * 10 + arr[i] \r\n    return num ",
        "generation": "def find_Max_Num(digits, k):\n    \"\"\"\n    :type digits: List[int]\n    :type k: int\n    :rtype: int\n    \"\"\"\n    if not digits:\n        return 0\n    digits.sort(reverse=True)\n    result = 0\n    for i in range(len(digits)-k+1):\n        result = max(result, digits[i]*10**(k-i-1))\n    return result",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpy65rv8s8.py\", line 30, in <module>\\n    assert find_Max_Num([1,2,3],3) == 321\\nAssertionError\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "med",
            "llm_name": "qwen-7b"
        }
    },
    {
        "question": "Write a function to find the n-th number in newman conway sequence.",
        "unit_test": "assert sequence(10) == 6\nassert sequence(2) == 1\nassert sequence(3) == 2",
        "ground_truth_code": "def sequence(n): \r\n\tif n == 1 or n == 2: \r\n\t\treturn 1\r\n\telse: \r\n\t\treturn sequence(sequence(n-1)) + sequence(n-sequence(n-1))",
        "generation": "def sequence(n):\n    if n <= 0:\n        return None\n    elif n == 1:\n        return 1\n    else:\n        return sequence(n-1) + sequence(n-2)",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmp3h8y11t9.py\", line 24, in <module>\\n    assert sequence(10) == 6\\n  File \"/tmp/tmp3h8y11t9.py\", line 23, in sequence\\n    return sequence(n-1) + sequence(n-2)\\n  File \"/tmp/tmp3h8y11t9.py\", line 23, in sequence\\n    return sequence(n-1) + sequence(n-2)\\n  File \"/tmp/tmp3h8y11t9.py\", line 23, in sequence\\n    return sequence(n-1) + sequence(n-2)\\n  [Previous line repeated 6 more times]\\nTypeError: unsupported operand type(s) for +: \\'int\\' and \\'NoneType\\'\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "med",
            "llm_name": "qwen-7b"
        }
    },
    {
        "question": "Write a function to find nth centered hexagonal number.",
        "unit_test": "assert centered_hexagonal_number(10) == 271\nassert centered_hexagonal_number(2) == 7\nassert centered_hexagonal_number(9) == 217",
        "ground_truth_code": "def centered_hexagonal_number(n):\r\n  return 3 * n * (n - 1) + 1",
        "generation": "def centered_hexagonal_number(n):\n    if n <= 0:\n        return None\n    elif n == 1:\n        return 1\n    else:\n        # calculate the nth centered hexagonal number\n        result = (n - 1) * (6 * n**2 - 1) // 2 + 1\n        return result",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpj4qyxrpi.py\", line 26, in <module>\\n    assert centered_hexagonal_number(10) == 271\\nAssertionError\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "high",
            "llm_name": "qwen-7b"
        }
    },
    {
        "question": "from typing import List\n\n\ndef parse_nested_parens(paren_string: str) -> List[int]:\n    \"\"\" Input to this function is a string represented multiple groups for nested parentheses separated by spaces.\n    For each of the group, output the deepest level of nesting of parentheses.\n    E.g. (()()) has maximum two levels of nesting while ((())) has three.\n\n    >>> parse_nested_parens('(()()) ((())) () ((())()())')\n    [2, 3, 1, 3]\n    \"\"\"",
        "unit_test": "METADATA = {\n    'author': 'jt',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate('(()()) ((())) () ((())()())') == [2, 3, 1, 3]\n    assert candidate('() (()) ((())) (((())))') == [1, 2, 3, 4]\n    assert candidate('(()(())((())))') == [4]",
        "ground_truth_code": "def parse_paren_group(s):\n        depth = 0\n        max_depth = 0\n        for c in s:\n            if c == '(':\n                depth += 1\n                max_depth = max(depth, max_depth)\n            else:\n                depth -= 1\n\n        return max_depth\n\n    return [parse_paren_group(x) for x in paren_string.split(' ') if x]\n",
        "generation": "from typing import List\n\ndef parse_nested_parens(paren_string: str) -> List[int]:\n    \"\"\"\n    Input to this function is a string represented multiple groups for nested parentheses separated by spaces.\n    For each of the group, output the deepest level of nesting of parentheses.\n    E.g. (()()) has maximum two levels of nesting while ((())) has three.\n\n    Args:\n        paren_string (str): A string represented multiple groups for nested parentheses separated by spaces.\n\n    Returns:\n        List[int]: A list of integers representing the deepest level of nesting of parentheses for each group.\n    \"\"\"\n    levels = []\n    max_level = 0\n    current_level = 0\n    for char in paren_string:\n        if char == '(':\n            current_level += 1\n            if current_level > max_level:\n                max_level = current_level\n        elif char == ')':\n            current_level -= 1\n    levels.append(max_level)\n    return levels",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpp52knctc.py\", line 56, in <module>\\n    check(parse_nested_parens)\\n  File \"/tmp/tmpp52knctc.py\", line 52, in check\\n    assert candidate(\\'(()()) ((())) () ((())()())\\') == [2, 3, 1, 3]\\nAssertionError\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "med",
            "llm_name": "qwen-14b"
        }
    },
    {
        "question": "def fruit_distribution(s,n):\n    \"\"\"\n    In this task, you will be given a string that represents a number of apples and oranges \n    that are distributed in a basket of fruit this basket contains \n    apples, oranges, and mango fruits. Given the string that represents the total number of \n    the oranges and apples and an integer that represent the total number of the fruits \n    in the basket return the number of the mango fruits in the basket.\n    for examble:\n    fruit_distribution(\"5 apples and 6 oranges\", 19) ->19 - 5 - 6 = 8\n    fruit_distribution(\"0 apples and 1 oranges\",3) -> 3 - 0 - 1 = 2\n    fruit_distribution(\"2 apples and 3 oranges\", 100) -> 100 - 2 - 3 = 95\n    fruit_distribution(\"100 apples and 1 oranges\",120) -> 120 - 100 - 1 = 19\n    \"\"\"",
        "unit_test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"5 apples and 6 oranges\",19) == 8\n    assert candidate(\"5 apples and 6 oranges\",21) == 10\n    assert candidate(\"0 apples and 1 oranges\",3) == 2\n    assert candidate(\"1 apples and 0 oranges\",3) == 2\n    assert candidate(\"2 apples and 3 oranges\",100) == 95\n    assert candidate(\"2 apples and 3 oranges\",5) == 0\n    assert candidate(\"1 apples and 100 oranges\",120) == 19",
        "ground_truth_code": "lis = list()\n    for i in s.split(' '):\n        if i.isdigit():\n            lis.append(int(i))\n    return n - sum(lis)\n",
        "generation": "def fruit_distribution(s,n):\n    \"\"\"\n    In this task, you will be given a string that represents a number of apples and oranges \n    that are distributed in a basket of fruit this basket contains \n    apples, oranges, and mango fruits. Given the string that represents the total number of \n    the oranges and apples and an integer that represent the total number of the fruits \n    in the basket return the number of the mango fruits in the basket.\n    for examble:\n    fruit_distribution(\"5 apples and 6 oranges\", 19) ->19 - 5 - 6 = 8\n    fruit_distribution(\"0 apples and 1 oranges\",3) -> 3 - 0 - 1 = 2\n    fruit_distribution(\"2 apples and 3 oranges\", 100) -> 100 - 2 - 3 = 95\n    fruit_distribution(\"100 apples and 1 oranges\",120) -> 120 - 100 - 1 = 19\n    \"\"\"\n    apples, oranges = map(int, s.split(\" and \"))\n    return n - apples - oranges",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmp17sa7t9f.py\", line 43, in <module>\\n    check(fruit_distribution)\\n  File \"/tmp/tmp17sa7t9f.py\", line 35, in check\\n    assert candidate(\"5 apples and 6 oranges\",19) == 8\\n  File \"/tmp/tmp17sa7t9f.py\", line 30, in fruit_distribution\\n    apples, oranges = map(int, s.split(\" and \"))\\nValueError: invalid literal for int() with base 10: \\'5 apples\\'\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "med",
            "llm_name": "qwen-14b"
        }
    },
    {
        "question": "Write a function to split a string at lowercase letters.",
        "unit_test": "assert split_lowerstring(\"AbCd\")==['bC','d']\nassert split_lowerstring(\"Python\")==['y', 't', 'h', 'o', 'n']\nassert split_lowerstring(\"Programming\")==['r', 'o', 'g', 'r', 'a', 'm', 'm', 'i', 'n', 'g']",
        "ground_truth_code": "import re\r\ndef split_lowerstring(text):\r\n return (re.findall('[a-z][^a-z]*', text))",
        "generation": "def split_lowerstring(s):\n    result = []\n    start = 0\n    for i in range(len(s)):\n        if s[i].islower():\n            result.append(s[start:i])\n            start = i + 1\n    result.append(s[start:])\n    return result",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmp1_99cakg.py\", line 26, in <module>\\n    assert split_lowerstring(\"AbCd\")==[\\'bC\\',\\'d\\']\\nAssertionError\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "high",
            "llm_name": "qwen-14b"
        }
    },
    {
        "question": "Write a function to find the top k integers that occur most frequently from given lists of sorted and distinct integers using heap queue algorithm.",
        "unit_test": "assert func([[1, 2, 6], [1, 3, 4, 5, 7, 8], [1, 3, 5, 6, 8, 9], [2, 5, 7, 11], [1, 4, 7, 8, 12]],3)==[5, 7, 1]\nassert func([[1, 2, 6], [1, 3, 4, 5, 7, 8], [1, 3, 5, 6, 8, 9], [2, 5, 7, 11], [1, 4, 7, 8, 12]],1)==[1]\nassert func([[1, 2, 6], [1, 3, 4, 5, 7, 8], [1, 3, 5, 6, 8, 9], [2, 5, 7, 11], [1, 4, 7, 8, 12]],5)==[6, 5, 7, 8, 1]",
        "ground_truth_code": "def func(nums, k):\r\n    import collections\r\n    d = collections.defaultdict(int)\r\n    for row in nums:\r\n        for i in row:\r\n            d[i] += 1\r\n    temp = []\r\n    import heapq\r\n    for key, v in d.items():\r\n        if len(temp) < k:\r\n            temp.append((v, key))\r\n            if len(temp) == k:\r\n                heapq.heapify(temp)\r\n        else:\r\n            if v > temp[0][0]:\r\n                heapq.heappop(temp)\r\n                heapq.heappush(temp, (v, key))\r\n    result = []\r\n    while temp:\r\n        v, key = heapq.heappop(temp)\r\n        result.append(key)\r\n    return result",
        "generation": "import heapq\n\ndef topKFrequent(nums, k):\n    \"\"\"\n    :type nums: List[int]\n    :type k: int\n    :rtype: List[int]\n    \"\"\"\n    count = {}\n    for num in nums:\n        if num in count:\n            count[num] += 1\n        else:\n            count[num] = 1\n    heap = []\n    for num, freq in count.items():\n        heapq.heappush(heap, (freq, num))\n    res = []\n    while heap and len(res) < k:\n        freq, num = heapq.heappop(heap)\n        res.append(num)\n    return res",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmp1py8mv49.py\", line 39, in <module>\\n    assert func([[1, 2, 6], [1, 3, 4, 5, 7, 8], [1, 3, 5, 6, 8, 9], [2, 5, 7, 11], [1, 4, 7, 8, 12]],3)==[5, 7, 1]\\nNameError: name \\'func\\' is not defined\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "high",
            "llm_name": "qwen-14b"
        }
    },
    {
        "question": "Write a function to find frequency of the elements in a given list of lists using collections module.",
        "unit_test": "assert freq_element([[1, 2, 3, 2], [4, 5, 6, 2], [7, 1, 9, 5]])==({2: 3, 1: 2, 5: 2, 3: 1, 4: 1, 6: 1, 7: 1, 9: 1})\nassert freq_element([[1,2,3,4],[5,6,7,8],[9,10,11,12]])==({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1, 10: 1, 11: 1, 12: 1})\nassert freq_element([[15,20,30,40],[80,90,100,110],[30,30,80,90]])==({30: 3, 80: 2, 90: 2, 15: 1, 20: 1, 40: 1, 100: 1, 110: 1})",
        "ground_truth_code": "from collections import Counter\r\nfrom itertools import chain\r\ndef freq_element(nums):\r\n  result = Counter(chain.from_iterable(nums))\r\n  return result",
        "generation": "from collections import Counter\n\ndef freq_element(list_of_lists):\n    freq_dict = {}\n    for inner_list in list_of_lists:\n        inner_dict = Counter(inner_list)\n        freq_dict.update(inner_dict)\n    return freq_dict",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpbq93a996.py\", line 25, in <module>\\n    assert freq_element([[1, 2, 3, 2], [4, 5, 6, 2], [7, 1, 9, 5]])==({2: 3, 1: 2, 5: 2, 3: 1, 4: 1, 6: 1, 7: 1, 9: 1})\\nAssertionError\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "high",
            "llm_name": "qwen-14b"
        }
    },
    {
        "question": "Write a python function to find the last digit when factorial of a divides factorial of b.",
        "unit_test": "assert compute_Last_Digit(2,4) == 2\nassert compute_Last_Digit(6,8) == 6\nassert compute_Last_Digit(1,2) == 2",
        "ground_truth_code": "def compute_Last_Digit(A,B): \r\n    variable = 1\r\n    if (A == B): \r\n        return 1\r\n    elif ((B - A) >= 5):  \r\n        return 0\r\n    else:   \r\n        for i in range(A + 1,B + 1): \r\n            variable = (variable * (i % 10)) % 10\r\n        return variable % 10",
        "generation": "def compute_Last_Digit(a, b):\n    \"\"\"\n    Compute the last digit when factorial of a divides factorial of b.\n    \n    Args:\n    a (int): The first integer.\n    b (int): The second integer.\n    \n    Returns:\n    int: The last digit of the result.\n    \"\"\"\n    if a == 0 or b == 0:\n        return 1\n    \n    a_factorial = 1\n    b_factorial = 1\n    \n    for i in range(1, a + 1):\n        a_factorial *= i\n        if a_factorial % 10 == 0:\n            break\n    \n    for i in range(1, b + 1):\n        b_factorial *= i\n        if b_factorial % 10 == 0:\n            break\n    \n    return a_factorial % 10",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpsmdkbdr0.py\", line 46, in <module>\\n    assert compute_Last_Digit(6,8) == 6\\nAssertionError\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "high",
            "llm_name": "qwen-14b"
        }
    },
    {
        "question": "Write a function to find the n-th number in newman conway sequence.",
        "unit_test": "assert sequence(10) == 6\nassert sequence(2) == 1\nassert sequence(3) == 2",
        "ground_truth_code": "def sequence(n): \r\n\tif n == 1 or n == 2: \r\n\t\treturn 1\r\n\telse: \r\n\t\treturn sequence(sequence(n-1)) + sequence(n-sequence(n-1))",
        "generation": "def sequence(n):\n    if n == 1:\n        return 1\n    elif n == 2:\n        return 1\n    else:\n        return sequence(sequence(n-1)) + sequence(n-2)",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmp0cfqgaja.py\", line 24, in <module>\\n    assert sequence(10) == 6\\n  File \"/tmp/tmp0cfqgaja.py\", line 23, in sequence\\n    return sequence(sequence(n-1)) + sequence(n-2)\\n  File \"/tmp/tmp0cfqgaja.py\", line 23, in sequence\\n    return sequence(sequence(n-1)) + sequence(n-2)\\n  File \"/tmp/tmp0cfqgaja.py\", line 23, in sequence\\n    return sequence(sequence(n-1)) + sequence(n-2)\\n  [Previous line repeated 995 more times]\\n  File \"/tmp/tmp0cfqgaja.py\", line 18, in sequence\\n    if n == 1:\\nRecursionError: maximum recursion depth exceeded in comparison\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "high",
            "llm_name": "qwen-14b"
        }
    },
    {
        "question": "Write a function to find nth centered hexagonal number.",
        "unit_test": "assert centered_hexagonal_number(10) == 271\nassert centered_hexagonal_number(2) == 7\nassert centered_hexagonal_number(9) == 217",
        "ground_truth_code": "def centered_hexagonal_number(n):\r\n  return 3 * n * (n - 1) + 1",
        "generation": "def centered_hexagonal_number(n):\n    \"\"\"\n    Find nth centered hexagonal number.\n    \"\"\"\n    return (3*n**2 - n) // 2",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmp4tdretmf.py\", line 22, in <module>\\n    assert centered_hexagonal_number(10) == 271\\nAssertionError\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "low",
            "llm_name": "qwen-14b"
        }
    },
    {
        "question": "Write a function to find the kth element in the given array.",
        "unit_test": "assert kth_element([12,3,5,7,19], 5, 2) == 3\nassert kth_element([17,24,8,23], 4, 3) == 8\nassert kth_element([16,21,25,36,4], 5, 4) == 36",
        "ground_truth_code": "def kth_element(arr, n, k):\r\n  for i in range(n):\r\n    for j in range(0, n-i-1):\r\n      if arr[j] > arr[j+1]:\r\n        arr[j], arr[j+1] == arr[j+1], arr[j]\r\n  return arr[k-1]",
        "generation": "def kth_element(arr, k, mod):\n    \"\"\"\n    Find the kth element in the given array.\n\n    Args:\n        arr (list): The input array.\n        k (int): The index of the element to be found.\n        mod (int): The modulus to apply to the index.\n\n    Returns:\n        int: The kth element in the array.\n    \"\"\"\n    return arr[(k % mod) - 1]",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpe6l8ve5j.py\", line 30, in <module>\\n    assert kth_element([12,3,5,7,19], 5, 2) == 3\\nAssertionError\\n'}",
        "data_source": "mbpp",
        "metadata": {
            "quality": "med",
            "llm_name": "qwen-14b"
        }
    },
    {
        "question": "def string_sequence(n: int) -> str:\n    \"\"\" Return a string containing space-delimited numbers starting from 0 upto n inclusive.\n    >>> string_sequence(0)\n    '0'\n    >>> string_sequence(5)\n    '0 1 2 3 4 5'\n    \"\"\"",
        "unit_test": "METADATA = {\n    'author': 'jt',\n    'dataset': 'test'\n}\n\n\ndef check(candidate):\n    assert candidate(0) == '0'\n    assert candidate(3) == '0 1 2 3'\n    assert candidate(10) == '0 1 2 3 4 5 6 7 8 9 10'",
        "ground_truth_code": "return ' '.join([str(x) for x in range(n + 1)])\n",
        "generation": "def string_sequence(n: int) -> str:\n    \"\"\" Return a string containing space-delimited numbers starting from 0 upto n inclusive.\n    \n    >>> string_sequence(0)\n    '0'\n    >>> string_sequence(5)\n    '0 1 2 3 4 5'\n    \"\"\"\n    result = \"\"\n    for i in range(n + 1):\n        result += str(i) + \" \"\n    return result",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmptwv6ob4b.py\", line 42, in <module>\\n    check(string_sequence)\\n  File \"/tmp/tmptwv6ob4b.py\", line 38, in check\\n    assert candidate(0) == \\'0\\'\\nAssertionError\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "high",
            "llm_name": "baichuan2-13b"
        }
    },
    {
        "question": "def fizz_buzz(n: int):\n    \"\"\"Return the number of times the digit 7 appears in integers less than n which are divisible by 11 or 13.\n    >>> fizz_buzz(50)\n    0\n    >>> fizz_buzz(78)\n    2\n    >>> fizz_buzz(79)\n    3\n    \"\"\"",
        "unit_test": "METADATA = {}\n\n\ndef check(candidate):\n    assert candidate(50) == 0\n    assert candidate(78) == 2\n    assert candidate(79) == 3\n    assert candidate(100) == 3\n    assert candidate(200) == 6\n    assert candidate(4000) == 192\n    assert candidate(10000) == 639\n    assert candidate(100000) == 8026",
        "ground_truth_code": "ns = []\n    for i in range(n):\n        if i % 11 == 0 or i % 13 == 0:\n            ns.append(i)\n    s = ''.join(list(map(str, ns)))\n    ans = 0\n    for c in s:\n        ans += (c == '7')\n    return ans\n",
        "generation": "def fizz_buzz(n: int):\n    \"\"\"Return the number of times the digit 7 appears in integers less than n which are divisible by 11 or 13.\n    >>> fizz_buzz(50)\n    0\n    >>> fizz_buzz(78)\n    2\n    >>> fizz_buzz(79)\n    3\n    \"\"\"\n    # Your code here\n    pass\n\ncheck(fizz_buzz)",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpopgh4ty5.py\", line 29, in <module>\\n    check(fizz_buzz)\\nNameError: name \\'check\\' is not defined\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "low",
            "llm_name": "baichuan2-13b"
        }
    },
    {
        "question": "def change_base(x: int, base: int):\n    \"\"\"Change numerical base of input number x to base.\n    return string representation after the conversion.\n    base numbers are less than 10.\n    >>> change_base(8, 3)\n    '22'\n    >>> change_base(8, 2)\n    '1000'\n    >>> change_base(7, 2)\n    '111'\n    \"\"\"",
        "unit_test": "METADATA = {}\n\n\ndef check(candidate):\n    assert candidate(8, 3) == \"22\"\n    assert candidate(9, 3) == \"100\"\n    assert candidate(234, 2) == \"11101010\"\n    assert candidate(16, 2) == \"10000\"\n    assert candidate(8, 2) == \"1000\"\n    assert candidate(7, 2) == \"111\"\n    for x in range(2, 8):\n        assert candidate(x, x + 1) == str(x)",
        "ground_truth_code": "ret = \"\"\n    while x > 0:\n        ret = str(x % base) + ret\n        x //= base\n    return ret\n",
        "generation": "def change_base(x: int, base: int):\n    \"\"\"Change numerical base of input number x to base.\n    return string representation after the conversion.\n    base numbers are less than 10.\n    >>> change_base(8, 3)\n    '22'\n    >>> change_base(8, 2)\n    '1000'\n    >>> change_base(7, 2)\n    '111'\n    \"\"\"\n    if x < 0 or base < 2:\n        raise ValueError(\"Input should be positive integer\")\n    digits = \"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\"\n    result = \"\"\n    while x > 0:\n        remainder = x % base\n        x = x // base\n        result = digits[remainder] + result\n    return result\n\ncheck(change_base)",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpj99uh7id.py\", line 38, in <module>\\n    check(change_base)\\nNameError: name \\'check\\' is not defined\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "high",
            "llm_name": "baichuan2-13b"
        }
    },
    {
        "question": "def encode_shift(s: str):\n    \"\"\"\n    returns encoded string by shifting every character by 5 in the alphabet.\n    \"\"\"\n    return \"\".join([chr(((ord(ch) + 5 - ord(\"a\")) % 26) + ord(\"a\")) for ch in s])\n\n\ndef decode_shift(s: str):\n    \"\"\"\n    takes as input string encoded with encode_shift function. Returns decoded string.\n    \"\"\"",
        "unit_test": "METADATA = {}\n\n\ndef check(candidate):\n    from random import randint, choice\n    import copy\n    import string\n\n    letters = string.ascii_lowercase\n    for _ in range(100):\n        str = ''.join(choice(letters) for i in range(randint(10, 20)))\n        encoded_str = encode_shift(str)\n        assert candidate(copy.deepcopy(encoded_str)) == str",
        "ground_truth_code": "return \"\".join([chr(((ord(ch) - 5 - ord(\"a\")) % 26) + ord(\"a\")) for ch in s])\n",
        "generation": "def decode_shift(s: str):\n    \"\"\"\n    takes as input string encoded with encode_shift function. Returns decoded string.\n    \"\"\"\n    return \"\".join([chr(((ord(ch) - 5 - ord(\"a\")) % 26) + ord(\"a\")) for ch in s])",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmp3fhcqfva.py\", line 39, in <module>\\n    check(decode_shift)\\n  File \"/tmp/tmp3fhcqfva.py\", line 35, in check\\n    encoded_str = encode_shift(str)\\nNameError: name \\'encode_shift\\' is not defined\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "high",
            "llm_name": "baichuan2-13b"
        }
    },
    {
        "question": "FIX = \"\"\"\nAdd more test cases.\n\"\"\"\n\ndef vowels_count(s):\n    \"\"\"Write a function vowels_count which takes a string representing\n    a word as input and returns the number of vowels in the string.\n    Vowels in this case are 'a', 'e', 'i', 'o', 'u'. Here, 'y' is also a\n    vowel, but only when it is at the end of the given word.\n\n    Example:\n    >>> vowels_count(\"abcde\")\n    2\n    >>> vowels_count(\"ACEDY\")\n    3\n    \"\"\"",
        "unit_test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"abcde\") == 2, \"Test 1\"\n    assert candidate(\"Alone\") == 3, \"Test 2\"\n    assert candidate(\"key\") == 2, \"Test 3\"\n    assert candidate(\"bye\") == 1, \"Test 4\"\n    assert candidate(\"keY\") == 2, \"Test 5\"\n    assert candidate(\"bYe\") == 1, \"Test 6\"\n    assert candidate(\"ACEDY\") == 3, \"Test 7\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"",
        "ground_truth_code": "vowels = \"aeiouAEIOU\"\n    n_vowels = sum(c in vowels for c in s)\n    if s[-1] == 'y' or s[-1] == 'Y':\n        n_vowels += 1\n    return n_vowels\n",
        "generation": "def vowels_count(s):\n    \"\"\"Write a function vowels_count which takes a string representing\n    a word as input and returns the number of vowels in the string.\n    Vowels in this case are 'a', 'e', 'i', 'o', 'u'. Here, 'y' is also a\n    vowel, but only when it is at the end of the given word.\n\n    Example:\n    >>> vowels_count(\"abcde\")\n    2\n    >>> vowels_count(\"ACEDY\")\n    3\n    \"\"\"\n    # Add your code here\n    s = list(s) # convert input string to a list of characters\n    vowels = ['a', 'e', 'i', 'o', 'u']\n    end_with_y = False\n    num_vowels = 0\n    for char in s:\n        if char.lower() in vowels:\n            num_vowels += 1\n        elif char.lower() == 'y' and not end_with_y:\n            end_with_y = True\n            num_vowels += 1\n        else:\n            end_with_y = False\n\n    return num_vowels\n\n# Your code will be tested with these unit tests\ndef check(candidate):\n\n    # Check some simple cases\n    assert candidate(\"abcde\") == 2, \"Test 1\"\n    assert candidate(\"Alone\") == 3, \"Test 2\"\n    assert candidate(\"key\") == 2, \"Test 3\"\n    assert candidate(\"bye\") == 1, \"Test 4\"\n    assert candidate(\"keY\") == 2, \"Test 5\"\n    assert candidate(\"bYe\") == 1, \"Test 6\"\n    assert candidate(\"ACEDY\") == 3, \"Test 7\"\n\n    # Check some edge cases that are easy to work out by hand.\n    assert True, \"This prints if this assert fails 2 (also good for debugging!)\"\n\n# This is where your code would be tested\ncheck(vowels_count)",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpa9uxxvj0.py\", line 61, in <module>\\n    check(vowels_count)\\n  File \"/tmp/tmpa9uxxvj0.py\", line 52, in check\\n    assert candidate(\"bye\") == 1, \"Test 4\"\\nAssertionError: Test 4\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "high",
            "llm_name": "baichuan2-13b"
        }
    },
    {
        "question": "def search(lst):\n    '''\n    You are given a non-empty list of positive integers. Return the greatest integer that is greater than \n    zero, and has a frequency greater than or equal to the value of the integer itself. \n    The frequency of an integer is the number of times it appears in the list.\n    If no such a value exist, return -1.\n    Examples:\n        search([4, 1, 2, 2, 3, 1]) == 2\n        search([1, 2, 2, 3, 3, 3, 4, 4, 4]) == 3\n        search([5, 5, 4, 4, 4]) == -1\n    '''",
        "unit_test": "def check(candidate):\n\n    # manually generated tests\n    assert candidate([5, 5, 5, 5, 1]) == 1\n    assert candidate([4, 1, 4, 1, 4, 4]) == 4\n    assert candidate([3, 3]) == -1\n    assert candidate([8, 8, 8, 8, 8, 8, 8, 8]) == 8\n    assert candidate([2, 3, 3, 2, 2]) == 2\n\n    # automatically generated tests\n    assert candidate([2, 7, 8, 8, 4, 8, 7, 3, 9, 6, 5, 10, 4, 3, 6, 7, 1, 7, 4, 10, 8, 1]) == 1\n    assert candidate([3, 2, 8, 2]) == 2\n    assert candidate([6, 7, 1, 8, 8, 10, 5, 8, 5, 3, 10]) == 1\n    assert candidate([8, 8, 3, 6, 5, 6, 4]) == -1\n    assert candidate([6, 9, 6, 7, 1, 4, 7, 1, 8, 8, 9, 8, 10, 10, 8, 4, 10, 4, 10, 1, 2, 9, 5, 7, 9]) == 1\n    assert candidate([1, 9, 10, 1, 3]) == 1\n    assert candidate([6, 9, 7, 5, 8, 7, 5, 3, 7, 5, 10, 10, 3, 6, 10, 2, 8, 6, 5, 4, 9, 5, 3, 10]) == 5\n    assert candidate([1]) == 1\n    assert candidate([8, 8, 10, 6, 4, 3, 5, 8, 2, 4, 2, 8, 4, 6, 10, 4, 2, 1, 10, 2, 1, 1, 5]) == 4\n    assert candidate([2, 10, 4, 8, 2, 10, 5, 1, 2, 9, 5, 5, 6, 3, 8, 6, 4, 10]) == 2\n    assert candidate([1, 6, 10, 1, 6, 9, 10, 8, 6, 8, 7, 3]) == 1\n    assert candidate([9, 2, 4, 1, 5, 1, 5, 2, 5, 7, 7, 7, 3, 10, 1, 5, 4, 2, 8, 4, 1, 9, 10, 7, 10, 2, 8, 10, 9, 4]) == 4\n    assert candidate([2, 6, 4, 2, 8, 7, 5, 6, 4, 10, 4, 6, 3, 7, 8, 8, 3, 1, 4, 2, 2, 10, 7]) == 4\n    assert candidate([9, 8, 6, 10, 2, 6, 10, 2, 7, 8, 10, 3, 8, 2, 6, 2, 3, 1]) == 2\n    assert candidate([5, 5, 3, 9, 5, 6, 3, 2, 8, 5, 6, 10, 10, 6, 8, 4, 10, 7, 7, 10, 8]) == -1\n    assert candidate([10]) == -1\n    assert candidate([9, 7, 7, 2, 4, 7, 2, 10, 9, 7, 5, 7, 2]) == 2\n    assert candidate([5, 4, 10, 2, 1, 1, 10, 3, 6, 1, 8]) == 1\n    assert candidate([7, 9, 9, 9, 3, 4, 1, 5, 9, 1, 2, 1, 1, 10, 7, 5, 6, 7, 6, 7, 7, 6]) == 1\n    assert candidate([3, 10, 10, 9, 2]) == -1",
        "ground_truth_code": "frq = [0] * (max(lst) + 1)\n    for i in lst:\n        frq[i] += 1;\n\n    ans = -1\n    for i in range(1, len(frq)):\n        if frq[i] >= i:\n            ans = i\n    \n    return ans\n",
        "generation": "def search(nums):\n    \"\"\"\n    Solution: https://leetcode.com/problems/11119-cloning-binary-search-trees-ii/\n    This solution uses a recursive function to find the largest root node with a given tree height.\n    Then, we can use this function to get the largest root node to find larger nodes.\n    \"\"\"\n    def helper(root):\n        nonans 1\n        if ans == -1:\n            return ans\n        if not ns[root]:\n            return -1\n        else if ns[root] > == 1:\n                return helper(left)\n        else:\n            return helper(right)+1\n\n    lst = [helper(i) for i in nums]\n    return max(set(lst))",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': '  File \"/tmp/tmp25ae_bwp.py\", line 24\\n    nonans 1\\n           ^\\nSyntaxError: invalid syntax\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "low",
            "llm_name": "baichuan2-13b"
        }
    },
    {
        "question": "def triangle_area(a, b, c):\n    '''\n    Given the lengths of the three sides of a triangle. Return the area of\n    the triangle rounded to 2 decimal points if the three sides form a valid triangle. \n    Otherwise return -1\n    Three sides make a valid triangle when the sum of any two sides is greater \n    than the third side.\n    Example:\n    triangle_area(3, 4, 5) == 6.00\n    triangle_area(1, 2, 10) == -1\n    '''",
        "unit_test": "def check(candidate):\n\n    # Check some simple cases\n    assert candidate(3, 4, 5) == 6.00, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(1, 2, 10) == -1\n    assert candidate(4, 8, 5) == 8.18\n    assert candidate(2, 2, 2) == 1.73\n    assert candidate(1, 2, 3) == -1\n    assert candidate(10, 5, 7) == 16.25\n    assert candidate(2, 6, 3) == -1\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(1, 1, 1) == 0.43, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate(2, 2, 10) == -1",
        "ground_truth_code": "if a + b <= c or a + c <= b or b + c <= a:\n        return -1 \n    s = (a + b + c)/2    \n    area = (s * (s - a) * (s - b) * (s - c)) ** 0.5\n    area = round(area, 2)\n    return area\n",
        "generation": "def triangle_area(a, b, c):\n    '''\n    Given the lengths of the three sides of a triangle. Return the area of\n    the triangle rounded to 2 decimal points if the three sides form a valid triangle. \n    Otherwise return -1\n    Three sides make a valid triangle when the sum of any two sides is greater \n    than the third side.\n    Example:\n    triangle_area(3, 4, 5) == 6.00\n    triangle_area(1, 2, 10) == -1\n    '''\n    if (a + b > c) and (a + c > b) and (b + c > a):\n        return round(0.5 * abs((a + b + c) ** 2 - 2 * a * b - 2 * a * c - 2 * b * c), 2)\n    else:\n        return -1\n\n# Check the function with unit test\ndef check(candidate):\n\n    # Check some simple cases\n    assert candidate(3, 4, 5) == 6.00, \"This prints if this assert fails 1 (good for debugging!)\"\n    assert candidate(1, 2, 10) == -1\n    assert candidate(4, 8, 5) == 8.18\n    assert candidate(2, 2, 2) == 1.73\n    assert candidate(1, 2, 3) == -1\n    assert candidate(10, 5, 7) == 16.25\n    assert candidate(2, 6, 3) == -1\n\n    # Check some edge cases that are easy to work out by hand.\n    assert candidate(1, 1, 1) == 0.43, \"This prints if this assert fails 2 (also good for debugging!)\"\n    assert candidate(2, 2, 10) == -1",
        "exec_rest": "{'status': 'NOT PASSED', 'detail': 'Traceback (most recent call last):\\n  File \"/tmp/tmpyn8uc7sj.py\", line 64, in <module>\\n    check(triangle_area)\\n  File \"/tmp/tmpyn8uc7sj.py\", line 51, in check\\n    assert candidate(3, 4, 5) == 6.00, \"This prints if this assert fails 1 (good for debugging!)\"\\nAssertionError: This prints if this assert fails 1 (good for debugging!)\\n'}",
        "data_source": "humaneval",
        "metadata": {
            "quality": "low",
            "llm_name": "baichuan2-13b"
        }
    }
]