[
  {
    "nl": "divide the values with same keys of two dictionary `d1` and `d2`",
    "cmd": "{k: (float(d2[k]) / d1[k]) for k in d2}",
    "question_id": "11840111-96",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#float"
    ],
    "clean_cmd": "{k: (float(d2[k]) / d1[k])  k  d2}",
    "canonical_cmd": "{k: (float(VAR_STR[k]) / VAR_STR[k]) for k in VAR_STR}"
  },
  {
    "nl": "divide values associated with each key in dictionary `d1` from values associated with the same key in dictionary `d2`",
    "cmd": "dict((k, float(d2[k]) / d1[k]) for k in d2)",
    "question_id": "11840111-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#float",
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict((k, float(d2[k]) / d1[k])  k  d2)",
    "canonical_cmd": "dict((k, float(VAR_STR[k]) / VAR_STR[k]) for k in VAR_STR)"
  },
  {
    "nl": "download \"http://randomsite.com/file.gz\" from http and save as \"file.gz\"",
    "cmd": "testfile = urllib.request.URLopener()\ntestfile.retrieve('http://randomsite.com/file.gz', 'file.gz')",
    "question_id": "19602931-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#urllib.request.URLopener",
      "python.library.urllib.request#urllib.request.URLopener.retrieve"
    ],
    "clean_cmd": "testfile = urllib.request.URLopener()testfile.retrieve(, )",
    "canonical_cmd": "testfile = urllib.request.URLopener()\ntestfile.retrieve('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "download file from http url \"http://randomsite.com/file.gz\" and save as \"file.gz\"",
    "cmd": "urllib.request.urlretrieve('http://randomsite.com/file.gz', 'file.gz')",
    "question_id": "19602931-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#urllib.request.urlretrieve"
    ],
    "clean_cmd": "urllib.request.urlretrieve(, )",
    "canonical_cmd": "urllib.request.urlretrieve('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "filter a Django model `MyModel` to have charfield length of max `255`",
    "cmd": "MyModel.objects.extra(where=['CHAR_LENGTH(text) > 254'])",
    "question_id": "23351183-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.zipfile#zipfile.ZipInfo.extra"
    ],
    "clean_cmd": "MyModel.objects.extra(where=[])",
    "canonical_cmd": "VAR_STR.objects.extra(where=['CHAR_LENGTH(text) > 254'])"
  },
  {
    "nl": "get logical xor of `a` and `b`",
    "cmd": "((a and (not b)) or ((not a) and b))",
    "question_id": "432842-3",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "((a  ( b))  (( a)  b))",
    "canonical_cmd": "VAR_STR and not VAR_STR or not VAR_STR and VAR_STR"
  },
  {
    "nl": "get the date 1 month from today",
    "cmd": "(date(2010, 12, 31) + relativedelta(months=(+ 1)))",
    "question_id": "546321-12",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.dates_api#matplotlib.dates.relativedelta",
      "python.library.datetime#datetime.date"
    ],
    "clean_cmd": "(date(, , ) + relativedelta(months=(+ )))",
    "canonical_cmd": "date(2010, 12, 31) + relativedelta(months=+1)"
  },
  {
    "nl": "get the date 2 months from today",
    "cmd": "(date(2010, 12, 31) + relativedelta(months=(+ 2)))",
    "question_id": "546321-62",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.dates_api#matplotlib.dates.relativedelta",
      "python.library.datetime#datetime.date"
    ],
    "clean_cmd": "(date(, , ) + relativedelta(months=(+ )))",
    "canonical_cmd": "date(2010, 12, 31) + relativedelta(months=+2)"
  },
  {
    "nl": "call a Python script \"test1.py\"",
    "cmd": "subprocess.call('test1.py', shell=True)",
    "question_id": "1186789-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call(, shell=True)",
    "canonical_cmd": "subprocess.call('VAR_STR', shell=True)"
  },
  {
    "nl": "check if object `obj` has attribute 'attr_name'",
    "cmd": "hasattr(obj, 'attr_name')",
    "question_id": "843277-70",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#hasattr"
    ],
    "clean_cmd": "hasattr(obj, )",
    "canonical_cmd": "hasattr(VAR_STR, 'VAR_STR')"
  },
  {
    "nl": "slice list `[1, 2, 3, 4, 5, 6, 7]` into lists of two elements each",
    "cmd": "list(grouper(2, [1, 2, 3, 4, 5, 6, 7]))",
    "question_id": "2231663-53",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "pandas.reference.api.pandas.grouper"
    ],
    "clean_cmd": "list(grouper(, [, , , , , , ]))",
    "canonical_cmd": "list(grouper(2, [VAR_STR]))"
  },
  {
    "nl": "Slicing a list into a list of sub-lists",
    "cmd": "[input[i:i + n] for i in range(0, len(input), n)]",
    "question_id": "2231663-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range"
    ],
    "clean_cmd": "[input[i:i + n]  i  range(, len(input), n)]",
    "canonical_cmd": "[input[i:i + n] for i in range(0, len(input), n)]"
  },
  {
    "nl": "fetch smilies matching regex pattern '(?::|;|=)(?:-)?(?:\\\\)|\\\\(|D|P)' in string `s`",
    "cmd": "re.findall('(?::|;|=)(?:-)?(?:\\\\)|\\\\(|D|P)', s)",
    "question_id": "14571103-73",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, s)",
    "canonical_cmd": "re.findall('VAR_STR', VAR_STR)"
  },
  {
    "nl": "Get data of dataframe `df` where the sum of column 'X' grouped by column 'User' is equal to 0",
    "cmd": "df.loc[df.groupby('User')['X'].transform(sum) == 0]",
    "question_id": "27868020-14",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.loc",
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.dataframe.transform"
    ],
    "clean_cmd": "df.loc[df.groupby()[].transform(sum) == ]",
    "canonical_cmd": "VAR_STR.loc[VAR_STR.groupby('VAR_STR')['VAR_STR'].transform(sum) == 0]"
  },
  {
    "nl": "Get data from dataframe `df` where column 'X' is equal to 0",
    "cmd": "df.groupby('User')['X'].transform(sum) == 0",
    "question_id": "27868020-52",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.dataframe.transform"
    ],
    "clean_cmd": "df.groupby()[].transform(sum) == ",
    "canonical_cmd": "VAR_STR.groupby('User')['VAR_STR'].transform(sum) == 0"
  },
  {
    "nl": "webbrowser open url 'http://example.com'",
    "cmd": "webbrowser.open('http://example.com')",
    "question_id": "4302027-39",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.webbrowser#webbrowser.open"
    ],
    "clean_cmd": "webbrowser.open()",
    "canonical_cmd": "webbrowser.open('VAR_STR')"
  },
  {
    "nl": "Access environment variable \"HOME\"",
    "cmd": "os.environ['HOME']",
    "question_id": "4906977-61",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "os.environ[]",
    "canonical_cmd": "os.environ['VAR_STR']"
  },
  {
    "nl": "get value of environment variable \"HOME\"",
    "cmd": "os.environ['HOME']",
    "question_id": "4906977-33",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "os.environ[]",
    "canonical_cmd": "os.environ['VAR_STR']"
  },
  {
    "nl": "print all environment variables",
    "cmd": "print(os.environ)",
    "question_id": "4906977-98",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print(os.environ)",
    "canonical_cmd": "print(os.environ)"
  },
  {
    "nl": "get all environment variables",
    "cmd": "os.environ",
    "question_id": "4906977-9",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "os.environ",
    "canonical_cmd": "os.environ"
  },
  {
    "nl": "get value of the environment variable 'KEY_THAT_MIGHT_EXIST' with default value `default_value`",
    "cmd": "print(os.getenv('KEY_THAT_MIGHT_EXIST', default_value))",
    "question_id": "4906977-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.getenv"
    ],
    "clean_cmd": "print(os.getenv(, default_value))",
    "canonical_cmd": "print(os.getenv('VAR_STR', VAR_STR))"
  },
  {
    "nl": "sort dictionary `dictionary` in ascending order by its values",
    "cmd": "sorted(list(dictionary.items()), key=operator.itemgetter(1))",
    "question_id": "674509-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.operator#operator.itemgetter",
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(list(dictionary.items()), key=operator.itemgetter())",
    "canonical_cmd": "sorted(list(VAR_STR.items()), key=operator.itemgetter(1))"
  },
  {
    "nl": "iterate over a python dictionary, ordered by values",
    "cmd": "sorted(list(dictionary.items()), key=lambda x: x[1])",
    "question_id": "674509-45",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(list(dictionary.items()), key= x: x[])",
    "canonical_cmd": "sorted(list(dictionary.items()), key=lambda x: x[1])"
  },
  {
    "nl": "pandas split strings in column 'stats' by ',' into columns in dataframe `df`",
    "cmd": "df['stats'].str[1:-1].str.split(',', expand=True).astype(float)",
    "question_id": "29370211-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "df[].str[:-].str.split(, expand=True).astype(float)",
    "canonical_cmd": "VAR_STR['VAR_STR'].str[1:-1].str.split('VAR_STR', expand=True).astype(float)"
  },
  {
    "nl": "split string in column 'stats' by ',' into separate columns in dataframe `df`",
    "cmd": "df['stats'].str[1:-1].str.split(',').apply(pd.Series).astype(float)",
    "question_id": "29370211-55",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split",
      "pandas.reference.api.pandas.series.astype",
      "pandas.reference.api.pandas.series.apply"
    ],
    "clean_cmd": "df[].str[:-].str.split().apply(pd.Series).astype(float)",
    "canonical_cmd": "VAR_STR['VAR_STR'].str[1:-1].str.split('VAR_STR').apply(pd.Series).astype(float)"
  },
  {
    "nl": "Remove characters \"!@#$\" from a string `line`",
    "cmd": "line.translate(None, '!@#$')",
    "question_id": "3939361-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.translate"
    ],
    "clean_cmd": "line.translate(None, )",
    "canonical_cmd": "VAR_STR.translate(None, 'VAR_STR')"
  },
  {
    "nl": "Remove characters \"!@#$\" from a string `line`",
    "cmd": "line = re.sub('[!@#$]', '', line)",
    "question_id": "3939361-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "line = re.sub(, , line)",
    "canonical_cmd": "VAR_STR = re.sub('[!@#$]', '', VAR_STR)"
  },
  {
    "nl": "Remove string \"1\" from string `string`",
    "cmd": "string.replace('1', '')",
    "question_id": "3939361-18",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "string.replace(, )",
    "canonical_cmd": "VAR_STR.replace('VAR_STR', '')"
  },
  {
    "nl": "Remove characters in '!@#$' from a string `line`",
    "cmd": "line = line.translate(string.maketrans('', ''), '!@#$')",
    "question_id": "3939361-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.maketrans",
      "python.library.stdtypes#str.translate"
    ],
    "clean_cmd": "line = line.translate(string.maketrans(, ), )",
    "canonical_cmd": "VAR_STR = VAR_STR.translate(string.maketrans('', ''), 'VAR_STR')"
  },
  {
    "nl": "subtract 1 hour and 10 minutes from time object `t`",
    "cmd": "(t - datetime.timedelta(hours=1, minutes=10))",
    "question_id": "14043934-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.timedelta"
    ],
    "clean_cmd": "(t - datetime.timedelta(hours=, minutes=))",
    "canonical_cmd": "VAR_STR - datetime.timedelta(hours=1, minutes=10)"
  },
  {
    "nl": "subtract 5 hours from the time object `dt`",
    "cmd": "dt -= datetime.timedelta(hours=5)",
    "question_id": "14043934-42",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.timedelta"
    ],
    "clean_cmd": "dt -= datetime.timedelta(hours=)",
    "canonical_cmd": "VAR_STR -= datetime.timedelta(hours=5)"
  },
  {
    "nl": "generate a 12-digit random number",
    "cmd": "random.randint(100000000000, 999999999999)",
    "question_id": "13496087-84",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.random#random.randint"
    ],
    "clean_cmd": "random.randint(, )",
    "canonical_cmd": "random.randint(100000000000, 999999999999)"
  },
  {
    "nl": "generate a 12-digit random number",
    "cmd": "'%0.12d' % random.randint(0, 999999999999)",
    "question_id": "13496087-95",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.random#random.randint"
    ],
    "clean_cmd": " % random.randint(, )",
    "canonical_cmd": "'%0.12d' % random.randint(0, 999999999999)"
  },
  {
    "nl": "Parsing webpage 'http://www.google.com/' using BeautifulSoup",
    "cmd": "page = urllib.request.urlopen('http://www.google.com/')\nsoup = BeautifulSoup(page)",
    "question_id": "11709079-81",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#urllib.request.urlopen"
    ],
    "clean_cmd": "page = urllib.request.urlopen()soup = BeautifulSoup(page)",
    "canonical_cmd": "page = urllib.request.urlopen('VAR_STR')\nsoup = BeautifulSoup(page)"
  },
  {
    "nl": "make a window `root` jump to the front",
    "cmd": "root.lift()",
    "question_id": "1892339-46",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "root.lift()",
    "canonical_cmd": "VAR_STR.lift()"
  },
  {
    "nl": "print number `value` as thousands separators",
    "cmd": "'{:,}'.format(value)",
    "question_id": "1823058-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format(value)",
    "canonical_cmd": "\"\"\"{:,}\"\"\".format(VAR_STR)"
  },
  {
    "nl": "move an x-axis label to the top of a plot `ax` in matplotlib",
    "cmd": "ax.xaxis.set_label_position('top')",
    "question_id": "14406214-5",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib._as_gen.matplotlib.axis.xaxis.set_label_position"
    ],
    "clean_cmd": "ax.xaxis.set_label_position()",
    "canonical_cmd": "VAR_STR.xaxis.set_label_position('top')"
  },
  {
    "nl": "move x-axis to the top of a plot `ax`",
    "cmd": "ax.xaxis.tick_top()",
    "question_id": "14406214-43",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib._as_gen.matplotlib.axis.xaxis.tick_top"
    ],
    "clean_cmd": "ax.xaxis.tick_top()",
    "canonical_cmd": "VAR_STR.xaxis.tick_top()"
  },
  {
    "nl": "check if array `b` contains all elements of array `a`",
    "cmd": "numpy.in1d(b, a).all()",
    "question_id": "10565598-66",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.in1d",
      "python.library.functions#all"
    ],
    "clean_cmd": "numpy.in1d(b, a).all()",
    "canonical_cmd": "numpy.in1d(VAR_STR, VAR_STR).all()"
  },
  {
    "nl": "convert decimal 8 to a list of its binary values",
    "cmd": "list('{0:0b}'.format(8))",
    "question_id": "13557937-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format",
      "python.library.functions#list"
    ],
    "clean_cmd": "list(.format())",
    "canonical_cmd": "list('{0:0b}'.format(8))"
  },
  {
    "nl": "convert decimal integer 8 to a list of its binary values as elements",
    "cmd": "[int(x) for x in list('{0:0b}'.format(8))]",
    "question_id": "13557937-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.functions#format",
      "python.library.functions#list"
    ],
    "clean_cmd": "[int(x)  x  list(.format())]",
    "canonical_cmd": "[int(x) for x in list('{0:0b}'.format(8))]"
  },
  {
    "nl": "print current date and time in a regular format",
    "cmd": "time.strftime('%Y-%m-%d %H:%M')",
    "question_id": "311627-76",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.strftime"
    ],
    "clean_cmd": "time.strftime()",
    "canonical_cmd": "time.strftime('%Y-%m-%d %H:%M')"
  },
  {
    "nl": "reverse a list `L`",
    "cmd": "L[::(-1)]",
    "question_id": "3940128-52",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "L[::(-)]",
    "canonical_cmd": "VAR_STR[::-1]"
  },
  {
    "nl": "reverse a list `L`",
    "cmd": "L.reverse()",
    "question_id": "3940128-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.audioop#audioop.reverse"
    ],
    "clean_cmd": "L.reverse()",
    "canonical_cmd": "VAR_STR.reverse()"
  },
  {
    "nl": "Calling an external command \"ls -l\"",
    "cmd": "from subprocess import call",
    "question_id": "89228-90",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "from subprocess import call",
    "canonical_cmd": "from subprocess import call"
  },
  {
    "nl": "Calling an external command \"some_command with args\"",
    "cmd": "os.system('some_command with args')",
    "question_id": "89228-91",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system('VAR_STR')"
  },
  {
    "nl": "Calling an external command \"some_command < input_file | another_command > output_file\"",
    "cmd": "os.system('some_command < input_file | another_command > output_file')",
    "question_id": "89228-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system('VAR_STR')"
  },
  {
    "nl": "Calling an external command \"some_command with args\"",
    "cmd": "stream = os.popen('some_command with args')",
    "question_id": "89228-76",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.popen"
    ],
    "clean_cmd": "stream = os.popen()",
    "canonical_cmd": "stream = os.popen('VAR_STR')"
  },
  {
    "nl": "Calling an external command \"echo Hello World\"",
    "cmd": "return_code = subprocess.call('echo Hello World', shell=True)",
    "question_id": "89228-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "return_code = subprocess.call(, shell=True)",
    "canonical_cmd": "return_code = subprocess.call('VAR_STR', shell=True)"
  },
  {
    "nl": "Calling an external command \"ls -l\"",
    "cmd": "call(['ls', '-l'])",
    "question_id": "89228-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "call([, ])",
    "canonical_cmd": "call(['ls', '-l'])"
  },
  {
    "nl": "find the current directory",
    "cmd": "os.path.realpath(__file__)",
    "question_id": "5137497-23",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.realpath"
    ],
    "clean_cmd": "os.path.realpath(__file__)",
    "canonical_cmd": "os.path.realpath(__file__)"
  },
  {
    "nl": "get the directory name of `path`",
    "cmd": "os.path.dirname(path)",
    "question_id": "5137497-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.dirname"
    ],
    "clean_cmd": "os.path.dirname(path)",
    "canonical_cmd": "os.VAR_STR.dirname(VAR_STR)"
  },
  {
    "nl": "get the canonical path of file `path`",
    "cmd": "os.path.realpath(path)",
    "question_id": "5137497-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.realpath"
    ],
    "clean_cmd": "os.path.realpath(path)",
    "canonical_cmd": "os.VAR_STR.realpath(VAR_STR)"
  },
  {
    "nl": "Find name of current directory",
    "cmd": "dir_path = os.path.dirname(os.path.realpath(__file__))",
    "question_id": "5137497-91",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.dirname",
      "python.library.os.path#os.path.realpath"
    ],
    "clean_cmd": "dir_path = os.path.dirname(os.path.realpath(__file__))",
    "canonical_cmd": "dir_path = os.path.dirname(os.path.realpath(__file__))"
  },
  {
    "nl": "Find the full path of current directory",
    "cmd": "full_path = os.path.realpath(__file__)",
    "question_id": "5137497-76",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.realpath"
    ],
    "clean_cmd": "full_path = os.path.realpath(__file__)",
    "canonical_cmd": "full_path = os.path.realpath(__file__)"
  },
  {
    "nl": "unpack hexadecimal string `s` to a list of integer values",
    "cmd": "struct.unpack('11B', s)",
    "question_id": "14961562-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.struct#struct.unpack"
    ],
    "clean_cmd": "struct.unpack(, s)",
    "canonical_cmd": "struct.unpack('11B', VAR_STR)"
  },
  {
    "nl": "convert column of date objects 'DateObj' in pandas dataframe `df` to strings in new column 'DateStr'",
    "cmd": "df['DateStr'] = df['DateObj'].dt.strftime('%d%m%Y')",
    "question_id": "19738169-65",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.series.dt.strftime"
    ],
    "clean_cmd": "df[] = df[].dt.strftime()",
    "canonical_cmd": "VAR_STR['VAR_STR'] = VAR_STR['VAR_STR'].dt.strftime('%d%m%Y')"
  },
  {
    "nl": "generate all permutations of list `[1, 2, 3]` and list `[4, 5, 6]`",
    "cmd": "print(list(itertools.product([1, 2, 3], [4, 5, 6])))",
    "question_id": "104420-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.product",
      "python.library.functions#list"
    ],
    "clean_cmd": "print(list(itertools.product([, , ], [, , ])))",
    "canonical_cmd": "print(list(itertools.product([VAR_STR], [VAR_STR])))"
  },
  {
    "nl": "generate all permutations of a list `[1, 2, 3]`",
    "cmd": "itertools.permutations([1, 2, 3])",
    "question_id": "104420-100",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.permutations"
    ],
    "clean_cmd": "itertools.permutations([, , ])",
    "canonical_cmd": "itertools.permutations([VAR_STR])"
  },
  {
    "nl": "Exclude column names when writing dataframe `df` to a csv file `filename.csv`",
    "cmd": "df.to_csv('filename.csv', header=False)",
    "question_id": "19781609-19",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.to_csv"
    ],
    "clean_cmd": "df.to_csv(, header=False)",
    "canonical_cmd": "VAR_STR.to_csv('VAR_STR', header=False)"
  },
  {
    "nl": "How do I get rid of Python Tkinter root window?",
    "cmd": "root.destroy()",
    "question_id": "1406145-57",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.backend_tools_api#matplotlib.backend_tools.ToolBase.destroy"
    ],
    "clean_cmd": "root.destroy()",
    "canonical_cmd": "root.destroy()"
  },
  {
    "nl": "create a list `files` containing all files in directory '.' that starts with numbers between 0 and 9 and ends with the extension '.jpg'",
    "cmd": "files = [f for f in os.listdir('.') if re.match('[0-9]+.*\\\\.jpg', f)]",
    "question_id": "2225564-15",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.listdir",
      "python.library.re#re.match"
    ],
    "clean_cmd": "files = [f  f  os.listdir()  re.match(, f)]",
    "canonical_cmd": "VAR_STR = [f for f in os.listdir('VAR_STR') if re.match('[0-9]+.*\\\\.jpg', f)]"
  },
  {
    "nl": "convert dataframe `df` into a pivot table using column 'order' as index and values of column 'sample' as columns",
    "cmd": "df.pivot(index='order', columns='sample')",
    "question_id": "30406725-90",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.pivot"
    ],
    "clean_cmd": "df.pivot(index=, columns=)",
    "canonical_cmd": "VAR_STR.pivot(index='VAR_STR', columns='VAR_STR')"
  },
  {
    "nl": "python pickle/unpickle a list to/from a file 'afile'",
    "cmd": "pickle.load(open('afile', 'rb'))",
    "question_id": "18229082-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.pickle#pickle.load",
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "pickle.load(open(, ))",
    "canonical_cmd": "pickle.load(open('VAR_STR', 'rb'))"
  },
  {
    "nl": "Extract first two substrings in string `phrase` that end in `.`, `?` or `!`",
    "cmd": "re.match('(.*?[.?!](?:\\\\s+.*?[.?!]){0,1})', phrase).group(1)",
    "question_id": "3329386-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.match",
      "python.library.re#re.Match.group"
    ],
    "clean_cmd": "re.match(, phrase).group()",
    "canonical_cmd": "re.match('(.*?[.?!](?:\\\\s+.*?[.?!]){0,1})', VAR_STR).group(1)"
  },
  {
    "nl": "how to write a unicode csv in Python 2.7",
    "cmd": "self.writer.writerow([str(s).encode('utf-8') for s in row])",
    "question_id": "22733642-1",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str",
      "python.library.stdtypes#str.encode",
      "python.library.csv#csv.csvwriter.writerow"
    ],
    "clean_cmd": "self.writer.writerow([str(s).encode()  s  row])",
    "canonical_cmd": "self.writer.writerow([str(s).encode('utf-8') for s in row])"
  },
  {
    "nl": "calculat the difference between each row and the row previous to it in dataframe `data`",
    "cmd": "data.set_index('Date').diff()",
    "question_id": "13114512-57",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.diff",
      "pandas.reference.api.pandas.dataframe.set_index"
    ],
    "clean_cmd": "data.set_index().diff()",
    "canonical_cmd": "VAR_STR.set_index('Date').diff()"
  },
  {
    "nl": "get index of elements in array `A` that occur in another array `B`",
    "cmd": "np.where(np.in1d(A, B))[0]",
    "question_id": "28901311-21",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.in1d",
      "numpy.reference.generated.numpy.where"
    ],
    "clean_cmd": "np.where(np.in1d(A, B))[]",
    "canonical_cmd": "np.where(np.in1d(VAR_STR, VAR_STR))[0]"
  },
  {
    "nl": "query all data from table `Task` where the value of column `time_spent` is bigger than 3 hours",
    "cmd": "session.query(Task).filter(Task.time_spent > timedelta(hours=3)).all()",
    "question_id": "18102109-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#all",
      "python.library.functions#filter",
      "pandas.reference.api.pandas.timedelta"
    ],
    "clean_cmd": "session.query(Task).filter(Task.time_spent &gt; timedelta(hours=)).all()",
    "canonical_cmd": "session.query(VAR_STR).filter(VAR_STR.VAR_STR > timedelta(hours=3)).all()"
  },
  {
    "nl": "get current date and time",
    "cmd": "datetime.datetime.now()",
    "question_id": "415511-22",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.now"
    ],
    "clean_cmd": "datetime.datetime.now()",
    "canonical_cmd": "datetime.datetime.now()"
  },
  {
    "nl": "get current time",
    "cmd": "datetime.datetime.now().time()",
    "question_id": "415511-29",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.now",
      "python.library.datetime#datetime.datetime.time"
    ],
    "clean_cmd": "datetime.datetime.now().time()",
    "canonical_cmd": "datetime.datetime.now().time()"
  },
  {
    "nl": "get current time in pretty format",
    "cmd": "strftime('%Y-%m-%d %H:%M:%S', gmtime())",
    "question_id": "415511-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.gmtime",
      "python.library.time#time.strftime"
    ],
    "clean_cmd": "strftime(, gmtime())",
    "canonical_cmd": "strftime('%Y-%m-%d %H:%M:%S', gmtime())"
  },
  {
    "nl": "get current time in string format",
    "cmd": "str(datetime.now())",
    "question_id": "415511-14",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str",
      "python.library.datetime#datetime.datetime.now"
    ],
    "clean_cmd": "str(datetime.now())",
    "canonical_cmd": "str(datetime.now())"
  },
  {
    "nl": "get current time",
    "cmd": "datetime.datetime.time(datetime.datetime.now())",
    "question_id": "415511-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.now",
      "python.library.datetime#datetime.datetime.time"
    ],
    "clean_cmd": "datetime.datetime.time(datetime.datetime.now())",
    "canonical_cmd": "datetime.datetime.time(datetime.datetime.now())"
  },
  {
    "nl": "adding a 1-d array `[1, 2, 3, 4, 5, 6, 7, 8, 9]` to a 3-d array `np.zeros((6, 9, 20))`",
    "cmd": "np.zeros((6, 9, 20)) + np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])[(None), :, (None)]",
    "question_id": "32283692-10",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.zeros",
      "numpy.reference.generated.numpy.array"
    ],
    "clean_cmd": "np.zeros((, , )) + np.array([, , , , , , , , ])[(None), :, (None)]",
    "canonical_cmd": "np.zeros((6, 9, 20)) + np.array([VAR_STR])[(None), :, (None)]"
  },
  {
    "nl": "add array of shape `(6, 9, 20)` to array `[1, 2, 3, 4, 5, 6, 7, 8, 9]`",
    "cmd": "np.zeros((6, 9, 20)) + np.array([1, 2, 3, 4, 5, 6, 7, 8, 9]).reshape((1, 9, 1))",
    "question_id": "32283692-46",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.zeros",
      "numpy.reference.generated.numpy.array",
      "numpy.reference.generated.numpy.reshape"
    ],
    "clean_cmd": "np.zeros((, , )) + np.array([, , , , , , , , ]).reshape((, , ))",
    "canonical_cmd": "np.zeros((VAR_STR)) + np.array([VAR_STR]).reshape((1, 9, 1))"
  },
  {
    "nl": "extract first and last row of a dataframe `df`",
    "cmd": "pd.concat([df.head(1), df.tail(1)])",
    "question_id": "36542169-84",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.tail",
      "pandas.reference.api.pandas.dataframe.head",
      "pandas.reference.api.pandas.concat"
    ],
    "clean_cmd": "pd.concat([df.head(), df.tail()])",
    "canonical_cmd": "pd.concat([VAR_STR.head(1), VAR_STR.tail(1)])"
  },
  {
    "nl": "convert date string '24052010' to date object in format '%d%m%Y'",
    "cmd": "datetime.datetime.strptime('24052010', '%d%m%Y').date()",
    "question_id": "2803852-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime",
      "python.library.datetime#datetime.datetime.date"
    ],
    "clean_cmd": "datetime.datetime.strptime(, ).date()",
    "canonical_cmd": "datetime.datetime.strptime('VAR_STR', 'VAR_STR').date()"
  },
  {
    "nl": "calculate the mean of the nonzero values' indices of dataframe `df`",
    "cmd": "np.flatnonzero(x).mean()",
    "question_id": "39719140-55",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.flatnonzero",
      "numpy.reference.generated.numpy.mean"
    ],
    "clean_cmd": "np.flatnonzero(x).mean()",
    "canonical_cmd": "np.flatnonzero(x).mean()"
  },
  {
    "nl": "Get index of numpy array `a` with another numpy array `b`",
    "cmd": "a[tuple(b)]",
    "question_id": "5508352-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#tuple"
    ],
    "clean_cmd": "a[tuple(b)]",
    "canonical_cmd": "VAR_STR[tuple(VAR_STR)]"
  },
  {
    "nl": "input an integer tuple from user",
    "cmd": "tuple(map(int, input().split(',')))",
    "question_id": "2233917-10",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#tuple",
      "python.library.functions#input",
      "python.library.functions#map"
    ],
    "clean_cmd": "tuple(map(int, input().split()))",
    "canonical_cmd": "tuple(map(int, input().split(',')))"
  },
  {
    "nl": "input a tuple of integers from user",
    "cmd": "tuple(int(x.strip()) for x in input().split(','))",
    "question_id": "2233917-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#tuple",
      "python.library.functions#input",
      "python.library.functions#int",
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": "tuple(int(x.strip())  x  input().split())",
    "canonical_cmd": "tuple(int(x.strip()) for x in input().split(','))"
  },
  {
    "nl": "rotate x-axis text labels of plot `ax` 45 degrees",
    "cmd": "ax.set_xticklabels(ax.xaxis.get_majorticklabels(), rotation=45)",
    "question_id": "10998621-33",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib._as_gen.matplotlib.axis.axis.get_majorticklabels",
      "matplotlib._as_gen.matplotlib.axes.axes.set_xticklabels"
    ],
    "clean_cmd": "ax.set_xticklabels(ax.xaxis.get_majorticklabels(), rotation=)",
    "canonical_cmd": "VAR_STR.set_xticklabels(VAR_STR.xaxis.get_majorticklabels(), rotation=45)"
  },
  {
    "nl": "sum the 3 largest integers in groupby by 'STNAME' and 'COUNTY_POP'",
    "cmd": "df.groupby('STNAME')['COUNTY_POP'].agg(lambda x: x.nlargest(3).sum())",
    "question_id": "40517350-9",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "python.library.functions#sum",
      "pandas.reference.api.pandas.dataframe.agg",
      "pandas.reference.api.pandas.dataframe.nlargest"
    ],
    "clean_cmd": "df.groupby()[].agg( x: x.nlargest().sum())",
    "canonical_cmd": "df.groupby('VAR_STR')['VAR_STR'].agg(lambda x: x.nlargest(3).sum())"
  },
  {
    "nl": "transform time series `df` into a pivot table aggregated by column 'Close' using column `df.index.date` as index and values of column `df.index.time` as columns",
    "cmd": "pd.pivot_table(df, index=df.index.date, columns=df.index.time, values='Close')",
    "question_id": "28664103-14",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.pivot_table"
    ],
    "clean_cmd": "pd.pivot_table(df, index=df.index.date, columns=df.index.time, values=)",
    "canonical_cmd": "pd.pivot_table(VAR_STR, index=VAR_STR.index.date, columns=VAR_STR.index.time,\n    values='VAR_STR')"
  },
  {
    "nl": "Create array `a` containing integers from stdin",
    "cmd": "a.fromlist([int(val) for val in stdin.read().split()])",
    "question_id": "8192379-98",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.array#array.array.fromlist",
      "python.library.os#os.read",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "a.fromlist([int(val)  val  stdin.read().split()])",
    "canonical_cmd": "VAR_STR.fromlist([int(val) for val in stdin.read().split()])"
  },
  {
    "nl": "reverse list `yourdata`",
    "cmd": "sorted(yourdata, reverse=True)",
    "question_id": "13237941-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(yourdata, reverse=True)",
    "canonical_cmd": "sorted(VAR_STR, reverse=True)"
  },
  {
    "nl": "sort list of nested dictionaries `yourdata` in reverse based on values associated with each dictionary's key 'subkey'",
    "cmd": "sorted(yourdata, key=lambda d: d.get('key', {}).get('subkey'), reverse=True)",
    "question_id": "13237941-56",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.stdtypes#dict.get"
    ],
    "clean_cmd": "sorted(yourdata, key= d: d.get(, {}).get(), reverse=True)",
    "canonical_cmd": "sorted(VAR_STR, key=lambda d: d.get('key', {}).get('subkey'), reverse=True)"
  },
  {
    "nl": "sort list of nested dictionaries `yourdata` in reverse order of 'key' and 'subkey'",
    "cmd": "yourdata.sort(key=lambda e: e['key']['subkey'], reverse=True)",
    "question_id": "13237941-14",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "yourdata.sort(key= e: e[][], reverse=True)",
    "canonical_cmd": "VAR_STR.sort(VAR_STR=lambda e: e['VAR_STR']['VAR_STR'], reverse=True)"
  },
  {
    "nl": "Draw node labels `labels` on networkx graph `G` at position `pos`",
    "cmd": "networkx.draw_networkx_labels(G, pos, labels)",
    "question_id": "15548506-61",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "networkx.draw_networkx_labels(G, pos, labels)",
    "canonical_cmd": "networkx.draw_networkx_labels(VAR_STR, VAR_STR, VAR_STR)"
  },
  {
    "nl": "get the common prefix from comparing two absolute paths '/usr/var' and '/usr/var2/log'",
    "cmd": "os.path.commonprefix(['/usr/var', '/usr/var2/log'])",
    "question_id": "7287996-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.commonprefix"
    ],
    "clean_cmd": "os.path.commonprefix([, ])",
    "canonical_cmd": "os.path.commonprefix(['VAR_STR', 'VAR_STR'])"
  },
  {
    "nl": "get relative path of path '/usr/var' regarding path '/usr/var/log/'",
    "cmd": "print(os.path.relpath('/usr/var/log/', '/usr/var'))",
    "question_id": "7287996-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.relpath"
    ],
    "clean_cmd": "print(os.path.relpath(, ))",
    "canonical_cmd": "print(os.path.relpath('VAR_STR', 'VAR_STR'))"
  },
  {
    "nl": "Evaluate a nested dictionary `myobject.id.number` to get `number` if `myobject` is present with getattr",
    "cmd": "getattr(getattr(myobject, 'id', None), 'number', None)",
    "question_id": "14925239-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#getattr"
    ],
    "clean_cmd": "getattr(getattr(myobject, , None), , None)",
    "canonical_cmd": "getattr(getattr(VAR_STR, 'id', None), 'VAR_STR', None)"
  },
  {
    "nl": "outer product of each column of a 2d `X` array to form a 3d array `X`",
    "cmd": "np.einsum('ij,kj->jik', X, X)",
    "question_id": "41469647-32",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.einsum"
    ],
    "clean_cmd": "np.einsum(, X, X)",
    "canonical_cmd": "np.einsum('ij,kj->jik', VAR_STR, VAR_STR)"
  },
  {
    "nl": "create a list containing four-element tuples of permutations of binary values",
    "cmd": "itertools.product(list(range(2)), repeat=4)",
    "question_id": "32292554-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.product",
      "python.library.functions#range",
      "python.library.functions#list"
    ],
    "clean_cmd": "itertools.product(list(range()), repeat=)",
    "canonical_cmd": "itertools.product(list(range(2)), repeat=4)"
  },
  {
    "nl": "match regex pattern '\\\\$[0-9]+[^\\\\$]*$' on string '$1 off delicious $5 ham.'",
    "cmd": "re.match('\\\\$[0-9]+[^\\\\$]*$', '$1 off delicious $5 ham.')",
    "question_id": "3166619-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.match"
    ],
    "clean_cmd": "re.match(, )",
    "canonical_cmd": "re.match('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "generate 6 random numbers between 1 and 50",
    "cmd": "random.sample(range(1, 50), 6)",
    "question_id": "13628725-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.random#random.sample",
      "python.library.functions#range"
    ],
    "clean_cmd": "random.sample(range(, ), )",
    "canonical_cmd": "random.sample(range(1, 50), 6)"
  },
  {
    "nl": "generate six unique random numbers in the range of 1 to 49.",
    "cmd": "random.sample(range(1, 50), 6)",
    "question_id": "13628725-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.random#random.sample",
      "python.library.functions#range"
    ],
    "clean_cmd": "random.sample(range(, ), )",
    "canonical_cmd": "random.sample(range(1, 50), 6)"
  },
  {
    "nl": "get current utc time",
    "cmd": "datetime.utcnow()",
    "question_id": "15940280-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.utcnow"
    ],
    "clean_cmd": "datetime.utcnow()",
    "canonical_cmd": "datetime.utcnow()"
  },
  {
    "nl": "Change the current directory one level up",
    "cmd": "os.chdir('..')",
    "question_id": "12280143-55",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.chdir"
    ],
    "clean_cmd": "os.chdir()",
    "canonical_cmd": "os.chdir('..')"
  },
  {
    "nl": "set the font 'Purisa' of size 12 for a canvas' text item `k`",
    "cmd": "canvas.create_text(x, y, font=('Purisa', 12), text=k)",
    "question_id": "15457504-91",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "canvas.create_text(x, y, font=(, ), text=k)",
    "canonical_cmd": "canvas.create_text(x, y, font=('VAR_STR', 12), text=VAR_STR)"
  },
  {
    "nl": "create a pandas data frame from list of nested dictionaries `my_list`",
    "cmd": "pd.concat([pd.DataFrame(l) for l in my_list], axis=1).T",
    "question_id": "41946927-22",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.concat",
      "pandas.reference.api.pandas.dataframe"
    ],
    "clean_cmd": "pd.concat([pd.DataFrame(l)  l  my_list], axis=).T",
    "canonical_cmd": "pd.concat([pd.DataFrame(l) for l in VAR_STR], axis=1).T"
  },
  {
    "nl": "check if string `one` exists in the values of dictionary `d`",
    "cmd": "'one' in list(d.values())",
    "question_id": "8214932-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "  list(d.values())",
    "canonical_cmd": "'VAR_STR' in list(VAR_STR.values())"
  },
  {
    "nl": "Check if value 'one' is among the values of dictionary `d`",
    "cmd": "'one' in iter(d.values())",
    "question_id": "8214932-94",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#iter",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "  iter(d.values())",
    "canonical_cmd": "'VAR_STR' in iter(VAR_STR.values())"
  },
  {
    "nl": "change legend size to 'x-small' in upper-left location",
    "cmd": "pyplot.legend(loc=2, fontsize='x-small')",
    "question_id": "7125009-76",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib._as_gen.matplotlib.pyplot.legend",
      "pandas.reference.api.pandas.dataframe.loc"
    ],
    "clean_cmd": "pyplot.legend(loc=, fontsize=)",
    "canonical_cmd": "pyplot.legend(loc=2, fontsize='VAR_STR')"
  },
  {
    "nl": "change legend font size with matplotlib.pyplot to 6",
    "cmd": "plot.legend(loc=2, prop={'size': 6})",
    "question_id": "7125009-57",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.legend_api#matplotlib.legend.Legend",
      "pandas.reference.api.pandas.dataframe.loc"
    ],
    "clean_cmd": "plot.legend(loc=, prop={: })",
    "canonical_cmd": "plot.legend(loc=2, prop={'size': 6})"
  },
  {
    "nl": "remove all whitespace in a string `sentence`",
    "cmd": "sentence.replace(' ', '')",
    "question_id": "8270092-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "sentence.replace(, )",
    "canonical_cmd": "VAR_STR.replace(' ', '')"
  },
  {
    "nl": "remove all whitespace in a string `sentence`",
    "cmd": "pattern = re.compile('\\\\s+')\nsentence = re.sub(pattern, '', sentence)",
    "question_id": "8270092-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.compile",
      "python.library.re#re.sub"
    ],
    "clean_cmd": "pattern = re.compile()sentence = re.sub(pattern, , sentence)",
    "canonical_cmd": "pattern = re.compile('\\\\s+')\nVAR_STR = re.sub(pattern, '', VAR_STR)"
  },
  {
    "nl": "remove whitespace in string `sentence` from beginning and end",
    "cmd": "sentence.strip()",
    "question_id": "8270092-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": "sentence.strip()",
    "canonical_cmd": "VAR_STR.strip()"
  },
  {
    "nl": "remove all whitespaces in string `sentence`",
    "cmd": "sentence = re.sub('\\\\s+', '', sentence, flags=re.UNICODE)",
    "question_id": "8270092-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "sentence = re.sub(, , sentence, flags=re.UNICODE)",
    "canonical_cmd": "VAR_STR = re.sub('\\\\s+', '', VAR_STR, flags=re.UNICODE)"
  },
  {
    "nl": "remove all whitespaces in a string `sentence`",
    "cmd": "sentence = ''.join(sentence.split())",
    "question_id": "8270092-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "sentence = .join(sentence.split())",
    "canonical_cmd": "VAR_STR = ''.join(VAR_STR.split())"
  },
  {
    "nl": "create dataframe `df` with content of hdf store file '/home/.../data.h5' with key of 'firstSet'",
    "cmd": "df1 = pd.read_hdf('/home/.../data.h5', 'firstSet')",
    "question_id": "14591855-94",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.read_hdf"
    ],
    "clean_cmd": "df1 = pd.read_hdf(, )",
    "canonical_cmd": "df1 = pd.read_hdf('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "read a text file 'very_Important.txt' into a string variable `str`",
    "cmd": "str = open('very_Important.txt', 'r').read()",
    "question_id": "8369219-15",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.os#os.read"
    ],
    "clean_cmd": "str = open(, ).read()",
    "canonical_cmd": "VAR_STR = open('VAR_STR', 'r').read()"
  },
  {
    "nl": "handle the `urlfetch_errors` exception for imaplib request to url `url`",
    "cmd": "urlfetch.fetch(url, deadline=10 * 60)",
    "question_id": "19445682-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.imaplib#imaplib.IMAP4.fetch"
    ],
    "clean_cmd": "urlfetch.fetch(url, deadline= * )",
    "canonical_cmd": "urlfetch.fetch(VAR_STR, deadline=10 * 60)"
  },
  {
    "nl": "remove the fragment identifier `#something` from a url `http://www.address.com/something#something`",
    "cmd": "urlparse.urldefrag('http://www.address.com/something#something')",
    "question_id": "6250046-45",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.parse#urllib.parse.urldefrag"
    ],
    "clean_cmd": "urlparse.urldefrag()",
    "canonical_cmd": "urlparse.urldefrag('VAR_STR')"
  },
  {
    "nl": "reverse sort items in dictionary `mydict` by value",
    "cmd": "sorted(iter(mydict.items()), key=itemgetter(1), reverse=True)",
    "question_id": "9849192-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#iter",
      "python.library.operator#operator.itemgetter",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(iter(mydict.items()), key=itemgetter(), reverse=True)",
    "canonical_cmd": "sorted(iter(VAR_STR.items()), key=itemgetter(1), reverse=True)"
  },
  {
    "nl": "Flask get value of request variable 'firstname'",
    "cmd": "first_name = request.args.get('firstname')",
    "question_id": "13279399-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.webbrowser#webbrowser.get"
    ],
    "clean_cmd": "first_name = request.args.get()",
    "canonical_cmd": "first_name = request.args.get('VAR_STR')"
  },
  {
    "nl": "Flask get posted form data 'firstname'",
    "cmd": "first_name = request.form.get('firstname')",
    "question_id": "13279399-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.webbrowser#webbrowser.get"
    ],
    "clean_cmd": "first_name = request.form.get()",
    "canonical_cmd": "first_name = request.form.get('VAR_STR')"
  },
  {
    "nl": "read the contents of the file 'file.txt' into `txt`",
    "cmd": "txt = open('file.txt').read()",
    "question_id": "3278850-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.os#os.read"
    ],
    "clean_cmd": "txt = open().read()",
    "canonical_cmd": "VAR_STR = open('VAR_STR').read()"
  },
  {
    "nl": "using python's datetime module, get the year that utc-11 is currently in",
    "cmd": "(datetime.datetime.utcnow() - datetime.timedelta(hours=11)).year",
    "question_id": "14043080-28",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.utcnow",
      "python.library.datetime#datetime.timedelta"
    ],
    "clean_cmd": "(datetime.datetime.utcnow() - datetime.timedelta(hours=)).year",
    "canonical_cmd": "(datetime.datetime.utcnow() - datetime.timedelta(hours=11)).year"
  },
  {
    "nl": "Filter model 'Entry' where 'id' is not equal to 3 in Django",
    "cmd": "Entry.objects.filter(~Q(id=3))",
    "question_id": "687295-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.logging#logging.Filter.filter"
    ],
    "clean_cmd": "Entry.objects.filter(~Q(id=))",
    "canonical_cmd": "VAR_STR.objects.filter(~Q(VAR_STR=3))"
  },
  {
    "nl": "using beautifulsoup to select div blocks within html `soup`",
    "cmd": "soup.find_all('div', class_='crBlock ')",
    "question_id": "19011613-15",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "soup.find_all(, class_=)",
    "canonical_cmd": "VAR_STR.find_all('div', class_='crBlock ')"
  },
  {
    "nl": "convert hex triplet string `rgbstr` to rgb tuple",
    "cmd": "struct.unpack('BBB', rgbstr.decode('hex'))",
    "question_id": "4296249-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.struct#struct.unpack",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "struct.unpack(, rgbstr.decode())",
    "canonical_cmd": "struct.unpack('BBB', VAR_STR.decode('hex'))"
  },
  {
    "nl": "reverse a string `a` by 2 characters at a time",
    "cmd": "\"\"\"\"\"\".join(reversed([a[i:i + 2] for i in range(0, len(a), 2)]))",
    "question_id": "5864271-27",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range",
      "python.library.functions#reversed",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(reversed([a[i:i + ]  i  range(, len(a), )]))",
    "canonical_cmd": "\"\"\"\"\"\".join(reversed([VAR_STR[i:i + 2] for i in range(0, len(VAR_STR), 2)]))"
  },
  {
    "nl": "terminate the program",
    "cmd": "sys.exit()",
    "question_id": "73663-61",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.sys#sys.exit"
    ],
    "clean_cmd": "sys.exit()",
    "canonical_cmd": "sys.exit()"
  },
  {
    "nl": "terminate the program",
    "cmd": "quit()",
    "question_id": "73663-36",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.constants#quit"
    ],
    "clean_cmd": "quit()",
    "canonical_cmd": "quit()"
  },
  {
    "nl": "Terminating a Python script with error message \"some error message\"",
    "cmd": "sys.exit('some error message')",
    "question_id": "73663-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.sys#sys.exit"
    ],
    "clean_cmd": "sys.exit()",
    "canonical_cmd": "sys.exit('VAR_STR')"
  },
  {
    "nl": "Mysql commit current transaction",
    "cmd": "con.commit()",
    "question_id": "21974169-23",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.msilib#msilib.CAB.commit"
    ],
    "clean_cmd": "con.commit()",
    "canonical_cmd": "con.commit()"
  },
  {
    "nl": "hide output of subprocess `['espeak', text]`",
    "cmd": "subprocess.check_output(['espeak', text], stderr=subprocess.STDOUT)",
    "question_id": "11269575-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.check_output"
    ],
    "clean_cmd": "subprocess.check_output([, text], stderr=subprocess.STDOUT)",
    "canonical_cmd": "subprocess.check_output([VAR_STR], stderr=subprocess.STDOUT)"
  },
  {
    "nl": "convert a string `s` containing a decimal to an integer",
    "cmd": "int(Decimal(s))",
    "question_id": "1094717-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.decimal#decimal.Decimal"
    ],
    "clean_cmd": "int(Decimal(s))",
    "canonical_cmd": "int(Decimal(VAR_STR))"
  },
  {
    "nl": "Convert a string to integer with decimal in Python",
    "cmd": "int(s.split('.')[0])",
    "question_id": "1094717-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "int(s.split()[])",
    "canonical_cmd": "int(s.split('.')[0])"
  },
  {
    "nl": "elementwise product of 3d arrays `A` and `B`",
    "cmd": "np.einsum('ijk,ikl->ijl', A, B)",
    "question_id": "31957364-53",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.einsum"
    ],
    "clean_cmd": "np.einsum(, A, B)",
    "canonical_cmd": "np.einsum('ijk,ikl->ijl', VAR_STR, VAR_STR)"
  },
  {
    "nl": "run shell command 'rm -r some.file' in the background",
    "cmd": "subprocess.Popen(['rm', '-r', 'some.file'])",
    "question_id": "1196074-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.Popen"
    ],
    "clean_cmd": "subprocess.Popen([, , ])",
    "canonical_cmd": "subprocess.Popen(['rm', '-r', 'some.file'])"
  },
  {
    "nl": "round off entries in dataframe `df` column `Alabama_exp` to two decimal places, and entries in column `Credit_exp` to three decimal places",
    "cmd": "df.round({'Alabama_exp': 2, 'Credit_exp': 3})",
    "question_id": "19100540-33",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.round"
    ],
    "clean_cmd": "df.round({: , : })",
    "canonical_cmd": "VAR_STR.round({'VAR_STR': 2, 'VAR_STR': 3})"
  },
  {
    "nl": "retrieve an element from a set `s` without removing it",
    "cmd": "e = next(iter(s))",
    "question_id": "59825-84",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#iter",
      "python.library.functions#next"
    ],
    "clean_cmd": "e = next(iter(s))",
    "canonical_cmd": "e = next(iter(VAR_STR))"
  },
  {
    "nl": "Parse string `datestr` into a datetime object using format pattern '%Y-%m-%d'",
    "cmd": "dateobj = datetime.datetime.strptime(datestr, '%Y-%m-%d').date()",
    "question_id": "5868374-42",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime",
      "python.library.datetime#datetime.datetime.date"
    ],
    "clean_cmd": "dateobj = datetime.datetime.strptime(datestr, ).date()",
    "canonical_cmd": "dateobj = datetime.datetime.strptime(VAR_STR, 'VAR_STR').date()"
  },
  {
    "nl": "encode a pdf file `pdf_reference.pdf` with `base64` encoding",
    "cmd": "a = open('pdf_reference.pdf', 'rb').read().encode('base64')",
    "question_id": "208894-61",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.stdtypes#str.encode",
      "python.library.os#os.read"
    ],
    "clean_cmd": "a = open(, ).read().encode()",
    "canonical_cmd": "a = open('VAR_STR', 'rb').read().encode('VAR_STR')"
  },
  {
    "nl": "change current working directory to directory 'chapter3'",
    "cmd": "os.chdir('chapter3')",
    "question_id": "20796355-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.chdir"
    ],
    "clean_cmd": "os.chdir()",
    "canonical_cmd": "os.chdir('VAR_STR')"
  },
  {
    "nl": "change current working directory",
    "cmd": "os.chdir('C:\\\\Users\\\\username\\\\Desktop\\\\headfirstpython\\\\chapter3')",
    "question_id": "20796355-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.chdir"
    ],
    "clean_cmd": "os.chdir()",
    "canonical_cmd": "os.chdir('C:\\\\Users\\\\username\\\\Desktop\\\\headfirstpython\\\\chapter3')"
  },
  {
    "nl": "change current working directory",
    "cmd": "os.chdir('.\\\\chapter3')",
    "question_id": "20796355-81",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.chdir"
    ],
    "clean_cmd": "os.chdir()",
    "canonical_cmd": "os.chdir('.\\\\chapter3')"
  },
  {
    "nl": "upload file with Python Mechanize",
    "cmd": "br.form.add_file(open(filename), 'text/plain', filename)",
    "question_id": "1299855-22",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.msilib#msilib.Directory.add_file"
    ],
    "clean_cmd": "br.form.add_file(open(filename), , filename)",
    "canonical_cmd": "br.form.add_file(open(filename), 'text/plain', filename)"
  },
  {
    "nl": "Match regex '[a-zA-Z][\\\\w-]*\\\\Z' on string 'A\\n'",
    "cmd": "re.match('[a-zA-Z][\\\\w-]*\\\\Z', 'A\\n')",
    "question_id": "2317134-81",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.match"
    ],
    "clean_cmd": "re.match(, )",
    "canonical_cmd": "re.match('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "match regex '[a-zA-Z][\\\\w-]*$' on string '!A_B'",
    "cmd": "re.match('[a-zA-Z][\\\\w-]*$', '!A_B')",
    "question_id": "2317134-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.match"
    ],
    "clean_cmd": "re.match(, )",
    "canonical_cmd": "re.match('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "Find all `div` tags whose classes has the value `comment-` in a beautiful soup object `soup`",
    "cmd": "soup.find_all('div', class_=re.compile('comment-'))",
    "question_id": "13794532-6",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.compile"
    ],
    "clean_cmd": "soup.find_all(, class_=re.compile())",
    "canonical_cmd": "VAR_STR.find_all('VAR_STR', class_=re.compile('VAR_STR'))"
  },
  {
    "nl": "Matplotlib clear the current axes.",
    "cmd": "plt.cla()",
    "question_id": "741877-37",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.spines_api#matplotlib.spines.Spine.cla"
    ],
    "clean_cmd": "plt.cla()",
    "canonical_cmd": "plt.cla()"
  },
  {
    "nl": "Open file 'sample.json' in read mode with encoding of 'utf-8-sig'",
    "cmd": "json.load(codecs.open('sample.json', 'r', 'utf-8-sig'))",
    "question_id": "13156395-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.json#json.load",
      "python.library.codecs#codecs.open"
    ],
    "clean_cmd": "json.load(codecs.open(, , ))",
    "canonical_cmd": "json.load(codecs.open('VAR_STR', 'r', 'VAR_STR'))"
  },
  {
    "nl": "load json file 'sample.json' with utf-8 bom header",
    "cmd": "json.loads(open('sample.json').read().decode('utf-8-sig'))",
    "question_id": "13156395-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.json#json.loads",
      "python.library.urllib.request#open",
      "python.library.json#json.JSONDecoder.decode"
    ],
    "clean_cmd": "json.loads(open().read().decode())",
    "canonical_cmd": "json.loads(open('VAR_STR').read().decode('utf-8-sig'))"
  },
  {
    "nl": "properly quit a program",
    "cmd": "sys.exit(0)",
    "question_id": "13022385-76",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.sys#sys.exit"
    ],
    "clean_cmd": "sys.exit()",
    "canonical_cmd": "sys.exit(0)"
  },
  {
    "nl": "convert string representation `s2` of binary string rep of integer to floating point number",
    "cmd": "struct.unpack('d', struct.pack('Q', int(s2, 0)))[0]",
    "question_id": "8751653-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.struct#struct.unpack",
      "python.library.struct#struct.pack",
      "python.library.functions#int"
    ],
    "clean_cmd": "struct.unpack(, struct.pack(, int(s2, )))[]",
    "canonical_cmd": "struct.unpack('d', struct.pack('Q', int(VAR_STR, 0)))[0]"
  },
  {
    "nl": "convert a binary '-0b1110' to a float number",
    "cmd": "float(int('-0b1110', 0))",
    "question_id": "8751653-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#float",
      "python.library.functions#int"
    ],
    "clean_cmd": "float(int(, ))",
    "canonical_cmd": "float(int('VAR_STR', 0))"
  },
  {
    "nl": "convert a binary `b8` to a float number",
    "cmd": "struct.unpack('d', b8)[0]",
    "question_id": "8751653-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.struct#struct.unpack"
    ],
    "clean_cmd": "struct.unpack(, b8)[]",
    "canonical_cmd": "struct.unpack('d', VAR_STR)[0]"
  },
  {
    "nl": "in django, check if a user is in a group 'Member'",
    "cmd": "return user.groups.filter(name='Member').exists()",
    "question_id": "4789021-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.logging#logging.Filter.filter",
      "python.library.zipfile#zipfile.Path.exists"
    ],
    "clean_cmd": " user.groups.filter(name=).exists()",
    "canonical_cmd": "return user.groups.filter(name='VAR_STR').exists()"
  },
  {
    "nl": "check if a user `user` is in a group from list of groups `['group1', 'group2']`",
    "cmd": "return user.groups.filter(name__in=['group1', 'group2']).exists()",
    "question_id": "4789021-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.logging#logging.Filter.filter",
      "python.library.zipfile#zipfile.Path.exists"
    ],
    "clean_cmd": " user.groups.filter(name__in=[, ]).exists()",
    "canonical_cmd": "return VAR_STR.groups.filter(name__in=[VAR_STR]).exists()"
  },
  {
    "nl": "find rows matching `(0,1)` in a 2 dimensional numpy array `vals`",
    "cmd": "np.where((vals == (0, 1)).all(axis=1))",
    "question_id": "25823608-56",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.where",
      "python.library.functions#all"
    ],
    "clean_cmd": "np.where((vals == (, )).all(axis=))",
    "canonical_cmd": "np.where((VAR_STR == (0, 1)).all(axis=1))"
  },
  {
    "nl": "change directory to the directory of a python script",
    "cmd": "os.chdir(os.path.dirname(__file__))",
    "question_id": "509742-28",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.dirname",
      "python.library.os#os.chdir"
    ],
    "clean_cmd": "os.chdir(os.path.dirname(__file__))",
    "canonical_cmd": "os.chdir(os.path.dirname(__file__))"
  },
  {
    "nl": "export a table dataframe `df` in pyspark to csv 'mycsv.csv'",
    "cmd": "df.toPandas().to_csv('mycsv.csv')",
    "question_id": "31385363-83",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.to_csv"
    ],
    "clean_cmd": "df.toPandas().to_csv()",
    "canonical_cmd": "VAR_STR.toPandas().to_csv('VAR_STR')"
  },
  {
    "nl": "Write DataFrame `df` to csv file 'mycsv.csv'",
    "cmd": "df.write.csv('mycsv.csv')",
    "question_id": "31385363-50",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df.write.csv()",
    "canonical_cmd": "VAR_STR.write.csv('VAR_STR')"
  },
  {
    "nl": "zip a list of tuples `[(1, 4), (2, 5), (3, 6)]` into a list of tuples according to original tuple index",
    "cmd": "zip(*[(1, 4), (2, 5), (3, 6)])",
    "question_id": "8092877-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "zip(*[(, ), (, ), (, )])",
    "canonical_cmd": "zip(*[VAR_STR])"
  },
  {
    "nl": "split a list of tuples `data` into sub-lists of the same tuple field using itertools",
    "cmd": "[list(group) for key, group in itertools.groupby(data, operator.itemgetter(1))]",
    "question_id": "8092877-98",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.operator#operator.itemgetter",
      "python.library.itertools#itertools.groupby",
      "python.library.functions#list"
    ],
    "clean_cmd": "[list(group)  key, group  itertools.groupby(data, operator.itemgetter())]",
    "canonical_cmd": "[list(group) for key, group in itertools.groupby(VAR_STR, operator.itemgetter(1))\n    ]"
  },
  {
    "nl": "pandas dataframe `df` column 'a' to list",
    "cmd": "df['a'].values.tolist()",
    "question_id": "23748995-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#memoryview.tolist"
    ],
    "clean_cmd": "df[].values.tolist()",
    "canonical_cmd": "VAR_STR['VAR_STR'].values.tolist()"
  },
  {
    "nl": "Get a list of all values in column `a` in pandas data frame `df`",
    "cmd": "df['a'].tolist()",
    "question_id": "23748995-42",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#memoryview.tolist"
    ],
    "clean_cmd": "df[].tolist()",
    "canonical_cmd": "VAR_STR['VAR_STR'].tolist()"
  },
  {
    "nl": "parse UTF-8 encoded HTML response `response` to BeautifulSoup object",
    "cmd": "soup = BeautifulSoup(response.read().decode('utf-8'))",
    "question_id": "20205455-79",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode",
      "python.library.os#os.read"
    ],
    "clean_cmd": "soup = BeautifulSoup(response.read().decode())",
    "canonical_cmd": "soup = BeautifulSoup(VAR_STR.read().decode('utf-8'))"
  },
  {
    "nl": "get all the values from a numpy array `a` excluding index 3",
    "cmd": "a[np.arange(len(a)) != 3]",
    "question_id": "7429118-70",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.arange",
      "python.library.functions#len"
    ],
    "clean_cmd": "a[np.arange(len(a)) != ]",
    "canonical_cmd": "VAR_STR[np.arange(len(VAR_STR)) != 3]"
  },
  {
    "nl": "get the output of a subprocess command `echo \"foo\"` in command line",
    "cmd": "subprocess.check_output('echo \"foo\"', shell=True)",
    "question_id": "8217613-41",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.check_output"
    ],
    "clean_cmd": "subprocess.check_output(, shell=True)",
    "canonical_cmd": "subprocess.check_output('VAR_STR', shell=True)"
  },
  {
    "nl": "extract unique dates from time series 'Date' in dataframe `df`",
    "cmd": "df['Date'].map(lambda t: t.date()).unique()",
    "question_id": "14673394-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "numpy.reference.generated.numpy.unique"
    ],
    "clean_cmd": "df[].map( t: t.date()).unique()",
    "canonical_cmd": "VAR_STR['VAR_STR'].map(lambda t: t.date()).unique()"
  },
  {
    "nl": "check if file `filename` is descendant of directory '/the/dir/'",
    "cmd": "os.path.commonprefix(['/the/dir/', os.path.realpath(filename)]) == '/the/dir/'",
    "question_id": "3328012-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.commonprefix",
      "python.library.os.path#os.path.realpath"
    ],
    "clean_cmd": "os.path.commonprefix([, os.path.realpath(filename)]) == ",
    "canonical_cmd": "os.path.commonprefix(['VAR_STR', os.path.realpath(VAR_STR)]) == 'VAR_STR'"
  },
  {
    "nl": "create a dictionary of pairs from a list of tuples `myListOfTuples`",
    "cmd": "dict(x[1:] for x in reversed(myListOfTuples))",
    "question_id": "3457673-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#reversed",
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict(x[:]  x  reversed(myListOfTuples))",
    "canonical_cmd": "dict(x[1:] for x in reversed(VAR_STR))"
  },
  {
    "nl": "sort a list of dictionary values by 'date' in reverse order",
    "cmd": "list.sort(key=lambda item: item['date'], reverse=True)",
    "question_id": "652291-62",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "list.sort(key= item: item[], reverse=True)",
    "canonical_cmd": "list.sort(key=lambda item: item['VAR_STR'], reverse=True)"
  },
  {
    "nl": "export a pandas data frame `df` to a file `mydf.tsv` and retain the indices",
    "cmd": "df.to_csv('mydf.tsv', sep='\\t')",
    "question_id": "11041411-88",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.to_csv"
    ],
    "clean_cmd": "df.to_csv(, sep=)",
    "canonical_cmd": "VAR_STR.to_csv('VAR_STR', sep='\\t')"
  },
  {
    "nl": "Get the difference between two lists `[1, 2, 2, 2, 3]` and `[1, 2]` that may have duplicate values",
    "cmd": "Counter([1, 2, 2, 2, 3]) - Counter([1, 2])",
    "question_id": "33435418-32",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.collections#collections.Counter"
    ],
    "clean_cmd": "Counter([, , , , ]) - Counter([, ])",
    "canonical_cmd": "Counter([VAR_STR]) - Counter([VAR_STR])"
  },
  {
    "nl": "compare contents  at filehandles `file1` and `file2` using difflib",
    "cmd": "difflib.SequenceMatcher(None, file1.read(), file2.read())",
    "question_id": "977491-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.difflib#difflib.SequenceMatcher",
      "python.library.os#os.read"
    ],
    "clean_cmd": "difflib.SequenceMatcher(None, file1.read(), file2.read())",
    "canonical_cmd": "difflib.SequenceMatcher(None, VAR_STR.read(), VAR_STR.read())"
  },
  {
    "nl": "Get a minimum value from a list of tuples `list` with values of type `string` and `float` with nan",
    "cmd": "min(list, key=lambda x: float('inf') if math.isnan(x[1]) else x[1])",
    "question_id": "15148684-45",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.math#math.isnan",
      "python.library.functions#float",
      "python.library.functions#min"
    ],
    "clean_cmd": "min(list, key= x: float()  math.isnan(x[])  x[])",
    "canonical_cmd": "min(VAR_STR, key=lambda x: VAR_STR('inf') if math.isnan(x[1]) else x[1])"
  },
  {
    "nl": "convert list `lst` of tuples of floats to list `str_list` of tuples of strings of floats in scientific notation with eight decimal point precision",
    "cmd": "str_list = [tuple('{0:.8e}'.format(flt) for flt in sublist) for sublist in lst]",
    "question_id": "16127862-90",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#tuple",
      "python.library.functions#format"
    ],
    "clean_cmd": "str_list = [tuple(.format(flt)  flt  sublist)  sublist  lst]",
    "canonical_cmd": "VAR_STR = [tuple('{0:.8e}'.format(flt) for flt in sublist) for sublist in VAR_STR]"
  },
  {
    "nl": "convert list of sublists `lst` of floats to a list of sublists `str_list` of strings of integers in scientific notation with 8 decimal points",
    "cmd": "str_list = [['{0:.8e}'.format(flt) for flt in sublist] for sublist in lst]",
    "question_id": "16127862-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": "str_list = [[.format(flt)  flt  sublist]  sublist  lst]",
    "canonical_cmd": "VAR_STR = [['{0:.8e}'.format(flt) for flt in sublist] for sublist in VAR_STR]"
  },
  {
    "nl": "get element at index 0 of first row and element at index 1 of second row in array `A`",
    "cmd": "A[[0, 1], [0, 1]]",
    "question_id": "2111163-71",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "A[[, ], [, ]]",
    "canonical_cmd": "VAR_STR[[0, 1], [0, 1]]"
  },
  {
    "nl": "subset numpy array `a` by column and row, returning the values from the first row, first column and the second row, second column and the third row, first column.",
    "cmd": "a[np.arange(3), (0, 1, 0)]",
    "question_id": "2111163-33",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.arange"
    ],
    "clean_cmd": "a[np.arange(), (, , )]",
    "canonical_cmd": "VAR_STR[np.arange(3), (0, 1, 0)]"
  },
  {
    "nl": "Find all the tags `a` and `div` from Beautiful Soup object `soup`",
    "cmd": "soup.find_all(['a', 'div'])",
    "question_id": "24748445-94",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "soup.find_all([, ])",
    "canonical_cmd": "VAR_STR.find_all(['VAR_STR', 'VAR_STR'])"
  },
  {
    "nl": "Reverse a string 'hello world'",
    "cmd": "'hello world'[::(-1)]",
    "question_id": "931092-70",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[::(-)]",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\"[::-1]"
  },
  {
    "nl": "Reverse list `s`",
    "cmd": "s[::(-1)]",
    "question_id": "931092-64",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "s[::(-)]",
    "canonical_cmd": "VAR_STR[::-1]"
  },
  {
    "nl": "Reverse string 'foo'",
    "cmd": "''.join(reversed('foo'))",
    "question_id": "931092-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#reversed",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(reversed())",
    "canonical_cmd": "\"\"\"\"\"\".join(reversed('VAR_STR'))"
  },
  {
    "nl": "Reverse a string `string`",
    "cmd": "''.join(reversed(string))",
    "question_id": "931092-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#reversed",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(reversed(string))",
    "canonical_cmd": "\"\"\"\"\"\".join(reversed(VAR_STR))"
  },
  {
    "nl": "Reverse a string \"foo\"",
    "cmd": "'foo'[::(-1)]",
    "question_id": "931092-65",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[::(-)]",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\"[::-1]"
  },
  {
    "nl": "Reverse a string `a_string`",
    "cmd": "a_string[::(-1)]",
    "question_id": "931092-99",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "a_string[::(-)]",
    "canonical_cmd": "VAR_STR[::-1]"
  },
  {
    "nl": "Reverse a string `a_string`",
    "cmd": "def reversed_string(a_string):\n    return a_string[::(-1)]",
    "question_id": "931092-20",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " reversed_string(a_string): a_string[::(-)]",
    "canonical_cmd": "def reversed_string(VAR_STR):\n    return VAR_STR[::-1]"
  },
  {
    "nl": "Reverse a string `s`",
    "cmd": "''.join(reversed(s))",
    "question_id": "931092-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#reversed",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(reversed(s))",
    "canonical_cmd": "\"\"\"\"\"\".join(reversed(VAR_STR))"
  },
  {
    "nl": "set the stdin of the process 'grep f' to be b'one\\ntwo\\nthree\\nfour\\nfive\\nsix\\n'",
    "cmd": "p = Popen(['grep', 'f'], stdout=PIPE, stdin=PIPE, stderr=STDOUT)\ngrep_stdout = p.communicate(input='one\\ntwo\\nthree\\nfour\\nfive\\nsix\\n')[0]",
    "question_id": "163542-7",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.Popen.communicate",
      "python.library.os#os.popen"
    ],
    "clean_cmd": "p = Popen([, ], stdout=PIPE, stdin=PIPE, stderr=STDOUT)grep_stdout = p.communicate(input=)[]",
    "canonical_cmd": "p = Popen(['grep', 'f'], stdout=PIPE, stdin=PIPE, stderr=STDOUT)\ngrep_stdout = p.communicate(input='VAR_STR')[0]"
  },
  {
    "nl": "set the stdin of the process 'grep f' to be 'one\\ntwo\\nthree\\nfour\\nfive\\nsix\\n'",
    "cmd": "p = subprocess.Popen(['grep', 'f'], stdout=subprocess.PIPE, stdin=subprocess.PIPE)\np.stdin.write('one\\ntwo\\nthree\\nfour\\nfive\\nsix\\n')\np.communicate()[0]\np.stdin.close()",
    "question_id": "163542-94",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.Popen",
      "python.library.subprocess#subprocess.Popen.communicate",
      "python.library.os#os.write"
    ],
    "clean_cmd": "p = subprocess.Popen([, ], stdout=subprocess.PIPE, stdin=subprocess.PIPE)p.stdin.write()p.communicate()[]p.stdin.close()",
    "canonical_cmd": "p = subprocess.Popen(['grep', 'f'], stdout=subprocess.PIPE, stdin=\n    subprocess.PIPE)\np.stdin.write('VAR_STR')\np.communicate()[0]\np.stdin.close()"
  },
  {
    "nl": "normalize a pandas dataframe `df` by row",
    "cmd": "df.div(df.sum(axis=1), axis=0)",
    "question_id": "18594469-11",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.div",
      "pandas.reference.api.pandas.dataframe.sum"
    ],
    "clean_cmd": "df.div(df.sum(axis=), axis=)",
    "canonical_cmd": "VAR_STR.div(VAR_STR.sum(axis=1), axis=0)"
  },
  {
    "nl": "convert `i` to string",
    "cmd": "str(i)",
    "question_id": "961632-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str"
    ],
    "clean_cmd": "str(i)",
    "canonical_cmd": "str(VAR_STR)"
  },
  {
    "nl": "convert `a` to string",
    "cmd": "a.__str__()",
    "question_id": "961632-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.__str__"
    ],
    "clean_cmd": "a.__str__()",
    "canonical_cmd": "VAR_STR.__str__()"
  },
  {
    "nl": "convert `a` to string",
    "cmd": "str(a)",
    "question_id": "961632-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str"
    ],
    "clean_cmd": "str(a)",
    "canonical_cmd": "str(VAR_STR)"
  },
  {
    "nl": "read a file from redirected stdin and save to variable `result`",
    "cmd": "result = sys.stdin.read()",
    "question_id": "27318022-32",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.read"
    ],
    "clean_cmd": "result = sys.stdin.read()",
    "canonical_cmd": "VAR_STR = sys.stdin.read()"
  },
  {
    "nl": "remove elements from an array `A` that are in array `B`",
    "cmd": "A[np.all(np.any(A - B[:, (None)], axis=2), axis=0)]",
    "question_id": "40055835-26",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.all",
      "numpy.reference.generated.numpy.any"
    ],
    "clean_cmd": "A[np.all(np.any(A - B[:, (None)], axis=), axis=)]",
    "canonical_cmd": "VAR_STR[np.all(np.any(VAR_STR - VAR_STR[:, (None)], axis=2), axis=0)]"
  },
  {
    "nl": "drop rows of dataframe `df` whose index is smaller than the value of `start_remove` or bigger than the value of`end_remove`",
    "cmd": "df.query('index < @start_remove or index > @end_remove')",
    "question_id": "41513324-64",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.query"
    ],
    "clean_cmd": "df.query()",
    "canonical_cmd": "VAR_STR.query('index < @start_remove or index > @end_remove')"
  },
  {
    "nl": "Drop the rows in pandas timeseries `df` from the row containing index `start_remove` to the row containing index `end_remove`",
    "cmd": "df.loc[(df.index < start_remove) | (df.index > end_remove)]",
    "question_id": "41513324-67",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.loc"
    ],
    "clean_cmd": "df.loc[(df.index &lt; start_remove) | (df.index &gt; end_remove)]",
    "canonical_cmd": "VAR_STR.loc[(VAR_STR.index < VAR_STR) | (VAR_STR.index > VAR_STR)]"
  },
  {
    "nl": "Get all indexes of boolean numpy array where boolean value `mask` is True",
    "cmd": "numpy.where(mask)",
    "question_id": "8218032-86",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.where"
    ],
    "clean_cmd": "numpy.where(mask)",
    "canonical_cmd": "numpy.where(VAR_STR)"
  },
  {
    "nl": "Convert CSV file `Result.csv` to Pandas dataframe using separator ' '",
    "cmd": "df.to_csv('Result.csv', index=False, sep=' ')",
    "question_id": "32533944-71",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.to_csv"
    ],
    "clean_cmd": "df.to_csv(, index=False, sep=)",
    "canonical_cmd": "df.to_csv('VAR_STR', index=False, sep=' ')"
  },
  {
    "nl": "convert a list of dictionaries `d` to pandas data frame",
    "cmd": "pd.DataFrame(d)",
    "question_id": "20638006-85",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe"
    ],
    "clean_cmd": "pd.DataFrame(d)",
    "canonical_cmd": "pd.DataFrame(VAR_STR)"
  },
  {
    "nl": "Open gzip-compressed file encoded as utf-8 'file.gz' in text mode",
    "cmd": "gzip.open('file.gz', 'rt', encoding='utf-8')",
    "question_id": "1883604-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.gzip#gzip.open"
    ],
    "clean_cmd": "gzip.open(, , encoding=)",
    "canonical_cmd": "gzip.open('VAR_STR', 'rt', encoding='utf-8')"
  },
  {
    "nl": "navigate to webpage given by url `http://www.python.org` using Selenium",
    "cmd": "driver.get('http://www.google.com.br')",
    "question_id": "29983106-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.webbrowser#webbrowser.get"
    ],
    "clean_cmd": "driver.get()",
    "canonical_cmd": "driver.get('http://www.google.com.br')"
  },
  {
    "nl": "check if \"blah\" is in string `somestring`",
    "cmd": "if ('blah' not in somestring):\n    pass",
    "question_id": "3437059-20",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " (   somestring):",
    "canonical_cmd": "if 'VAR_STR' not in VAR_STR:\n    pass"
  },
  {
    "nl": "check if string `needle` is in `haystack`",
    "cmd": "if (needle in haystack):\n    pass",
    "question_id": "3437059-17",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " (needle  haystack):",
    "canonical_cmd": "if VAR_STR in VAR_STR:\n    pass"
  },
  {
    "nl": "check if string \"substring\" is in string",
    "cmd": "string.find('substring')",
    "question_id": "3437059-17",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "string.find()",
    "canonical_cmd": "string.find('VAR_STR')"
  },
  {
    "nl": "check if string `s` contains \"is\"",
    "cmd": "if (s.find('is') == (-1)):\n    print(\"No 'is' here!\")\nelse:\n    print(\"Found 'is' in the string.\")",
    "question_id": "3437059-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.find"
    ],
    "clean_cmd": " (s.find() == (-)):print():print()",
    "canonical_cmd": "if VAR_STR.find('VAR_STR') == -1:\n    print(\"No 'is' here!\")\nelse:\n    print(\"Found 'is' in the string.\")"
  },
  {
    "nl": "calling a function named 'myfunction' in the module",
    "cmd": "globals()['myfunction']()",
    "question_id": "3061-93",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#globals"
    ],
    "clean_cmd": "globals()[]()",
    "canonical_cmd": "globals()['VAR_STR']()"
  },
  {
    "nl": "get the dimensions of numpy array `a`",
    "cmd": "a.shape",
    "question_id": "3061761-63",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "a.shape",
    "canonical_cmd": "VAR_STR.shape"
  },
  {
    "nl": "get the dimensions of numpy array `a`",
    "cmd": "N.shape(a)",
    "question_id": "3061761-55",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.shape"
    ],
    "clean_cmd": "N.shape(a)",
    "canonical_cmd": "N.shape(VAR_STR)"
  },
  {
    "nl": "get the dimensions of array `a`",
    "cmd": "N.shape(a)",
    "question_id": "3061761-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.turtle#turtle.Shape"
    ],
    "clean_cmd": "N.shape(a)",
    "canonical_cmd": "N.shape(VAR_STR)"
  },
  {
    "nl": "get the dimensions of numpy array `a`",
    "cmd": "a.shape",
    "question_id": "3061761-31",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "a.shape",
    "canonical_cmd": "VAR_STR.shape"
  },
  {
    "nl": "convert double 0.00582811585976 to float",
    "cmd": "struct.unpack('f', struct.pack('f', 0.00582811585976))",
    "question_id": "13291539-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.struct#struct.unpack",
      "python.library.struct#struct.pack"
    ],
    "clean_cmd": "struct.unpack(, struct.pack(, 0.00582811585976))",
    "canonical_cmd": "struct.unpack('f', struct.pack('f', 0.00582811585976))"
  },
  {
    "nl": "how to uniqify a list of dict in python",
    "cmd": "[dict(y) for y in set(tuple(x.items()) for x in d)]",
    "question_id": "6280978-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#tuple",
      "python.library.stdtypes#dict",
      "python.library.stdtypes#set",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "[dict(y)  y  set(tuple(x.items())  x  d)]",
    "canonical_cmd": "[dict(y) for y in set(tuple(x.items()) for x in d)]"
  },
  {
    "nl": "get a list of words `words` of a file 'myfile'",
    "cmd": "words = open('myfile').read().split()",
    "question_id": "7745260-45",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.os#os.read",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "words = open().read().split()",
    "canonical_cmd": "VAR_STR = open('VAR_STR').read().split()"
  },
  {
    "nl": "Convert array `a` to numpy array",
    "cmd": "a = np.array(a)",
    "question_id": "7717380-65",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.array"
    ],
    "clean_cmd": "a = np.array(a)",
    "canonical_cmd": "VAR_STR = np.array(VAR_STR)"
  },
  {
    "nl": "create a django query for a list of values `1, 4, 7`",
    "cmd": "Blog.objects.filter(pk__in=[1, 4, 7])",
    "question_id": "9304908-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.logging#logging.Filter.filter"
    ],
    "clean_cmd": "Blog.objects.filter(pk__in=[, , ])",
    "canonical_cmd": "Blog.objects.filter(pk__in=[1, 4, 7])"
  },
  {
    "nl": "case insensitive comparison of strings `string1` and `string2`",
    "cmd": "if (string1.lower() == string2.lower()):\n    print('The strings are the same (case insensitive)')\nelse:\n    print('The strings are not the same (case insensitive)')",
    "question_id": "319426-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.lower"
    ],
    "clean_cmd": " (string1.lower() == string2.lower()):print():print()",
    "canonical_cmd": "if VAR_STR.lower() == VAR_STR.lower():\n    print('The strings are the same (case insensitive)')\nelse:\n    print('The strings are not the same (case insensitive)')"
  },
  {
    "nl": "case insensitive string comparison between `string1` and `string2`",
    "cmd": "if (string1.lower() == string2.lower()):\n    pass",
    "question_id": "319426-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.lower"
    ],
    "clean_cmd": " (string1.lower() == string2.lower()):",
    "canonical_cmd": "if VAR_STR.lower() == VAR_STR.lower():\n    pass"
  },
  {
    "nl": "case insensitive string comparison between `string1` and `string2`",
    "cmd": "(string1.lower() == string2.lower())",
    "question_id": "319426-84",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.lower"
    ],
    "clean_cmd": "(string1.lower() == string2.lower())",
    "canonical_cmd": "VAR_STR.lower() == VAR_STR.lower()"
  },
  {
    "nl": "case insensitive string comparison between `first` and `second`",
    "cmd": "(first.lower() == second.lower())",
    "question_id": "319426-70",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.lower"
    ],
    "clean_cmd": "(first.lower() == second.lower())",
    "canonical_cmd": "VAR_STR.lower() == VAR_STR.lower()"
  },
  {
    "nl": "case insensitive comparison between strings `first` and `second`",
    "cmd": "(first.upper() == second.upper())",
    "question_id": "319426-18",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.upper"
    ],
    "clean_cmd": "(first.upper() == second.upper())",
    "canonical_cmd": "VAR_STR.upper() == VAR_STR.upper()"
  },
  {
    "nl": "Create a pandas dataframe of values from a dictionary `d` which contains dictionaries of dictionaries",
    "cmd": "pd.concat(map(pd.DataFrame, iter(d.values())), keys=list(d.keys())).stack().unstack(0)",
    "question_id": "15455388-38",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.concat",
      "python.library.functions#iter",
      "python.library.functions#list",
      "python.library.functions#map",
      "pandas.reference.api.pandas.dataframe.unstack",
      "pandas.reference.api.pandas.dataframe.values",
      "pandas.reference.api.pandas.dataframe.stack"
    ],
    "clean_cmd": "pd.concat(map(pd.DataFrame, iter(d.values())), keys=list(d.keys())).stack().unstack()",
    "canonical_cmd": "pd.concat(map(pd.DataFrame, iter(VAR_STR.values())), keys=list(VAR_STR.keys())\n    ).stack().unstack(0)"
  },
  {
    "nl": "get count of rows in each series grouped by column 'col5' and column 'col2' of dataframe `df`",
    "cmd": "df.groupby(['col5', 'col2']).size().groupby(level=1).max()",
    "question_id": "17679089-100",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "python.library.functions#max",
      "pandas.reference.api.pandas.core.groupby.groupby.size"
    ],
    "clean_cmd": "df.groupby([, ]).size().groupby(level=).max()",
    "canonical_cmd": "VAR_STR.groupby(['VAR_STR', 'VAR_STR']).size().groupby(level=1).max()"
  },
  {
    "nl": "get the attribute `x` from object `your_obj`",
    "cmd": "getattr(your_obj, x)",
    "question_id": "9396706-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#getattr"
    ],
    "clean_cmd": "getattr(your_obj, x)",
    "canonical_cmd": "getattr(VAR_STR, VAR_STR)"
  },
  {
    "nl": "convert datetime object to date object in python",
    "cmd": "datetime.datetime.now().date()",
    "question_id": "3743222-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.now",
      "python.library.datetime#datetime.datetime.date"
    ],
    "clean_cmd": "datetime.datetime.now().date()",
    "canonical_cmd": "datetime.datetime.now().date()"
  },
  {
    "nl": "How do I convert datetime to date (in Python)?",
    "cmd": "datetime.datetime.now().date()",
    "question_id": "3743222-73",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.now",
      "python.library.datetime#datetime.datetime.date"
    ],
    "clean_cmd": "datetime.datetime.now().date()",
    "canonical_cmd": "datetime.datetime.now().date()"
  },
  {
    "nl": "sort numpy float array `A` column by column",
    "cmd": "A = np.array(sorted(A, key=tuple))",
    "question_id": "12496531-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "numpy.reference.generated.numpy.array"
    ],
    "clean_cmd": "A = np.array(sorted(A, key=tuple))",
    "canonical_cmd": "VAR_STR = np.array(sorted(VAR_STR, key=tuple))"
  },
  {
    "nl": "convert string `s` to lowercase",
    "cmd": "s.lower()",
    "question_id": "6797984-32",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.lower"
    ],
    "clean_cmd": "s.lower()",
    "canonical_cmd": "VAR_STR.lower()"
  },
  {
    "nl": "convert utf-8 string `s` to lowercase",
    "cmd": "s.decode('utf-8').lower()",
    "question_id": "6797984-49",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.lower",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "s.decode().lower()",
    "canonical_cmd": "VAR_STR.decode('utf-8').lower()"
  },
  {
    "nl": "Update a user's name as `Bob Marley` having id `123` in SQLAlchemy",
    "cmd": "session.query(User).filter_by(id=123).update({'name': 'Bob Marley'})",
    "question_id": "2631935-11",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.turtle#turtle.update"
    ],
    "clean_cmd": "session.query(User).filter_by(id=).update({: })",
    "canonical_cmd": "session.query(User).filter_by(id=123).update({'name': 'VAR_STR'})"
  },
  {
    "nl": "define global variable `something` with value `bob`",
    "cmd": "globals()['something'] = 'bob'",
    "question_id": "13627865-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#globals"
    ],
    "clean_cmd": "globals()[] = ",
    "canonical_cmd": "globals()['VAR_STR'] = 'VAR_STR'"
  },
  {
    "nl": "Check whether a numpy array `a` contains a given row `[1, 2]`",
    "cmd": "any(np.equal(a, [1, 2]).all(1))",
    "question_id": "14766194-35",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.equal",
      "python.library.functions#all",
      "python.library.functions#any"
    ],
    "clean_cmd": "any(np.equal(a, [, ]).all())",
    "canonical_cmd": "any(np.equal(VAR_STR, [VAR_STR]).all(1))"
  },
  {
    "nl": "convert ndarray with shape 3x3 to array",
    "cmd": "np.zeros((3, 3)).ravel()",
    "question_id": "18200052-13",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.zeros",
      "numpy.reference.generated.numpy.ravel"
    ],
    "clean_cmd": "np.zeros((, )).ravel()",
    "canonical_cmd": "np.zeros((3, 3)).ravel()"
  },
  {
    "nl": "convert dict `result` to numpy structured array",
    "cmd": "numpy.array([(key, val) for key, val in result.items()], dtype)",
    "question_id": "15579649-73",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.array",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "numpy.array([(key, val)  key, val  result.items()], dtype)",
    "canonical_cmd": "numpy.array([(key, val) for key, val in VAR_STR.items()], dtype)"
  },
  {
    "nl": "quit program",
    "cmd": "sys.exit(0)",
    "question_id": "2823472-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.sys#sys.exit"
    ],
    "clean_cmd": "sys.exit()",
    "canonical_cmd": "sys.exit(0)"
  },
  {
    "nl": "sort list `mylist` alphabetically",
    "cmd": "mylist.sort(key=lambda x: x.lower())",
    "question_id": "36139-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.lower",
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "mylist.sort(key= x: x.lower())",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: x.lower())"
  },
  {
    "nl": "sort list `mylist` in alphabetical order",
    "cmd": "mylist.sort(key=str.lower)",
    "question_id": "36139-62",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "mylist.sort(key=str.lower)",
    "canonical_cmd": "VAR_STR.sort(key=str.lower)"
  },
  {
    "nl": "sort a list of strings 'mylist'.",
    "cmd": "mylist.sort()",
    "question_id": "36139-23",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "mylist.sort()",
    "canonical_cmd": "VAR_STR.sort()"
  },
  {
    "nl": "sort a list of strings `list`",
    "cmd": "list.sort()",
    "question_id": "36139-32",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "list.sort()",
    "canonical_cmd": "VAR_STR.sort()"
  },
  {
    "nl": "load a file `file.py` into the python console",
    "cmd": "exec(compile(open('file.py').read(), 'file.py', 'exec'))",
    "question_id": "5280178-72",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#exec",
      "python.library.functions#compile",
      "python.library.urllib.request#open",
      "python.library.os#os.read"
    ],
    "clean_cmd": "exec(compile(open().read(), , ))",
    "canonical_cmd": "exec(compile(open('VAR_STR').read(), 'VAR_STR', 'exec'))"
  },
  {
    "nl": "check if string `test.mp3` ends with one of the strings from a tuple `('.mp3', '.avi')`",
    "cmd": "\"\"\"test.mp3\"\"\".endswith(('.mp3', '.avi'))",
    "question_id": "18351951-41",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.endswith"
    ],
    "clean_cmd": ".endswith((, ))",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".endswith((VAR_STR))"
  },
  {
    "nl": "unpack first and second bytes of byte string `pS` into integer",
    "cmd": "struct.unpack('h', pS[0:2])",
    "question_id": "826284-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.struct#struct.unpack"
    ],
    "clean_cmd": "struct.unpack(, pS[:])",
    "canonical_cmd": "struct.unpack('h', VAR_STR[0:2])"
  },
  {
    "nl": "check if all string elements in list `words` are upper-cased",
    "cmd": "print(all(word[0].isupper() for word in words))",
    "question_id": "3668964-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#all",
      "python.library.stdtypes#str.isupper"
    ],
    "clean_cmd": "print(all(word[].isupper()  word  words))",
    "canonical_cmd": "print(all(word[0].isupper() for word in VAR_STR))"
  },
  {
    "nl": "convert dataframe `df` to integer-type sparse object",
    "cmd": "df.to_sparse(0)",
    "question_id": "41154648-81",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df.to_sparse()",
    "canonical_cmd": "VAR_STR.to_sparse(0)"
  },
  {
    "nl": "declare an array `variable`",
    "cmd": "variable = []",
    "question_id": "1514553-90",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "variable = []",
    "canonical_cmd": "VAR_STR = []"
  },
  {
    "nl": "declare an array with element 'i'",
    "cmd": "intarray = array('i')",
    "question_id": "1514553-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.array#array.array"
    ],
    "clean_cmd": "intarray = array()",
    "canonical_cmd": "intarray = array('VAR_STR')"
  },
  {
    "nl": "check if object `a` has property 'property'",
    "cmd": "if hasattr(a, 'property'):\n    pass",
    "question_id": "610883-92",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#hasattr"
    ],
    "clean_cmd": " hasattr(a, ):",
    "canonical_cmd": "if hasattr(VAR_STR, 'VAR_STR'):\n    pass"
  },
  {
    "nl": "check if object `a` has property 'property'",
    "cmd": "if hasattr(a, 'property'):\n    pass",
    "question_id": "610883-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#hasattr"
    ],
    "clean_cmd": " hasattr(a, ):",
    "canonical_cmd": "if hasattr(VAR_STR, 'VAR_STR'):\n    pass"
  },
  {
    "nl": "get the value of attribute 'property' of object `a` with default value 'default value'",
    "cmd": "getattr(a, 'property', 'default value')",
    "question_id": "610883-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#getattr"
    ],
    "clean_cmd": "getattr(a, , )",
    "canonical_cmd": "getattr(VAR_STR, 'VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "create a list of all unique characters in string 'aaabcabccd'",
    "cmd": "\"\"\"\"\"\".join(list(OrderedDict.fromkeys('aaabcabccd').keys()))",
    "question_id": "13902805-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.fromkeys",
      "python.library.stdtypes#dict.keys",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(list(OrderedDict.fromkeys().keys()))",
    "canonical_cmd": "\"\"\"\"\"\".join(list(OrderedDict.fromkeys('VAR_STR').keys()))"
  },
  {
    "nl": "get list of all unique characters in a string 'aaabcabccd'",
    "cmd": "list(set('aaabcabccd'))",
    "question_id": "13902805-92",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#set"
    ],
    "clean_cmd": "list(set())",
    "canonical_cmd": "list(set('VAR_STR'))"
  },
  {
    "nl": "List of all unique characters in a string?",
    "cmd": "\"\"\"\"\"\".join(set('aaabcabccd'))",
    "question_id": "13902805-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#set",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(set())",
    "canonical_cmd": "\"\"\"\"\"\".join(set('aaabcabccd'))"
  },
  {
    "nl": "find the index of sub string 'World' in `x`",
    "cmd": "x.find('World')",
    "question_id": "674764-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.find"
    ],
    "clean_cmd": "x.find()",
    "canonical_cmd": "VAR_STR.find('VAR_STR')"
  },
  {
    "nl": "find the index of sub string 'Aloha' in `x`",
    "cmd": "x.find('Aloha')",
    "question_id": "674764-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.find"
    ],
    "clean_cmd": "x.find()",
    "canonical_cmd": "VAR_STR.find('VAR_STR')"
  },
  {
    "nl": "find the index of sub string 'cc' in string 'sdfasdf'",
    "cmd": "'sdfasdf'.index('cc')",
    "question_id": "674764-19",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.index"
    ],
    "clean_cmd": ".index()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".index('VAR_STR')"
  },
  {
    "nl": "find the index of sub string 'df' in string 'sdfasdf'",
    "cmd": "'sdfasdf'.index('df')",
    "question_id": "674764-45",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.index"
    ],
    "clean_cmd": ".index()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".index('VAR_STR')"
  },
  {
    "nl": "find the index of sub string 'a' in string `str`",
    "cmd": "str.find('a')",
    "question_id": "674764-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.find"
    ],
    "clean_cmd": "str.find()",
    "canonical_cmd": "VAR_STR.find('VAR_STR')"
  },
  {
    "nl": "find the index of sub string 'g' in string `str`",
    "cmd": "str.find('g')",
    "question_id": "674764-91",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.find"
    ],
    "clean_cmd": "str.find()",
    "canonical_cmd": "VAR_STR.find('VAR_STR')"
  },
  {
    "nl": "find the index of sub string 's' in string `str` starting from index 11",
    "cmd": "str.find('s', 11)",
    "question_id": "674764-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.find"
    ],
    "clean_cmd": "str.find(, )",
    "canonical_cmd": "VAR_STR.find('VAR_STR', 11)"
  },
  {
    "nl": "find the index of sub string 's' in string `str` starting from index 15",
    "cmd": "str.find('s', 15)",
    "question_id": "674764-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.find"
    ],
    "clean_cmd": "str.find(, )",
    "canonical_cmd": "VAR_STR.find('VAR_STR', 15)"
  },
  {
    "nl": "find the index of sub string 's' in string `str` starting from index 16",
    "cmd": "str.find('s', 16)",
    "question_id": "674764-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.find"
    ],
    "clean_cmd": "str.find(, )",
    "canonical_cmd": "VAR_STR.find('VAR_STR', 16)"
  },
  {
    "nl": "find the index of sub string 's' in string `str` starting from index 11 and ending at index 14",
    "cmd": "str.find('s', 11, 14)",
    "question_id": "674764-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.find"
    ],
    "clean_cmd": "str.find(, , )",
    "canonical_cmd": "VAR_STR.find('VAR_STR', 11, 14)"
  },
  {
    "nl": "reverse sort items in default dictionary `cityPopulation` by the third item in each key's list of values",
    "cmd": "sorted(iter(cityPopulation.items()), key=lambda k_v: k_v[1][2], reverse=True)",
    "question_id": "10194713-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#iter",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(iter(cityPopulation.items()), key= k_v: k_v[][], reverse=True)",
    "canonical_cmd": "sorted(iter(VAR_STR.items()), key=lambda k_v: k_v[1][2], reverse=True)"
  },
  {
    "nl": "Sort dictionary `u` in ascending order based on second elements of its values",
    "cmd": "sorted(list(u.items()), key=lambda v: v[1])",
    "question_id": "10194713-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(list(u.items()), key= v: v[])",
    "canonical_cmd": "sorted(list(VAR_STR.items()), key=lambda v: v[1])"
  },
  {
    "nl": "reverse sort dictionary `d` based on its values",
    "cmd": "sorted(list(d.items()), key=lambda k_v: k_v[1], reverse=True)",
    "question_id": "10194713-1",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(list(d.items()), key= k_v: k_v[], reverse=True)",
    "canonical_cmd": "sorted(list(VAR_STR.items()), key=lambda k_v: k_v[1], reverse=True)"
  },
  {
    "nl": "sorting a defaultdict `d` by value",
    "cmd": "sorted(list(d.items()), key=lambda k_v: k_v[1])",
    "question_id": "10194713-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(list(d.items()), key= k_v: k_v[])",
    "canonical_cmd": "sorted(list(VAR_STR.items()), key=lambda k_v: k_v[1])"
  },
  {
    "nl": "terminate the script using status value 0",
    "cmd": "sys.exit(0)",
    "question_id": "179369-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.sys#sys.exit"
    ],
    "clean_cmd": "sys.exit()",
    "canonical_cmd": "sys.exit(0)"
  },
  {
    "nl": "abort the execution of the script using message 'aa! errors!'",
    "cmd": "sys.exit('aa! errors!')",
    "question_id": "179369-62",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.sys#sys.exit"
    ],
    "clean_cmd": "sys.exit()",
    "canonical_cmd": "sys.exit('VAR_STR')"
  },
  {
    "nl": "abort the execution of a python script",
    "cmd": "sys.exit()",
    "question_id": "179369-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.sys#sys.exit"
    ],
    "clean_cmd": "sys.exit()",
    "canonical_cmd": "sys.exit()"
  },
  {
    "nl": "How to get yesterday in python",
    "cmd": "datetime.datetime.now() - datetime.timedelta(days=1)",
    "question_id": "19779790-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.now",
      "python.library.datetime#datetime.timedelta"
    ],
    "clean_cmd": "datetime.datetime.now() - datetime.timedelta(days=)",
    "canonical_cmd": "datetime.datetime.now() - datetime.timedelta(days=1)"
  },
  {
    "nl": "create a pandas dataframe `df` from elements of a dictionary `nvalues`",
    "cmd": "df = pd.DataFrame.from_dict({k: v for k, v in list(nvalues.items()) if k != 'y3'})",
    "question_id": "37934969-37",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.from_dict",
      "python.library.functions#list",
      "pandas.reference.api.pandas.dataframe.items"
    ],
    "clean_cmd": "df = pd.DataFrame.from_dict({k: v  k, v  list(nvalues.items())  k != })",
    "canonical_cmd": "VAR_STR = pd.DataFrame.from_dict({k: v for k, v in list(VAR_STR.items()) if k !=\n    'y3'})"
  },
  {
    "nl": "find href value that has string 'follow?page' inside it",
    "cmd": "print(soup.find('a', href=re.compile('.*follow\\\\?page.*')))",
    "question_id": "11066874-58",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.compile",
      "python.library.stdtypes#str.find"
    ],
    "clean_cmd": "print(soup.find(, href=re.compile()))",
    "canonical_cmd": "print(soup.find('a', href=re.compile('.*follow\\\\?page.*')))"
  },
  {
    "nl": "Get the average values from two numpy arrays `old_set` and `new_set`",
    "cmd": "np.mean(np.array([old_set, new_set]), axis=0)",
    "question_id": "18461623-27",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.mean",
      "numpy.reference.generated.numpy.array"
    ],
    "clean_cmd": "np.mean(np.array([old_set, new_set]), axis=)",
    "canonical_cmd": "np.mean(np.array([VAR_STR, VAR_STR]), axis=0)"
  },
  {
    "nl": "Convert ascii value 'P' to binary",
    "cmd": "bin(ord('P'))",
    "question_id": "4523551-62",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#bin",
      "python.library.functions#ord"
    ],
    "clean_cmd": "bin(ord())",
    "canonical_cmd": "bin(ord('VAR_STR'))"
  },
  {
    "nl": "create a set containing all keys' names from dictionary `LoD`",
    "cmd": "{k for d in LoD for k in list(d.keys())}",
    "question_id": "11399384-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.keys"
    ],
    "clean_cmd": "{k  d  LoD  k  list(d.keys())}",
    "canonical_cmd": "{k for d in VAR_STR for k in list(d.keys())}"
  },
  {
    "nl": "create a set containing all keys names from list of dictionaries `LoD`",
    "cmd": "set([i for s in [list(d.keys()) for d in LoD] for i in s])",
    "question_id": "11399384-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#set",
      "python.library.stdtypes#dict.keys"
    ],
    "clean_cmd": "set([i  s  [list(d.keys())  d  LoD]  i  s])",
    "canonical_cmd": "set([i for s in [list(d.keys()) for d in VAR_STR] for i in s])"
  },
  {
    "nl": "extract all keys from a list of dictionaries `LoD`",
    "cmd": "[i for s in [list(d.keys()) for d in LoD] for i in s]",
    "question_id": "11399384-52",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.keys"
    ],
    "clean_cmd": "[i  s  [list(d.keys())  d  LoD]  i  s]",
    "canonical_cmd": "[i for s in [list(d.keys()) for d in VAR_STR] for i in s]"
  },
  {
    "nl": "create a list containing the digits values from binary string `x` as elements",
    "cmd": "[int(d) for d in str(bin(x))[2:]]",
    "question_id": "13081090-42",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#bin",
      "python.library.functions#int",
      "python.library.stdtypes#str"
    ],
    "clean_cmd": "[int(d)  d  str(bin(x))[:]]",
    "canonical_cmd": "[int(d) for d in str(bin(VAR_STR))[2:]]"
  },
  {
    "nl": "group dataframe `df` by columns 'Month' and 'Fruit'",
    "cmd": "df.groupby(['Month', 'Fruit']).sum().unstack(level=0)",
    "question_id": "42012589-82",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "python.library.functions#sum",
      "pandas.reference.api.pandas.dataframe.unstack"
    ],
    "clean_cmd": "df.groupby([, ]).sum().unstack(level=)",
    "canonical_cmd": "VAR_STR.groupby(['VAR_STR', 'VAR_STR']).sum().unstack(level=0)"
  },
  {
    "nl": "request page 'https://www.mysite.com/' with credentials of username 'username' and password 'pwd'",
    "cmd": "requests.get('https://www.mysite.com/', auth=('username', 'pwd'))",
    "question_id": "3355822-45",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.webbrowser#webbrowser.get"
    ],
    "clean_cmd": "requests.get(, auth=(, ))",
    "canonical_cmd": "requests.get('VAR_STR', auth=('VAR_STR', 'VAR_STR'))"
  },
  {
    "nl": "list folders in zip file 'file' that ends with '/'",
    "cmd": "[x for x in file.namelist() if x.endswith('/')]",
    "question_id": "6510477-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.zipfile#zipfile.ZipFile.namelist",
      "python.library.stdtypes#str.endswith"
    ],
    "clean_cmd": "[x  x  file.namelist()  x.endswith()]",
    "canonical_cmd": "[x for x in VAR_STR.namelist() if x.endswith('VAR_STR')]"
  },
  {
    "nl": "remove all spaces from a string converted from dictionary `{'a': 1, 'b': 'as df'}`",
    "cmd": "str({'a': 1, 'b': 'as df'}).replace(': ', ':').replace(', ', ',')",
    "question_id": "39268928-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str",
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "str({: , : }).replace(, ).replace(, )",
    "canonical_cmd": "str({VAR_STR}).replace(': ', ':').replace(', ', ',')"
  },
  {
    "nl": "convert dictionary `dict` into a string formatted object",
    "cmd": "'{' + ','.join('{0!r}:{1!r}'.format(*x) for x in list(dct.items())) + '}'",
    "question_id": "39268928-72",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": " + .join(.format(*x)  x  list(dct.items())) + ",
    "canonical_cmd": "'{' + ','.join('{0!r}:{1!r}'.format(*x) for x in list(dct.items())) + '}'"
  },
  {
    "nl": "load a json data `json_string` into variable `json_data`",
    "cmd": "json_data = json.loads(json_string)",
    "question_id": "23970693-72",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.json#json.loads"
    ],
    "clean_cmd": "json_data = json.loads(json_string)",
    "canonical_cmd": "VAR_STR = json.loads(VAR_STR)"
  },
  {
    "nl": "Rename file from `src` to `dst`",
    "cmd": "os.rename(src, dst)",
    "question_id": "2759067-52",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.rename"
    ],
    "clean_cmd": "os.rename(src, dst)",
    "canonical_cmd": "os.rename(VAR_STR, VAR_STR)"
  },
  {
    "nl": "pygobject center window `window`",
    "cmd": "window.set_position(Gtk.WindowPosition.CENTER)",
    "question_id": "16389188-53",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.xdrlib#xdrlib.Unpacker.set_position"
    ],
    "clean_cmd": "window.set_position(Gtk.WindowPosition.CENTER)",
    "canonical_cmd": "VAR_STR.set_position(Gtk.WindowPosition.CENTER)"
  },
  {
    "nl": "Retrieve parameter 'var_name' from a GET request.",
    "cmd": "self.request.get('var_name')",
    "question_id": "1391026-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.webbrowser#webbrowser.get"
    ],
    "clean_cmd": "self.request.get()",
    "canonical_cmd": "self.request.get('VAR_STR')"
  },
  {
    "nl": "convert keys in dictionary `thedict` into case insensitive",
    "cmd": "theset = set(k.lower() for k in thedict)",
    "question_id": "3296499-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#set",
      "python.library.stdtypes#str.lower"
    ],
    "clean_cmd": "theset = set(k.lower()  k  thedict)",
    "canonical_cmd": "theset = set(k.lower() for k in VAR_STR)"
  },
  {
    "nl": "merge 2 dataframes `df1` and `df2` with same values in a column 'revenue' with and index 'date'",
    "cmd": "df2['revenue'] = df2.CET.map(df1.set_index('date')['revenue'])",
    "question_id": "41463763-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "pandas.reference.api.pandas.dataframe.set_index"
    ],
    "clean_cmd": "df2[] = df2.CET.map(df1.set_index()[])",
    "canonical_cmd": "VAR_STR['VAR_STR'] = VAR_STR.CET.map(VAR_STR.set_index('VAR_STR')['VAR_STR'])"
  },
  {
    "nl": "permanently set the current directory to the 'C:/Users/Name/Desktop'",
    "cmd": "os.chdir('C:/Users/Name/Desktop')",
    "question_id": "30405804-73",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.chdir"
    ],
    "clean_cmd": "os.chdir()",
    "canonical_cmd": "os.chdir('VAR_STR')"
  },
  {
    "nl": "get a request parameter `a` in jinja2",
    "cmd": "{{request.args.get('a')}}",
    "question_id": "9647586-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.webbrowser#webbrowser.get"
    ],
    "clean_cmd": "{{request.args.get()}}",
    "canonical_cmd": "{{request.args.get('VAR_STR')}}"
  },
  {
    "nl": "get the  size of file 'C:\\\\Python27\\\\Lib\\\\genericpath.py'",
    "cmd": "os.stat('C:\\\\Python27\\\\Lib\\\\genericpath.py').st_size",
    "question_id": "6591931-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.stat"
    ],
    "clean_cmd": "os.stat().st_size",
    "canonical_cmd": "os.stat('VAR_STR').st_size"
  },
  {
    "nl": "urlencode a querystring 'string_of_characters_like_these:$#@=?%^Q^$' in python 2",
    "cmd": "urllib.parse.quote_plus('string_of_characters_like_these:$#@=?%^Q^$')",
    "question_id": "5607551-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.parse#urllib.parse.quote_plus"
    ],
    "clean_cmd": "urllib.parse.quote_plus()",
    "canonical_cmd": "urllib.parse.quote_plus('VAR_STR')"
  },
  {
    "nl": "generate random Decimal",
    "cmd": "decimal.Decimal(random.randrange(10000)) / 100",
    "question_id": "439115-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.random#random.randrange"
    ],
    "clean_cmd": "decimal.Decimal(random.randrange()) / ",
    "canonical_cmd": "decimal.Decimal(random.randrange(10000)) / 100"
  },
  {
    "nl": "How to get only the last part of a path in Python?",
    "cmd": "os.path.basename(os.path.normpath('/folderA/folderB/folderC/folderD/'))",
    "question_id": "3925096-46",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.basename",
      "python.library.os.path#os.path.normpath"
    ],
    "clean_cmd": "os.path.basename(os.path.normpath())",
    "canonical_cmd": "os.path.basename(os.path.normpath('/folderA/folderB/folderC/folderD/'))"
  },
  {
    "nl": "get index of key 'c' in dictionary `x`",
    "cmd": "list(x.keys()).index('c')",
    "question_id": "14538885-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.keys"
    ],
    "clean_cmd": "list(x.keys()).index()",
    "canonical_cmd": "list(VAR_STR.keys()).index('VAR_STR')"
  },
  {
    "nl": "create a list of values from the dictionary `programs` that have a key with a case insensitive match to 'new york'",
    "cmd": "[value for key, value in list(programs.items()) if 'new york' in key.lower()]",
    "question_id": "10484261-51",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#str.lower",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "[value  key, value  list(programs.items())    key.lower()]",
    "canonical_cmd": "[value for key, value in list(VAR_STR.items()) if 'VAR_STR' in key.lower()]"
  },
  {
    "nl": "Replace special characters in utf-8 encoded string `s` using the %xx escape",
    "cmd": "urllib.parse.quote(s.encode('utf-8'))",
    "question_id": "8905864-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.parse#urllib.parse.quote",
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "urllib.parse.quote(s.encode())",
    "canonical_cmd": "urllib.parse.quote(VAR_STR.encode('utf-8'))"
  },
  {
    "nl": "URL encoding in python",
    "cmd": "urllib.parse.quote_plus('a b')",
    "question_id": "8905864-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.parse#urllib.parse.quote_plus"
    ],
    "clean_cmd": "urllib.parse.quote_plus()",
    "canonical_cmd": "urllib.parse.quote_plus('a b')"
  },
  {
    "nl": "Get sum of values of columns  'Y1961', 'Y1962', 'Y1963' after group by on columns \"Country\" and \"Item_code\" in dataframe `df`.",
    "cmd": "df.groupby(['Country', 'Item_Code'])[['Y1961', 'Y1962', 'Y1963']].sum()",
    "question_id": "32751229-20",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "python.library.functions#sum"
    ],
    "clean_cmd": "df.groupby([, ])[[, , ]].sum()",
    "canonical_cmd": "VAR_STR.groupby(['VAR_STR', 'Item_Code'])[['VAR_STR', 'VAR_STR', 'VAR_STR']].sum()"
  },
  {
    "nl": "get the number of values in list `j` that is greater than 5",
    "cmd": "sum(((i > 5) for i in j))",
    "question_id": "10543303-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(((i &gt; )  i  j))",
    "canonical_cmd": "sum(i > 5 for i in VAR_STR)"
  },
  {
    "nl": "get the number of values in list `j` that is greater than 5",
    "cmd": "len([1 for i in j if (i > 5)])",
    "question_id": "10543303-81",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len"
    ],
    "clean_cmd": "len([  i  j  (i &gt; )])",
    "canonical_cmd": "len([(1) for i in VAR_STR if i > 5])"
  },
  {
    "nl": "get the number of values in list `j` that is greater than `i`",
    "cmd": "j = np.array(j)\nsum((j > i))",
    "question_id": "10543303-37",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.array",
      "python.library.functions#sum"
    ],
    "clean_cmd": "j = np.array(j)sum((j &gt; i))",
    "canonical_cmd": "VAR_STR = np.array(VAR_STR)\nsum(VAR_STR > VAR_STR)"
  },
  {
    "nl": "check if a numpy array `a1` contains any element of another array `a2`",
    "cmd": "np.any(np.in1d(a1, a2))",
    "question_id": "36190533-1",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.in1d",
      "numpy.reference.generated.numpy.any"
    ],
    "clean_cmd": "np.any(np.in1d(a1, a2))",
    "canonical_cmd": "np.any(np.in1d(VAR_STR, VAR_STR))"
  },
  {
    "nl": "clamping floating number `my_value` to be between `min_value` and `max_value`",
    "cmd": "max(min(my_value, max_value), min_value)",
    "question_id": "9775731-24",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#min",
      "python.library.functions#max"
    ],
    "clean_cmd": "max(min(my_value, max_value), min_value)",
    "canonical_cmd": "max(min(VAR_STR, VAR_STR), VAR_STR)"
  },
  {
    "nl": "get all combination of n binary values",
    "cmd": "lst = list(itertools.product([0, 1], repeat=n))",
    "question_id": "14931769-56",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.product",
      "python.library.functions#list"
    ],
    "clean_cmd": "lst = list(itertools.product([, ], repeat=n))",
    "canonical_cmd": "lst = list(itertools.product([0, 1], repeat=n))"
  },
  {
    "nl": "get all combination of n binary values",
    "cmd": "lst = map(list, itertools.product([0, 1], repeat=n))",
    "question_id": "14931769-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.product",
      "python.library.functions#map"
    ],
    "clean_cmd": "lst = map(list, itertools.product([, ], repeat=n))",
    "canonical_cmd": "lst = map(list, itertools.product([0, 1], repeat=n))"
  },
  {
    "nl": "get all combination of 3 binary values",
    "cmd": "bin = [0, 1]\n[(x, y, z) for x in bin for y in bin for z in bin]",
    "question_id": "14931769-0",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "bin = [, ][(x, y, z)  x  bin  y  bin  z  bin]",
    "canonical_cmd": "bin = [0, 1]\n[(x, y, z) for x in bin for y in bin for z in bin]"
  },
  {
    "nl": "get all combination of 3 binary values",
    "cmd": "lst = list(itertools.product([0, 1], repeat=3))",
    "question_id": "14931769-1",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.product",
      "python.library.functions#list"
    ],
    "clean_cmd": "lst = list(itertools.product([, ], repeat=))",
    "canonical_cmd": "lst = list(itertools.product([0, 1], repeat=3))"
  },
  {
    "nl": "add one day and three hours to the present time from datetime.now()",
    "cmd": "datetime.datetime.now() + datetime.timedelta(days=1, hours=3)",
    "question_id": "6310475-49",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.now",
      "python.library.datetime#datetime.timedelta"
    ],
    "clean_cmd": "datetime.datetime.now() + datetime.timedelta(days=, hours=)",
    "canonical_cmd": "datetime.datetime.now() + datetime.timedelta(days=1, hours=3)"
  },
  {
    "nl": "unpack the binary data represented by the hexadecimal string '4081637ef7d0424a' to a float",
    "cmd": "struct.unpack('d', binascii.unhexlify('4081637ef7d0424a'))",
    "question_id": "38831808-53",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.binascii#binascii.unhexlify",
      "python.library.struct#struct.unpack"
    ],
    "clean_cmd": "struct.unpack(, binascii.unhexlify())",
    "canonical_cmd": "struct.unpack('d', binascii.unhexlify('VAR_STR'))"
  },
  {
    "nl": "add a colorbar to plot `plt` using image `im` on axes `ax`",
    "cmd": "plt.colorbar(im, ax=ax)",
    "question_id": "42387471-14",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.colorbar_api#matplotlib.colorbar.Colorbar"
    ],
    "clean_cmd": "plt.colorbar(im, ax=ax)",
    "canonical_cmd": "VAR_STR.colorbar(VAR_STR, VAR_STR=VAR_STR)"
  },
  {
    "nl": "remove first and last lines of string `s`",
    "cmd": "s[s.find('\\n') + 1:s.rfind('\\n')]",
    "question_id": "28134319-7",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rfind",
      "python.library.stdtypes#str.find"
    ],
    "clean_cmd": "s[s.find() + :s.rfind()]",
    "canonical_cmd": "VAR_STR[VAR_STR.find('\\n') + 1:VAR_STR.rfind('\\n')]"
  },
  {
    "nl": "From multiIndexed dataframe `data` select columns `a` and `c` within each higher order column `one` and `two`",
    "cmd": "data.loc[:, (list(itertools.product(['one', 'two'], ['a', 'c'])))]",
    "question_id": "18470323-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.product",
      "python.library.functions#list",
      "pandas.reference.api.pandas.dataframe.loc"
    ],
    "clean_cmd": "data.loc[:, (list(itertools.product([, ], [, ])))]",
    "canonical_cmd": "VAR_STR.loc[:, (list(itertools.product(['VAR_STR', 'VAR_STR'], ['VAR_STR', 'VAR_STR'])))]"
  },
  {
    "nl": "select only specific columns 'a' and 'c' from a dataframe 'data' with multiindex columns",
    "cmd": "data.loc[:, ([('one', 'a'), ('one', 'c'), ('two', 'a'), ('two', 'c')])]",
    "question_id": "18470323-91",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.loc"
    ],
    "clean_cmd": "data.loc[:, ([(, ), (, ), (, ), (, )])]",
    "canonical_cmd": "VAR_STR.loc[:, ([('one', 'VAR_STR'), ('one', 'VAR_STR'), ('two', 'VAR_STR'), ('two',\n    'VAR_STR')])]"
  },
  {
    "nl": "filter `Users` by field `userprofile` with level greater than or equal to `0`",
    "cmd": "User.objects.filter(userprofile__level__gte=0)",
    "question_id": "10040143-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.logging#logging.Filter.filter"
    ],
    "clean_cmd": "User.objects.filter(userprofile__level__gte=)",
    "canonical_cmd": "User.objects.filter(userprofile__level__gte=0)"
  },
  {
    "nl": "make a list of lists in which each list `g` are the elements from list `test` which have the same characters up to the first `_` character",
    "cmd": "[list(g) for _, g in itertools.groupby(test, lambda x: x.split('_')[0])]",
    "question_id": "27659153-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.groupby",
      "python.library.functions#list",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "[list(g)  _, g  itertools.groupby(test,  x: x.split()[])]",
    "canonical_cmd": "[list(VAR_STR) for VAR_STR, VAR_STR in itertools.groupby(VAR_STR, lambda x: x.split\n    ('VAR_STR')[0])]"
  },
  {
    "nl": "How to group similar items in a list?",
    "cmd": "[list(g) for _, g in itertools.groupby(test, lambda x: x.partition('_')[0])]",
    "question_id": "27659153-76",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.groupby",
      "python.library.functions#list",
      "python.library.stdtypes#str.partition"
    ],
    "clean_cmd": "[list(g)  _, g  itertools.groupby(test,  x: x.partition()[])]",
    "canonical_cmd": "[list(g) for _, g in itertools.groupby(test, lambda x: x.partition('_')[0])]"
  },
  {
    "nl": "django get the value of key 'title' from POST request `request` if exists, else return empty string ''",
    "cmd": "request.POST.get('title', '')",
    "question_id": "11336548-53",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.webbrowser#webbrowser.get"
    ],
    "clean_cmd": "request.POST.get(, )",
    "canonical_cmd": "VAR_STR.POST.get('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "List comprehension with an accumulator in range of 10",
    "cmd": "list(accumulate(list(range(10))))",
    "question_id": "20222485-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.functions#range"
    ],
    "clean_cmd": "list(accumulate(list(range())))",
    "canonical_cmd": "list(accumulate(list(range(10))))"
  },
  {
    "nl": "Create a default empty json object if no json is available in request parameter `mydata`",
    "cmd": "json.loads(request.POST.get('mydata', '{}'))",
    "question_id": "16436133-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.json#json.loads",
      "python.library.webbrowser#webbrowser.get"
    ],
    "clean_cmd": "json.loads(request.POST.get(, ))",
    "canonical_cmd": "json.loads(request.POST.get('VAR_STR', '{}'))"
  },
  {
    "nl": "get the largest index of the last occurrence of characters '([{' in string `test_string`",
    "cmd": "max(test_string.rfind(i) for i in '([{')",
    "question_id": "31950612-15",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#max",
      "python.library.stdtypes#str.rfind"
    ],
    "clean_cmd": "max(test_string.rfind(i)  i  )",
    "canonical_cmd": "max(VAR_STR.rfind(i) for i in 'VAR_STR')"
  },
  {
    "nl": "get all characters in string 'foobar' up to the fourth index",
    "cmd": "\"\"\"foobar\"\"\"[:4]",
    "question_id": "8247792-19",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[:]",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\"[:4]"
  },
  {
    "nl": "cut a string by delimiter '&'",
    "cmd": "s.rfind('&')",
    "question_id": "8247792-29",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rfind"
    ],
    "clean_cmd": "s.rfind()",
    "canonical_cmd": "s.rfind('VAR_STR')"
  },
  {
    "nl": "cut a string using delimiter '&'",
    "cmd": "s[:s.rfind('&')]",
    "question_id": "8247792-90",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rfind"
    ],
    "clean_cmd": "s[:s.rfind()]",
    "canonical_cmd": "s[:s.rfind('VAR_STR')]"
  },
  {
    "nl": "count unique index values in column 'A' in pandas dataframe `ex`",
    "cmd": "ex.groupby(level='A').agg(lambda x: x.index.get_level_values(1).nunique())",
    "question_id": "35178812-30",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.index.get_level_values",
      "pandas.reference.api.pandas.index.nunique",
      "pandas.reference.api.pandas.index.groupby",
      "pandas.reference.api.pandas.series.agg"
    ],
    "clean_cmd": "ex.groupby(level=).agg( x: x.index.get_level_values().nunique())",
    "canonical_cmd": "VAR_STR.groupby(level='VAR_STR').agg(lambda x: x.index.get_level_values(1).\n    nunique())"
  },
  {
    "nl": "List of lists into numpy array",
    "cmd": "numpy.array([[1, 2], [3, 4]])",
    "question_id": "10346336-28",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.array"
    ],
    "clean_cmd": "numpy.array([[, ], [, ]])",
    "canonical_cmd": "numpy.array([[1, 2], [3, 4]])"
  },
  {
    "nl": "Create a tuple `t` containing first element of each tuple in tuple `s`",
    "cmd": "t = tuple(x[0] for x in s)",
    "question_id": "2054416-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#tuple"
    ],
    "clean_cmd": "t = tuple(x[]  x  s)",
    "canonical_cmd": "VAR_STR = tuple(x[0] for x in VAR_STR)"
  },
  {
    "nl": "BeautifulSoup get value associated with attribute 'content' where attribute 'name' is equal to 'City' in tag 'meta' in HTML parsed string `soup`",
    "cmd": "soup.find('meta', {'name': 'City'})['content']",
    "question_id": "11205386-94",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.find"
    ],
    "clean_cmd": "soup.find(, {: })[]",
    "canonical_cmd": "VAR_STR.find('VAR_STR', {'VAR_STR': 'VAR_STR'})['VAR_STR']"
  },
  {
    "nl": "pivot dataframe `df` so that values for `upc` become column headings and values for `saleid` become the index",
    "cmd": "df.pivot_table(index='saleid', columns='upc', aggfunc='size', fill_value=0)",
    "question_id": "39353758-58",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.pivot_table"
    ],
    "clean_cmd": "df.pivot_table(index=, columns=, aggfunc=, fill_value=)",
    "canonical_cmd": "VAR_STR.pivot_table(index='VAR_STR', columns='VAR_STR', aggfunc='size', fill_value=0)"
  },
  {
    "nl": "changing permission of file `path` to `stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH`",
    "cmd": "os.chmod(path, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)",
    "question_id": "16249440-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.chmod"
    ],
    "clean_cmd": "os.chmod(path, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)",
    "canonical_cmd": "os.chmod(VAR_STR, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)"
  },
  {
    "nl": "write dataframe `df` to csv file `filename` with dates formatted as yearmonthday  `%Y%m%d`",
    "cmd": "df.to_csv(filename, date_format='%Y%m%d')",
    "question_id": "13999850-72",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.to_csv"
    ],
    "clean_cmd": "df.to_csv(filename, date_format=)",
    "canonical_cmd": "VAR_STR.to_csv(VAR_STR, date_format='VAR_STR')"
  },
  {
    "nl": "replace NaN values in array `a` with zeros",
    "cmd": "b = np.where(np.isnan(a), 0, a)",
    "question_id": "1800187-72",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.isnan",
      "numpy.reference.generated.numpy.where"
    ],
    "clean_cmd": "b = np.where(np.isnan(a), , a)",
    "canonical_cmd": "b = np.where(np.isnan(VAR_STR), 0, VAR_STR)"
  },
  {
    "nl": "get a numpy array that contains the element wise minimum of three 3x1 arrays",
    "cmd": "np.array([np.arange(3), np.arange(2, -1, -1), np.ones((3,))]).min(axis=0)",
    "question_id": "39277638-98",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.arange",
      "numpy.reference.generated.numpy.ones",
      "numpy.reference.generated.numpy.array",
      "python.library.functions#min"
    ],
    "clean_cmd": "np.array([np.arange(), np.arange(, -, -), np.ones((,))]).min(axis=)",
    "canonical_cmd": "np.array([np.arange(3), np.arange(2, -1, -1), np.ones((3,))]).min(axis=0)"
  },
  {
    "nl": "obtain the current day of the week in a 3 letter format from a datetime object",
    "cmd": "datetime.datetime.now().strftime('%a')",
    "question_id": "15509617-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.now",
      "python.library.datetime#datetime.datetime.strftime"
    ],
    "clean_cmd": "datetime.datetime.now().strftime()",
    "canonical_cmd": "datetime.datetime.now().strftime('%a')"
  },
  {
    "nl": "dictionary `d` to string, custom format",
    "cmd": "\"\"\"<br/>\"\"\".join([('%s:: %s' % (key, value)) for key, value in list(d.items())])",
    "question_id": "8519599-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join([( % (key, value))  key, value  list(d.items())])",
    "canonical_cmd": "\"\"\"<br/>\"\"\".join([('%s:: %s' % (key, value)) for key, value in list(VAR_STR.\n    items())])"
  },
  {
    "nl": "select rows of dataframe `df` whose value for column `A` is `foo`",
    "cmd": "print(df.loc[df['A'] == 'foo'])",
    "question_id": "17071871-46",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.loc"
    ],
    "clean_cmd": "print(df.loc[df[] == ])",
    "canonical_cmd": "print(VAR_STR.loc[VAR_STR['VAR_STR'] == 'VAR_STR'])"
  },
  {
    "nl": "select rows whose column value in column `column_name` does not equal `some_value` in pandas data frame",
    "cmd": "df.loc[df['column_name'] != some_value]",
    "question_id": "17071871-76",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.loc"
    ],
    "clean_cmd": "df.loc[df[] != some_value]",
    "canonical_cmd": "df.loc[df['VAR_STR'] != VAR_STR]"
  },
  {
    "nl": "select rows from a dataframe `df` whose value for column `column_name` is not in `some_values`",
    "cmd": "df.loc[~df['column_name'].isin(some_values)]",
    "question_id": "17071871-51",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.loc",
      "pandas.reference.api.pandas.dataframe.isin"
    ],
    "clean_cmd": "df.loc[~df[].isin(some_values)]",
    "canonical_cmd": "VAR_STR.loc[~VAR_STR['VAR_STR'].isin(VAR_STR)]"
  },
  {
    "nl": "select all rows whose values in a column `column_name` equals a scalar `some_value` in pandas data frame object `df`",
    "cmd": "df.loc[df['column_name'] == some_value]",
    "question_id": "17071871-83",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.loc"
    ],
    "clean_cmd": "df.loc[df[] == some_value]",
    "canonical_cmd": "VAR_STR.loc[VAR_STR['VAR_STR'] == VAR_STR]"
  },
  {
    "nl": "Select rows whose value of the \"B\" column is \"one\" or \"three\" in the DataFrame `df`",
    "cmd": "print(df.loc[df['B'].isin(['one', 'three'])])",
    "question_id": "17071871-79",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.loc",
      "pandas.reference.api.pandas.dataframe.isin"
    ],
    "clean_cmd": "print(df.loc[df[].isin([, ])])",
    "canonical_cmd": "print(VAR_STR.loc[VAR_STR['VAR_STR'].isin(['VAR_STR', 'VAR_STR'])])"
  },
  {
    "nl": "get a list of locally installed Python modules",
    "cmd": "help('modules')",
    "question_id": "739993-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#help"
    ],
    "clean_cmd": "help()",
    "canonical_cmd": "help('modules')"
  },
  {
    "nl": "Enable the SO_REUSEADDR socket option in socket object `s` to fix the error `only one usage of each socket address is normally permitted`",
    "cmd": "s.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1)",
    "question_id": "12362542-44",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "s.setsockopt(SOL_SOCKET, SO_REUSEADDR, )",
    "canonical_cmd": "VAR_STR.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1)"
  },
  {
    "nl": "import all classes from module `some.package`",
    "cmd": "globals().update(importlib.import_module('some.package').__dict__)",
    "question_id": "4116061-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.importlib#importlib.import_module",
      "python.library.functions#globals",
      "python.library.turtle#turtle.update"
    ],
    "clean_cmd": "globals().update(importlib.import_module().__dict__)",
    "canonical_cmd": "globals().update(importlib.import_module('VAR_STR').__dict__)"
  },
  {
    "nl": "Find indices of elements equal to zero from numpy array `x`",
    "cmd": "numpy.where((x == 0))[0]",
    "question_id": "4588628-92",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.where"
    ],
    "clean_cmd": "numpy.where((x == ))[]",
    "canonical_cmd": "numpy.where(VAR_STR == 0)[0]"
  },
  {
    "nl": "create a json response `response_data`",
    "cmd": "return HttpResponse(json.dumps(response_data), content_type='application/json')",
    "question_id": "2428092-52",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.json#json.dumps"
    ],
    "clean_cmd": " HttpResponse(json.dumps(response_data), content_type=)",
    "canonical_cmd": "return HttpResponse(json.dumps(VAR_STR), content_type='application/json')"
  },
  {
    "nl": "Convert a string `s` containing hex bytes to a hex string",
    "cmd": "s.decode('hex')",
    "question_id": "10824319-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "s.decode()",
    "canonical_cmd": "VAR_STR.decode('hex')"
  },
  {
    "nl": "convert a string `s` containing hex bytes to a hex string",
    "cmd": "binascii.a2b_hex(s)",
    "question_id": "10824319-49",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.binascii#binascii.a2b_hex"
    ],
    "clean_cmd": "binascii.a2b_hex(s)",
    "canonical_cmd": "binascii.a2b_hex(VAR_STR)"
  },
  {
    "nl": "zip list `a`, `b`, `c` into a list of tuples",
    "cmd": "[(x + tuple(y)) for x, y in zip(zip(a, b), c)]",
    "question_id": "12655007-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#tuple"
    ],
    "clean_cmd": "[(x + tuple(y))  x, y  zip(zip(a, b), c)]",
    "canonical_cmd": "[(x + tuple(y)) for x, y in zip(zip(VAR_STR, VAR_STR), VAR_STR)]"
  },
  {
    "nl": "convert dictionary `adict` into string",
    "cmd": "\"\"\"\"\"\".join('{}{}'.format(key, val) for key, val in sorted(adict.items()))",
    "question_id": "10472907-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#format",
      "python.library.stdtypes#dict.items",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(.format(key, val)  key, val  sorted(adict.items()))",
    "canonical_cmd": "\"\"\"\"\"\".join('{}{}'.format(key, val) for key, val in sorted(VAR_STR.items()))"
  },
  {
    "nl": "convert dictionary `adict` into string",
    "cmd": "\"\"\"\"\"\".join('{}{}'.format(key, val) for key, val in list(adict.items()))",
    "question_id": "10472907-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(.format(key, val)  key, val  list(adict.items()))",
    "canonical_cmd": "\"\"\"\"\"\".join('{}{}'.format(key, val) for key, val in list(VAR_STR.items()))"
  },
  {
    "nl": "convert matlab engine array `x` to a numpy ndarray",
    "cmd": "np.array(x._data).reshape(x.size[::-1]).T",
    "question_id": "34155829-56",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.array",
      "numpy.reference.generated.numpy.reshape"
    ],
    "clean_cmd": "np.array(x._data).reshape(x.size[::-]).T",
    "canonical_cmd": "np.array(VAR_STR._data).reshape(VAR_STR.size[::-1]).T"
  },
  {
    "nl": "Convert list of booleans `walls` into a hex string",
    "cmd": "hex(int(''.join([str(int(b)) for b in walls]), 2))",
    "question_id": "17731822-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.functions#hex",
      "python.library.stdtypes#str",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "hex(int(.join([str(int(b))  b  walls]), ))",
    "canonical_cmd": "hex(int(''.join([str(int(b)) for b in VAR_STR]), 2))"
  },
  {
    "nl": "convert the sum of list `walls` into a hex presentation",
    "cmd": "hex(sum(b << i for i, b in enumerate(reversed(walls))))",
    "question_id": "17731822-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate",
      "python.library.functions#reversed",
      "python.library.functions#hex",
      "python.library.functions#sum"
    ],
    "clean_cmd": "hex(sum(b &lt;&lt; i  i, b  enumerate(reversed(walls))))",
    "canonical_cmd": "hex(sum(b << i for i, b in enumerate(reversed(VAR_STR))))"
  },
  {
    "nl": "set the current working directory to 'c:\\\\Users\\\\uname\\\\desktop\\\\python'",
    "cmd": "os.chdir('c:\\\\Users\\\\uname\\\\desktop\\\\python')",
    "question_id": "1810743-81",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.chdir"
    ],
    "clean_cmd": "os.chdir()",
    "canonical_cmd": "os.chdir('VAR_STR')"
  },
  {
    "nl": "set the current working directory to path `path`",
    "cmd": "os.chdir(path)",
    "question_id": "1810743-84",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.chdir"
    ],
    "clean_cmd": "os.chdir(path)",
    "canonical_cmd": "os.chdir(VAR_STR)"
  },
  {
    "nl": "Check the status code of url \"http://www.stackoverflow.com\"",
    "cmd": "urllib.request.urlopen('http://www.stackoverflow.com').getcode()",
    "question_id": "1949318-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#urllib.request.urlopen"
    ],
    "clean_cmd": "urllib.request.urlopen().getcode()",
    "canonical_cmd": "urllib.request.urlopen('VAR_STR').getcode()"
  },
  {
    "nl": "Check the status code of url \"www.python.org\"",
    "cmd": "conn = httplib.HTTPConnection('www.python.org')\nconn.request('HEAD', '/')\nr1 = conn.getresponse()\nprint(r1.status, r1.reason)",
    "question_id": "1949318-62",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.http.client#http.client.HTTPConnection.getresponse",
      "python.library.http.client#http.client.HTTPConnection.request",
      "python.library.http.client#http.client.HTTPConnection"
    ],
    "clean_cmd": "conn = httplib.HTTPConnection()conn.request(, )r1 = conn.getresponse()print(r1.status, r1.reason)",
    "canonical_cmd": "conn = httplib.HTTPConnection('VAR_STR')\nconn.request('HEAD', '/')\nr1 = conn.getresponse()\nprint(r1.status, r1.reason)"
  },
  {
    "nl": "Check the status code of url `url`",
    "cmd": "r = requests.head(url)\nreturn (r.status_code == 200)",
    "question_id": "1949318-89",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.head"
    ],
    "clean_cmd": "r = requests.head(url) (r.status_code == )",
    "canonical_cmd": "r = requests.head(VAR_STR)\nreturn r.status_code == 200"
  },
  {
    "nl": "Checking if website \"http://www.stackoverflow.com\" is up",
    "cmd": "print(urllib.request.urlopen('http://www.stackoverflow.com').getcode())",
    "question_id": "1949318-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#urllib.request.urlopen"
    ],
    "clean_cmd": "print(urllib.request.urlopen().getcode())",
    "canonical_cmd": "print(urllib.request.urlopen('VAR_STR').getcode())"
  },
  {
    "nl": "rename column 'gdp' in dataframe `data` to 'log(gdp)'",
    "cmd": "data.rename(columns={'gdp': 'log(gdp)'}, inplace=True)",
    "question_id": "19758364-39",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.rename"
    ],
    "clean_cmd": "data.rename(columns={: }, inplace=True)",
    "canonical_cmd": "VAR_STR.rename(columns={'VAR_STR': 'VAR_STR'}, inplace=True)"
  },
  {
    "nl": "commit all the changes after executing a query.",
    "cmd": "dbb.commit()",
    "question_id": "15271907-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.msilib#msilib.CAB.commit"
    ],
    "clean_cmd": "dbb.commit()",
    "canonical_cmd": "dbb.commit()"
  },
  {
    "nl": "best way to extract subset of key-value pairs with keys matching 'l', 'm', or 'n' from python dictionary object",
    "cmd": "{k: bigdict[k] for k in list(bigdict.keys()) & {'l', 'm', 'n'}}",
    "question_id": "5352546-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.keys"
    ],
    "clean_cmd": "{k: bigdict[k]  k  list(bigdict.keys()) &amp; {, , }}",
    "canonical_cmd": "{k: bigdict[k] for k in list(bigdict.keys()) & {'VAR_STR', 'VAR_STR', 'VAR_STR'}}"
  },
  {
    "nl": "extract subset of key-value pairs with keys as `('l', 'm', 'n')` from dictionary object `bigdict`",
    "cmd": "dict((k, bigdict[k]) for k in ('l', 'm', 'n'))",
    "question_id": "5352546-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict((k, bigdict[k])  k  (, , ))",
    "canonical_cmd": "dict((k, VAR_STR[k]) for k in (VAR_STR))"
  },
  {
    "nl": "Get items from a dictionary `bigdict` where the keys are present in `('l', 'm', 'n')`",
    "cmd": "{k: bigdict.get(k, None) for k in ('l', 'm', 'n')}",
    "question_id": "5352546-22",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.get"
    ],
    "clean_cmd": "{k: bigdict.get(k, None)  k  (, , )}",
    "canonical_cmd": "{k: VAR_STR.get(k, None) for k in (VAR_STR)}"
  },
  {
    "nl": "Extract subset of key value pair for keys 'l', 'm', 'n' from `bigdict` in python 3",
    "cmd": "{k: bigdict[k] for k in ('l', 'm', 'n')}",
    "question_id": "5352546-75",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "{k: bigdict[k]  k  (, , )}",
    "canonical_cmd": "{k: VAR_STR[k] for k in ('VAR_STR', 'VAR_STR', 'VAR_STR')}"
  },
  {
    "nl": "decode JSON string `u` to a dictionary",
    "cmd": "json.load(u)",
    "question_id": "2331943-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.json#json.load"
    ],
    "clean_cmd": "json.load(u)",
    "canonical_cmd": "json.load(VAR_STR)"
  },
  {
    "nl": "Search for string 'blabla' in txt file 'example.txt'",
    "cmd": "if ('blabla' in open('example.txt').read()):\n    pass",
    "question_id": "4940032-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.os#os.read"
    ],
    "clean_cmd": " (  open().read()):",
    "canonical_cmd": "if 'VAR_STR' in open('VAR_STR').read():\n    pass"
  },
  {
    "nl": "Search for string 'blabla' in txt file 'example.txt'",
    "cmd": "f = open('example.txt')\ns = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)\nif (s.find('blabla') != (-1)):\n    pass",
    "question_id": "4940032-15",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.mmap#mmap.mmap.find",
      "python.library.fileinput#fileinput.fileno"
    ],
    "clean_cmd": "f = open()s = mmap.mmap(f.fileno(), , access=mmap.ACCESS_READ) (s.find() != (-)):",
    "canonical_cmd": "f = open('VAR_STR')\ns = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)\nif s.find('VAR_STR') != -1:\n    pass"
  },
  {
    "nl": "Search for string `blabla` in txt file 'example.txt'",
    "cmd": "datafile = file('example.txt')\nfound = False\nfor line in datafile:\n    if (blabla in line):\n        return True\nreturn False",
    "question_id": "4940032-22",
    "cmd_name": "conala",
    "oracle_man": [
      "django.ref.files.file#django.core.files.File.file"
    ],
    "clean_cmd": "datafile = file()found = False line  datafile: (blabla  line): True False",
    "canonical_cmd": "datafile = file('VAR_STR')\nfound = False\nfor line in datafile:\n    if VAR_STR in line:\n        return True\nreturn False"
  },
  {
    "nl": "request http url `url`",
    "cmd": "r = requests.get(url)",
    "question_id": "4476373-46",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.webbrowser#webbrowser.get"
    ],
    "clean_cmd": "r = requests.get(url)",
    "canonical_cmd": "r = requests.get(VAR_STR)"
  },
  {
    "nl": "request http url `url` with parameters `payload`",
    "cmd": "r = requests.get(url, params=payload)",
    "question_id": "4476373-22",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.webbrowser#webbrowser.get"
    ],
    "clean_cmd": "r = requests.get(url, params=payload)",
    "canonical_cmd": "r = requests.get(VAR_STR, params=VAR_STR)"
  },
  {
    "nl": "post request url `url` with parameters `payload`",
    "cmd": "r = requests.post(url, data=payload)",
    "question_id": "4476373-71",
    "cmd_name": "conala",
    "oracle_man": [
      "pygame.ref.fastevent#pygame.fastevent.post"
    ],
    "clean_cmd": "r = requests.post(url, data=payload)",
    "canonical_cmd": "r = requests.post(VAR_STR, data=VAR_STR)"
  },
  {
    "nl": "make an HTTP post request with data `post_data`",
    "cmd": "post_response = requests.post(url='http://httpbin.org/post', json=post_data)",
    "question_id": "4476373-99",
    "cmd_name": "conala",
    "oracle_man": [
      "pygame.ref.fastevent#pygame.fastevent.post"
    ],
    "clean_cmd": "post_response = requests.post(url=, json=post_data)",
    "canonical_cmd": "post_response = requests.post(url='http://httpbin.org/post', json=VAR_STR)"
  },
  {
    "nl": "convert 173 to binary string",
    "cmd": "bin(173)",
    "question_id": "1476-16",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#bin"
    ],
    "clean_cmd": "bin()",
    "canonical_cmd": "bin(173)"
  },
  {
    "nl": "convert binary string '01010101111' to integer",
    "cmd": "int('01010101111', 2)",
    "question_id": "1476-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "int(, )",
    "canonical_cmd": "int('VAR_STR', 2)"
  },
  {
    "nl": "convert binary string '010101' to integer",
    "cmd": "int('010101', 2)",
    "question_id": "1476-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "int(, )",
    "canonical_cmd": "int('VAR_STR', 2)"
  },
  {
    "nl": "convert binary string '0b0010101010' to integer",
    "cmd": "int('0b0010101010', 2)",
    "question_id": "1476-39",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "int(, )",
    "canonical_cmd": "int('VAR_STR', 2)"
  },
  {
    "nl": "convert 21 to binary string",
    "cmd": "bin(21)",
    "question_id": "1476-6",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#bin"
    ],
    "clean_cmd": "bin()",
    "canonical_cmd": "bin(21)"
  },
  {
    "nl": "convert binary string '11111111' to integer",
    "cmd": "int('11111111', 2)",
    "question_id": "1476-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "int(, )",
    "canonical_cmd": "int('VAR_STR', 2)"
  },
  {
    "nl": "get proportion of rows in dataframe `trace_df` whose values for column `ratio` are greater than 0",
    "cmd": "(trace_df['ratio'] > 0).mean()",
    "question_id": "41178532-76",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.statistics#statistics.mean"
    ],
    "clean_cmd": "(trace_df[] &gt; ).mean()",
    "canonical_cmd": "(VAR_STR['VAR_STR'] > 0).mean()"
  },
  {
    "nl": "get all possible combination of items from 2-dimensional list `a`",
    "cmd": "list(itertools.product(*a))",
    "question_id": "8249836-41",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.product",
      "python.library.functions#list"
    ],
    "clean_cmd": "list(itertools.product(*a))",
    "canonical_cmd": "list(itertools.product(*VAR_STR))"
  },
  {
    "nl": "iterate over a dictionary `d` in sorted order",
    "cmd": "it = iter(sorted(d.items()))",
    "question_id": "364519-94",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#iter",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "it = iter(sorted(d.items()))",
    "canonical_cmd": "it = iter(sorted(VAR_STR.items()))"
  },
  {
    "nl": "iterate over a dictionary `d` in sorted order",
    "cmd": "for (key, value) in sorted(d.items()):\n    pass",
    "question_id": "364519-100",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": " (key, value)  sorted(d.items()):",
    "canonical_cmd": "for key, value in sorted(VAR_STR.items()):\n    pass"
  },
  {
    "nl": "iterate over a dictionary `dict` in sorted order",
    "cmd": "return sorted(dict.items())",
    "question_id": "364519-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.items",
      "python.library.functions#sorted"
    ],
    "clean_cmd": " sorted(dict.items())",
    "canonical_cmd": "return sorted(VAR_STR.items())"
  },
  {
    "nl": "iterate over a dictionary `dict` in sorted order",
    "cmd": "return iter(sorted(dict.items()))",
    "question_id": "364519-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.items",
      "python.library.functions#sorted",
      "python.library.functions#iter"
    ],
    "clean_cmd": " iter(sorted(dict.items()))",
    "canonical_cmd": "return iter(sorted(VAR_STR.items()))"
  },
  {
    "nl": "iterate over a dictionary `foo` in sorted order",
    "cmd": "for (k, v) in sorted(foo.items()):\n    pass",
    "question_id": "364519-52",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": " (k, v)  sorted(foo.items()):",
    "canonical_cmd": "for k, v in sorted(VAR_STR.items()):\n    pass"
  },
  {
    "nl": "iterate over a dictionary `foo` sorted by the key",
    "cmd": "for k in sorted(foo.keys()):\n    pass",
    "question_id": "364519-98",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.stdtypes#dict.keys"
    ],
    "clean_cmd": " k  sorted(foo.keys()):",
    "canonical_cmd": "for k in sorted(VAR_STR.keys()):\n    pass"
  },
  {
    "nl": "lower-case the string obtained by replacing the occurrences of regex pattern '(?<=[a-z])([A-Z])' in string `s` with eplacement '-\\\\1'",
    "cmd": "re.sub('(?<=[a-z])([A-Z])', '-\\\\1', s).lower()",
    "question_id": "39414085-16",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub",
      "python.library.stdtypes#str.lower"
    ],
    "clean_cmd": "re.sub(, , s).lower()",
    "canonical_cmd": "re.sub('VAR_STR', 'VAR_STR', VAR_STR).lower()"
  },
  {
    "nl": "convert a tensor with list of constants `[1, 2, 3]` into a numpy array in tensorflow",
    "cmd": "print(type(tf.Session().run(tf.constant([1, 2, 3]))))",
    "question_id": "34097281-53",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#type",
      "python.library.ast#ast.Constant",
      "flask.api.index#flask.session",
      "python.library.pdb#pdb.run"
    ],
    "clean_cmd": "print(type(tf.Session().run(tf.constant([, , ]))))",
    "canonical_cmd": "print(type(tf.Session().run(tf.constant([VAR_STR]))))"
  },
  {
    "nl": "Initialize a pandas series object `s` with columns `['A', 'B', 'A1R', 'B2', 'AABB4']`",
    "cmd": "s = pd.Series(['A', 'B', 'A1R', 'B2', 'AABB4'])",
    "question_id": "39816795-71",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.series"
    ],
    "clean_cmd": "s = pd.Series([, , , , ])",
    "canonical_cmd": "VAR_STR = pd.Series([VAR_STR])"
  },
  {
    "nl": "get set intersection between dictionaries `d1` and `d2`",
    "cmd": "dict((x, set(y) & set(d1.get(x, ()))) for x, y in d2.items())",
    "question_id": "638360-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#set",
      "python.library.stdtypes#dict",
      "python.library.stdtypes#dict.items",
      "python.library.stdtypes#dict.get"
    ],
    "clean_cmd": "dict((x, set(y) & set(d1.get(x, ())))  x, y  d2.items())",
    "canonical_cmd": "dict((x, set(y) & set(VAR_STR.get(x, ()))) for x, y in VAR_STR.items())"
  },
  {
    "nl": "construct pandas dataframe from a list of tuples",
    "cmd": "df = pandas.DataFrame(data, columns=['R_Number', 'C_Number', 'Avg', 'Std'])",
    "question_id": "19961490-16",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe"
    ],
    "clean_cmd": "df = pandas.DataFrame(data, columns=[, , , ])",
    "canonical_cmd": "df = pandas.DataFrame(data, columns=['R_Number', 'C_Number', 'Avg', 'Std'])"
  },
  {
    "nl": "converting dictionary `d` into a dataframe `pd` with keys as data for column 'Date' and the corresponding values as data for column 'DateValue'",
    "cmd": "pd.DataFrame(list(d.items()), columns=['Date', 'DateValue'])",
    "question_id": "18837262-55",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe",
      "python.library.functions#list",
      "pandas.reference.api.pandas.dataframe.items"
    ],
    "clean_cmd": "pd.DataFrame(list(d.items()), columns=[, ])",
    "canonical_cmd": "VAR_STR.DataFrame(list(VAR_STR.items()), columns=['VAR_STR', 'VAR_STR'])"
  },
  {
    "nl": "generate a random number in 1 to 7 with a given distribution [0.1, 0.05, 0.05, 0.2, 0.4, 0.2]",
    "cmd": "numpy.random.choice(numpy.arange(1, 7), p=[0.1, 0.05, 0.05, 0.2, 0.4, 0.2])",
    "question_id": "4265988-26",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.random.generated.numpy.random.choice",
      "numpy.reference.generated.numpy.arange"
    ],
    "clean_cmd": "numpy.random.choice(numpy.arange(, ), p=[0.1, 0.05, 0.05, 0.2, 0.4, 0.2])",
    "canonical_cmd": "numpy.random.choice(numpy.arange(1, 7), p=[0.1, 0.05, 0.05, 0.2, 0.4, 0.2])"
  },
  {
    "nl": "Convert dataframe `df` to a pivot table using column 'year', 'month', and 'item' as indexes",
    "cmd": "df.set_index(['year', 'month', 'item']).unstack(level=-1)",
    "question_id": "35414625-11",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.set_index",
      "pandas.reference.api.pandas.dataframe.unstack"
    ],
    "clean_cmd": "df.set_index([, , ]).unstack(level=-)",
    "canonical_cmd": "VAR_STR.set_index(['VAR_STR', 'VAR_STR', 'VAR_STR']).unstack(level=-1)"
  },
  {
    "nl": "run a pivot with a multi-index `year` and `month` in a pandas data frame",
    "cmd": "df.pivot_table(values='value', index=['year', 'month'], columns='item')",
    "question_id": "35414625-17",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.pivot_table"
    ],
    "clean_cmd": "df.pivot_table(values=, index=[, ], columns=)",
    "canonical_cmd": "df.pivot_table(values='value', index=['VAR_STR', 'VAR_STR'], columns='item')"
  },
  {
    "nl": "get output from process `p1`",
    "cmd": "p1.communicate()[0]",
    "question_id": "748028-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.Popen.communicate"
    ],
    "clean_cmd": "p1.communicate()[]",
    "canonical_cmd": "VAR_STR.communicate()[0]"
  },
  {
    "nl": "How to get output of exe in python script?",
    "cmd": "output = subprocess.Popen(['mycmd', 'myarg'], stdout=PIPE).communicate()[0]",
    "question_id": "748028-100",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.Popen",
      "python.library.subprocess#subprocess.Popen.communicate"
    ],
    "clean_cmd": "output = subprocess.Popen([, ], stdout=PIPE).communicate()[]",
    "canonical_cmd": "output = subprocess.Popen(['mycmd', 'myarg'], stdout=PIPE).communicate()[0]"
  },
  {
    "nl": "get all sub-elements of an element `a` in an elementtree",
    "cmd": "[elem.tag for elem in a.iter()]",
    "question_id": "10408927-95",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#iter"
    ],
    "clean_cmd": "[elem.tag  elem  a.iter()]",
    "canonical_cmd": "[elem.tag for elem in VAR_STR.iter()]"
  },
  {
    "nl": "get all sub-elements of an element tree `a` excluding the root element",
    "cmd": "[elem.tag for elem in a.iter() if elem is not a]",
    "question_id": "10408927-56",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#iter"
    ],
    "clean_cmd": "[elem.tag  elem  a.iter()  elem   a]",
    "canonical_cmd": "[elem.tag for elem in VAR_STR.iter() if elem is not VAR_STR]"
  },
  {
    "nl": "get the smallest value greater than `2` from a list of numbers `num_list`",
    "cmd": "min([x for x in num_list if x > 2])",
    "question_id": "29471884-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#min"
    ],
    "clean_cmd": "min([x  x  num_list  x > ])",
    "canonical_cmd": "min([x for x in VAR_STR if x > 2])"
  },
  {
    "nl": "remove item \"b\" in list `a`",
    "cmd": "a.remove('b')",
    "question_id": "2793324-81",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#frozenset.remove"
    ],
    "clean_cmd": "a.remove()",
    "canonical_cmd": "VAR_STR.remove('VAR_STR')"
  },
  {
    "nl": "remove item `c` in list `a`",
    "cmd": "a.remove(c)",
    "question_id": "2793324-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#frozenset.remove"
    ],
    "clean_cmd": "a.remove(c)",
    "canonical_cmd": "VAR_STR.remove(VAR_STR)"
  },
  {
    "nl": "delete the element 6 from list `a`",
    "cmd": "a.remove(6)",
    "question_id": "2793324-93",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#frozenset.remove"
    ],
    "clean_cmd": "a.remove()",
    "canonical_cmd": "VAR_STR.remove(6)"
  },
  {
    "nl": "delete the element 6 from list `a`",
    "cmd": "a.remove(6)",
    "question_id": "2793324-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#frozenset.remove"
    ],
    "clean_cmd": "a.remove()",
    "canonical_cmd": "VAR_STR.remove(6)"
  },
  {
    "nl": "delete the element `c` from list `a`",
    "cmd": "if (c in a):\n    a.remove(c)",
    "question_id": "2793324-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#frozenset.remove"
    ],
    "clean_cmd": " (c  a):a.remove(c)",
    "canonical_cmd": "if VAR_STR in VAR_STR:\n    VAR_STR.remove(VAR_STR)"
  },
  {
    "nl": "delete the element `c` from list `a`",
    "cmd": "try:\n    a.remove(c)\nexcept ValueError:\n    pass",
    "question_id": "2793324-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#frozenset.remove"
    ],
    "clean_cmd": ":a.remove(c) ValueError:",
    "canonical_cmd": "try:\n    VAR_STR.remove(VAR_STR)\nexcept ValueError:\n    pass"
  },
  {
    "nl": "call a function `otherfunc` inside a bash script `test.sh` using subprocess",
    "cmd": "subprocess.call('test.sh otherfunc')",
    "question_id": "5826427-6",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call()",
    "canonical_cmd": "subprocess.call('test.sh otherfunc')"
  },
  {
    "nl": "Can a python script execute a function inside a bash script?",
    "cmd": "subprocess.Popen(['bash', '-c', '. foo.sh; go'])",
    "question_id": "5826427-73",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.Popen"
    ],
    "clean_cmd": "subprocess.Popen([, , ])",
    "canonical_cmd": "subprocess.Popen(['bash', '-c', '. foo.sh; go'])"
  },
  {
    "nl": "write pandas dataframe `df` to the file 'c:\\\\data\\\\t.csv' without row names",
    "cmd": "df.to_csv('c:\\\\data\\\\t.csv', index=False)",
    "question_id": "30605909-89",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.to_csv"
    ],
    "clean_cmd": "df.to_csv(, index=False)",
    "canonical_cmd": "VAR_STR.to_csv('VAR_STR', index=False)"
  },
  {
    "nl": "match regex pattern '(\\\\d+(\\\\.\\\\d+)?)' with string '3434.35353'",
    "cmd": "print(re.match('(\\\\d+(\\\\.\\\\d+)?)', '3434.35353').group(1))",
    "question_id": "9891814-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.match",
      "python.library.re#re.Match.group"
    ],
    "clean_cmd": "print(re.match(, ).group())",
    "canonical_cmd": "print(re.match('VAR_STR', 'VAR_STR').group(1))"
  },
  {
    "nl": "filter the objects in django model 'Sample' between date range `2011-01-01` and `2011-01-31`",
    "cmd": "Sample.objects.filter(date__range=['2011-01-01', '2011-01-31'])",
    "question_id": "4668619-32",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.logging#logging.Filter.filter"
    ],
    "clean_cmd": "Sample.objects.filter(date__range=[, ])",
    "canonical_cmd": "VAR_STR.objects.filter(date__range=['VAR_STR', 'VAR_STR'])"
  },
  {
    "nl": "filter objects month wise in django model `Sample` for year `2011`",
    "cmd": "Sample.objects.filter(date__year='2011', date__month='01')",
    "question_id": "4668619-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.logging#logging.Filter.filter"
    ],
    "clean_cmd": "Sample.objects.filter(date__year=, date__month=)",
    "canonical_cmd": "VAR_STR.objects.filter(date__year='VAR_STR', date__month='01')"
  },
  {
    "nl": "Open image 'picture.jpg'",
    "cmd": "img = Image.open('picture.jpg')\nimg.show()",
    "question_id": "16387069-81",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "img = Image.open()img.show()",
    "canonical_cmd": "img = Image.open('VAR_STR')\nimg.show()"
  },
  {
    "nl": "Open image \"picture.jpg\"",
    "cmd": "img = Image.open('picture.jpg')\nImg.show",
    "question_id": "16387069-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "img = Image.open()Img.show",
    "canonical_cmd": "img = Image.open('VAR_STR')\nImg.show"
  },
  {
    "nl": "write the content of file `xxx.mp4` to file `f`",
    "cmd": "f.write(open('xxx.mp4', 'rb').read())",
    "question_id": "12426043-1",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.os#os.read",
      "python.library.os#os.write"
    ],
    "clean_cmd": "f.write(open(, ).read())",
    "canonical_cmd": "VAR_STR.write(open('VAR_STR', 'rb').read())"
  },
  {
    "nl": "Sort the values of the dataframe `df` and align the columns accordingly based on the obtained indices after np.argsort.",
    "cmd": "pd.DataFrame(df.columns[np.argsort(df.values)], df.index, np.unique(df.values))",
    "question_id": "41572822-28",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe",
      "numpy.reference.generated.numpy.argsort",
      "numpy.reference.generated.numpy.unique"
    ],
    "clean_cmd": "pd.DataFrame(df.columns[np.argsort(df.values)], df.index, np.unique(df.values))",
    "canonical_cmd": "pd.DataFrame(VAR_STR.columns[np.argsort(VAR_STR.values)], VAR_STR.index, np.\n    unique(VAR_STR.values))"
  },
  {
    "nl": "remove duplicate rows from dataframe `df1` and calculate their frequency",
    "cmd": "df1.groupby(['key', 'year']).size().reset_index()",
    "question_id": "21558999-5",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.core.groupby.groupby.size",
      "pandas.reference.api.pandas.dataframe.reset_index"
    ],
    "clean_cmd": "df1.groupby([, ]).size().reset_index()",
    "canonical_cmd": "VAR_STR.groupby(['key', 'year']).size().reset_index()"
  },
  {
    "nl": "Get average for every three columns in `df` dataframe",
    "cmd": "df.groupby(np.arange(len(df.columns)) // 3, axis=1).mean()",
    "question_id": "40963347-42",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "python.library.functions#len",
      "numpy.reference.generated.numpy.arange",
      "pandas.reference.api.pandas.dataframe.mean"
    ],
    "clean_cmd": "df.groupby(np.arange(len(df.columns)) // , axis=).mean()",
    "canonical_cmd": "VAR_STR.groupby(np.arange(len(VAR_STR.columns)) // 3, axis=1).mean()"
  },
  {
    "nl": "django redirect to view 'Home.views.index'",
    "cmd": "redirect('Home.views.index')",
    "question_id": "7284952-22",
    "cmd_name": "conala",
    "oracle_man": [
      "flask.api.index#flask.redirect"
    ],
    "clean_cmd": "redirect()",
    "canonical_cmd": "redirect('VAR_STR')"
  },
  {
    "nl": "get a list of keys of dictionary `things` sorted by the value of nested dictionary key 'weight'",
    "cmd": "sorted(list(things.keys()), key=lambda x: things[x]['weight'], reverse=True)",
    "question_id": "42352887-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#dict.keys"
    ],
    "clean_cmd": "sorted(list(things.keys()), key= x: things[x][], reverse=True)",
    "canonical_cmd": "sorted(list(VAR_STR.keys()), key=lambda x: VAR_STR[x]['VAR_STR'], reverse=True)"
  },
  {
    "nl": "get the path of module `a_module`",
    "cmd": "print(a_module.__file__)",
    "question_id": "247770-78",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print(a_module.__file__)",
    "canonical_cmd": "print(VAR_STR.__file__)"
  },
  {
    "nl": "get the path of the current python module",
    "cmd": "print(os.getcwd())",
    "question_id": "247770-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.getcwd"
    ],
    "clean_cmd": "print(os.getcwd())",
    "canonical_cmd": "print(os.getcwd())"
  },
  {
    "nl": "get the path of the python module `amodule`",
    "cmd": "path = os.path.abspath(amodule.__file__)",
    "question_id": "247770-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.abspath"
    ],
    "clean_cmd": "path = os.path.abspath(amodule.__file__)",
    "canonical_cmd": "path = os.path.abspath(VAR_STR.__file__)"
  },
  {
    "nl": "sum the values in each row of every two adjacent columns in dataframe `df`",
    "cmd": "df.groupby(np.arange(len(df.columns)) // 2 + 1, axis=1).sum().add_prefix('s')",
    "question_id": "40660956-52",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "python.library.functions#len",
      "numpy.reference.generated.numpy.arange",
      "python.library.functions#sum",
      "pandas.reference.api.pandas.dataframe.add_prefix"
    ],
    "clean_cmd": "df.groupby(np.arange(len(df.columns)) //  + , axis=).sum().add_prefix()",
    "canonical_cmd": "VAR_STR.groupby(np.arange(len(VAR_STR.columns)) // 2 + 1, axis=1).sum().add_prefix(\n    's')"
  },
  {
    "nl": "display current time",
    "cmd": "now = datetime.datetime.now().strftime('%H:%M:%S')",
    "question_id": "5110352-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strftime"
    ],
    "clean_cmd": "now = datetime.datetime.now().strftime()",
    "canonical_cmd": "now = datetime.datetime.now().strftime('%H:%M:%S')"
  },
  {
    "nl": "decode a hex string '4a4b4c' to UTF-8.",
    "cmd": "bytes.fromhex('4a4b4c').decode('utf-8')",
    "question_id": "3283984-56",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytes.fromhex",
      "python.library.stdtypes#bytes.decode"
    ],
    "clean_cmd": "bytes.fromhex().decode()",
    "canonical_cmd": "bytes.fromhex('VAR_STR').decode('utf-8')"
  },
  {
    "nl": "capture final output of a chain of system commands `ps -ef | grep something | wc -l`",
    "cmd": "subprocess.check_output('ps -ef | grep something | wc -l', shell=True)",
    "question_id": "14971373-85",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.check_output"
    ],
    "clean_cmd": "subprocess.check_output(, shell=True)",
    "canonical_cmd": "subprocess.check_output('VAR_STR', shell=True)"
  },
  {
    "nl": "find intersection data between series `s1` and series `s2`",
    "cmd": "pd.Series(list(set(s1).intersection(set(s2))))",
    "question_id": "18079563-10",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#set",
      "python.library.functions#list",
      "pandas.reference.series"
    ],
    "clean_cmd": "pd.Series(list(set(s1).intersection(set(s2))))",
    "canonical_cmd": "pd.Series(list(set(VAR_STR).intersection(set(VAR_STR))))"
  },
  {
    "nl": "Format a datetime string `when` to extract date only",
    "cmd": "then = datetime.datetime.strptime(when, '%Y-%m-%d').date()",
    "question_id": "26153795-98",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime",
      "python.library.datetime#datetime.datetime.date"
    ],
    "clean_cmd": "then = datetime.datetime.strptime(when, ).date()",
    "canonical_cmd": "then = datetime.datetime.strptime(VAR_STR, '%Y-%m-%d').date()"
  },
  {
    "nl": "get the first object from a queryset in django model `Entry`",
    "cmd": "Entry.objects.filter()[:1].get()",
    "question_id": "5123839-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.logging#logging.Filter.filter",
      "python.library.webbrowser#webbrowser.get"
    ],
    "clean_cmd": "Entry.objects.filter()[:].get()",
    "canonical_cmd": "VAR_STR.objects.filter()[:1].get()"
  },
  {
    "nl": "enable warnings using action 'always'",
    "cmd": "warnings.simplefilter('always')",
    "question_id": "29784889-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.warnings#warnings.simplefilter"
    ],
    "clean_cmd": "warnings.simplefilter()",
    "canonical_cmd": "warnings.simplefilter('VAR_STR')"
  },
  {
    "nl": "download a file \"http://www.example.com/songs/mp3.mp3\" over HTTP and save to \"mp3.mp3\"",
    "cmd": "urllib.request.urlretrieve('http://www.example.com/songs/mp3.mp3', 'mp3.mp3')",
    "question_id": "22676-23",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#urllib.request.urlretrieve"
    ],
    "clean_cmd": "urllib.request.urlretrieve(, )",
    "canonical_cmd": "urllib.request.urlretrieve('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "download a file `url` over HTTP and save to `file_name`",
    "cmd": "u = urllib.request.urlopen(url)\nf = open(file_name, 'wb')\nmeta = u.info()\nfile_size = int(meta.getheaders('Content-Length')[0])\nprint(('Downloading: %s Bytes: %s' % (file_name, file_size)))\nfile_size_dl = 0\nblock_sz = 8192\nwhile True:\n    buffer = u.read(block_sz)\n    if (not buffer):\n        break\n    file_size_dl += len(buffer)\n    f.write(buffer)\n    status = ('%10d  [%3.2f%%]' % (file_size_dl, ((file_size_dl * 100.0) / file_size)))\n    status = (status + (chr(8) * (len(status) + 1)))\n    print(status, end=' ')\nf.close()",
    "question_id": "22676-91",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.urllib.request#urllib.request.urlopen",
      "python.library.functions#chr",
      "python.library.functions#int",
      "python.library.urllib.request#open",
      "python.library.urllib.request#urllib.request.BaseHandler.close",
      "python.library.urllib.request#urllib.response.addinfourl.info"
    ],
    "clean_cmd": "u = urllib.request.urlopen(url)f = open(file_name, )meta = u.info()file_size = int(meta.getheaders()[])print(( % (file_name, file_size)))file_size_dl = block_sz =  True:buffer = u.read(block_sz) ( buffer):file_size_dl += len(buffer)f.write(buffer)status = ( % (file_size_dl, ((file_size_dl * 100.0) / file_size)))status = (status + (chr() * (len(status) + )))print(status, end=)f.close()",
    "canonical_cmd": "u = urllib.request.urlopen(VAR_STR)\nf = open(VAR_STR, 'wb')\nmeta = u.info()\nfile_size = int(meta.getheaders('Content-Length')[0])\nprint('Downloading: %s Bytes: %s' % (VAR_STR, file_size))\nfile_size_dl = 0\nblock_sz = 8192\nwhile True:\n    buffer = u.read(block_sz)\n    if not buffer:\n        break\n    file_size_dl += len(buffer)\n    f.write(buffer)\n    status = '%10d  [%3.2f%%]' % (file_size_dl, file_size_dl * 100.0 /\n        file_size)\n    status = status + chr(8) * (len(status) + 1)\n    print(status, end=' ')\nf.close()"
  },
  {
    "nl": "download a file 'http://www.example.com/' over HTTP",
    "cmd": "response = urllib.request.urlopen('http://www.example.com/')\nhtml = response.read()",
    "question_id": "22676-14",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#urllib.request.urlopen",
      "python.library.urllib.robotparser#urllib.robotparser.RobotFileParser.read"
    ],
    "clean_cmd": "response = urllib.request.urlopen()html = response.read()",
    "canonical_cmd": "response = urllib.request.urlopen('VAR_STR')\nhtml = response.read()"
  },
  {
    "nl": "download a file `url` over HTTP",
    "cmd": "r = requests.get(url)",
    "question_id": "22676-1",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.webbrowser#webbrowser.get"
    ],
    "clean_cmd": "r = requests.get(url)",
    "canonical_cmd": "r = requests.get(VAR_STR)"
  },
  {
    "nl": "download a file `url` over HTTP and save to \"10MB\"",
    "cmd": "response = requests.get(url, stream=True)\nwith open('10MB', 'wb') as handle:\n    for data in tqdm(response.iter_content()):\n        handle.write(data)",
    "question_id": "22676-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "matplotlib.backend_pdf_api#matplotlib.backends.backend_pdf.Stream.write"
    ],
    "clean_cmd": "response = requests.get(url, stream=True) open(, )  handle: data  tqdm(response.iter_content()):handle.write(data)",
    "canonical_cmd": "response = requests.get(VAR_STR, stream=True)\nwith open('VAR_STR', 'wb') as handle:\n    for data in tqdm(response.iter_content()):\n        handle.write(data)"
  },
  {
    "nl": "Multiply a matrix `P` with a 3d tensor `T` in scipy",
    "cmd": "scipy.tensordot(P, T, axes=[1, 1]).swapaxes(0, 1)",
    "question_id": "4490961-76",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.tensordot",
      "numpy.reference.generated.numpy.swapaxes"
    ],
    "clean_cmd": "scipy.tensordot(P, T, axes=[, ]).swapaxes(, )",
    "canonical_cmd": "scipy.tensordot(VAR_STR, VAR_STR, axes=[1, 1]).swapaxes(0, 1)"
  },
  {
    "nl": "convert hex string '470FC614' to a float number",
    "cmd": "struct.unpack('!f', '470FC614'.decode('hex'))[0]",
    "question_id": "1592158-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.struct#struct.unpack",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "struct.unpack(, .decode())[]",
    "canonical_cmd": "struct.unpack('!f', 'VAR_STR'.decode('hex'))[0]"
  },
  {
    "nl": "reverse the list that contains 1 to 10",
    "cmd": "list(reversed(list(range(10))))",
    "question_id": "7286365-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.functions#range",
      "python.library.functions#reversed"
    ],
    "clean_cmd": "list(reversed(list(range())))",
    "canonical_cmd": "list(reversed(list(range(10))))"
  },
  {
    "nl": "get the date 7 days before the current date",
    "cmd": "(datetime.datetime.now() - datetime.timedelta(days=7)).date()",
    "question_id": "20573459-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.now",
      "python.library.datetime#datetime.timedelta",
      "python.library.datetime#datetime.datetime.date"
    ],
    "clean_cmd": "(datetime.datetime.now() - datetime.timedelta(days=)).date()",
    "canonical_cmd": "(datetime.datetime.now() - datetime.timedelta(days=7)).date()"
  },
  {
    "nl": "encode binary string 'your string' to base64 code",
    "cmd": "base64.b64encode(bytes('your string', 'utf-8'))",
    "question_id": "23164058-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.base64#base64.b64encode",
      "python.library.functions#bytes"
    ],
    "clean_cmd": "base64.b64encode(bytes(, ))",
    "canonical_cmd": "base64.b64encode(bytes('VAR_STR', 'utf-8'))"
  },
  {
    "nl": "identify duplicate values in list `mylist`",
    "cmd": "[k for k, v in list(Counter(mylist).items()) if v > 1]",
    "question_id": "11236006-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.collections#collections.Counter",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "[k  k, v  list(Counter(mylist).items())  v > ]",
    "canonical_cmd": "[k for k, v in list(Counter(VAR_STR).items()) if v > 1]"
  },
  {
    "nl": "write the data of dataframe `df` into text file `np.txt`",
    "cmd": "np.savetxt('c:\\\\data\\\\np.txt', df.values, fmt='%d')",
    "question_id": "31247198-48",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.savetxt"
    ],
    "clean_cmd": "np.savetxt(, df.values, fmt=)",
    "canonical_cmd": "np.savetxt('c:\\\\data\\\\np.txt', VAR_STR.values, fmt='%d')"
  },
  {
    "nl": "write content of DataFrame `df` into text file 'c:\\\\data\\\\pandas.txt'",
    "cmd": "df.to_csv('c:\\\\data\\\\pandas.txt', header=None, index=None, sep=' ', mode='a')",
    "question_id": "31247198-21",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.to_csv"
    ],
    "clean_cmd": "df.to_csv(, header=None, index=None, sep=, mode=)",
    "canonical_cmd": "VAR_STR.to_csv('VAR_STR', header=None, index=None, sep=' ', mode='a')"
  },
  {
    "nl": "Split a string `x` by last occurrence of character `-`",
    "cmd": "print(x.rpartition('-')[0])",
    "question_id": "15851568-6",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rpartition"
    ],
    "clean_cmd": "print(x.rpartition()[])",
    "canonical_cmd": "print(VAR_STR.rpartition('VAR_STR')[0])"
  },
  {
    "nl": "get the last part of a string before the character '-'",
    "cmd": "print(x.rsplit('-', 1)[0])",
    "question_id": "15851568-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rsplit"
    ],
    "clean_cmd": "print(x.rsplit(, )[])",
    "canonical_cmd": "print(x.rsplit('VAR_STR', 1)[0])"
  },
  {
    "nl": "create a random list of integers",
    "cmd": "[int(1000 * random.random()) for i in range(10000)]",
    "question_id": "4172131-18",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range",
      "python.library.functions#int"
    ],
    "clean_cmd": "[int( * random.random())  i  range()]",
    "canonical_cmd": "[int(1000 * random.random()) for i in range(10000)]"
  },
  {
    "nl": "Using %f with strftime() in Python to get microseconds",
    "cmd": "datetime.datetime.now().strftime('%H:%M:%S.%f')",
    "question_id": "6677332-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.now",
      "python.library.datetime#datetime.datetime.strftime"
    ],
    "clean_cmd": "datetime.datetime.now().strftime()",
    "canonical_cmd": "datetime.datetime.now().strftime('%H:%M:%S.%f')"
  },
  {
    "nl": "print a 2 dimensional list `tab` as a table with delimiters",
    "cmd": "print('\\n'.join('\\t'.join(str(col) for col in row) for row in tab))",
    "question_id": "583557-14",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "print(.join(.join(str(col)  col  row)  row  tab))",
    "canonical_cmd": "print('\\n'.join('\\t'.join(str(col) for col in row) for row in VAR_STR))"
  },
  {
    "nl": "count non zero values in each column in pandas data frame",
    "cmd": "df.astype(bool).sum(axis=1)",
    "question_id": "26053849-22",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.astype",
      "python.library.functions#sum"
    ],
    "clean_cmd": "df.astype(bool).sum(axis=)",
    "canonical_cmd": "df.astype(bool).sum(axis=1)"
  },
  {
    "nl": "lower a string `text` and remove non-alphanumeric characters aside from space",
    "cmd": "re.sub('[^\\\\sa-zA-Z0-9]', '', text).lower().strip()",
    "question_id": "30693804-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub",
      "python.library.stdtypes#str.lower",
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": "re.sub(, , text).lower().strip()",
    "canonical_cmd": "re.sub('[^\\\\sa-zA-Z0-9]', '', VAR_STR).lower().strip()"
  },
  {
    "nl": "remove all non-alphanumeric characters except space from a string `text` and lower it",
    "cmd": "re.sub('(?!\\\\s)[\\\\W_]', '', text).lower().strip()",
    "question_id": "30693804-72",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub",
      "python.library.stdtypes#str.lower",
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": "re.sub(, , text).lower().strip()",
    "canonical_cmd": "re.sub('(?!\\\\s)[\\\\W_]', '', VAR_STR).lower().strip()"
  },
  {
    "nl": "access a tag called \"name\" in beautifulsoup `soup`",
    "cmd": "print(soup.find('name').string)",
    "question_id": "14435268-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.find"
    ],
    "clean_cmd": "print(soup.find().string)",
    "canonical_cmd": "print(VAR_STR.find('VAR_STR').string)"
  },
  {
    "nl": "Get the characters count in a file `filepath`",
    "cmd": "os.stat(filepath).st_size",
    "question_id": "2011048-15",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.stat"
    ],
    "clean_cmd": "os.stat(filepath).st_size",
    "canonical_cmd": "os.stat(VAR_STR).st_size"
  },
  {
    "nl": "count the occurrences of item \"a\" in list `l`",
    "cmd": "l.count('a')",
    "question_id": "2600191-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.count"
    ],
    "clean_cmd": "l.count()",
    "canonical_cmd": "VAR_STR.count('VAR_STR')"
  },
  {
    "nl": "count the occurrences of items in list `l`",
    "cmd": "Counter(l)",
    "question_id": "2600191-23",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.collections#collections.Counter"
    ],
    "clean_cmd": "Counter(l)",
    "canonical_cmd": "Counter(VAR_STR)"
  },
  {
    "nl": "count the occurrences of items in list `l`",
    "cmd": "[[x, l.count(x)] for x in set(l)]",
    "question_id": "2600191-53",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#set",
      "python.library.stdtypes#str.count"
    ],
    "clean_cmd": "[[x, l.count(x)]  x  set(l)]",
    "canonical_cmd": "[[x, VAR_STR.count(x)] for x in set(VAR_STR)]"
  },
  {
    "nl": "count the occurrences of items in list `l`",
    "cmd": "dict(((x, l.count(x)) for x in set(l)))",
    "question_id": "2600191-55",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.stdtypes#set",
      "python.library.stdtypes#str.count"
    ],
    "clean_cmd": "dict(((x, l.count(x))  x  set(l)))",
    "canonical_cmd": "dict((x, VAR_STR.count(x)) for x in set(VAR_STR))"
  },
  {
    "nl": "count the occurrences of item \"b\" in list `l`",
    "cmd": "l.count('b')",
    "question_id": "2600191-98",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.count"
    ],
    "clean_cmd": "l.count()",
    "canonical_cmd": "VAR_STR.count('VAR_STR')"
  },
  {
    "nl": "copy file `srcfile` to directory `dstdir`",
    "cmd": "shutil.copy(srcfile, dstdir)",
    "question_id": "12842997-15",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.shutil#shutil.copy"
    ],
    "clean_cmd": "shutil.copy(srcfile, dstdir)",
    "canonical_cmd": "shutil.copy(VAR_STR, VAR_STR)"
  },
  {
    "nl": "combine values from column 'b' and column 'a' of dataframe `df`  into column 'c' of datafram `df`",
    "cmd": "df['c'] = np.where(df['a'].isnull, df['b'], df['a'])",
    "question_id": "38152389-84",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.where"
    ],
    "clean_cmd": "df[] = np.where(df[].isnull, df[], df[])",
    "canonical_cmd": "VAR_STR['VAR_STR'] = np.where(VAR_STR['VAR_STR'].isnull, VAR_STR['VAR_STR'], VAR_STR['VAR_STR']\n    )"
  },
  {
    "nl": "Convert tuple `t` to list",
    "cmd": "list(t)",
    "question_id": "16296643-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list"
    ],
    "clean_cmd": "list(t)",
    "canonical_cmd": "list(VAR_STR)"
  },
  {
    "nl": "Convert list `t` to tuple",
    "cmd": "tuple(l)",
    "question_id": "16296643-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list"
    ],
    "clean_cmd": "tuple(l)",
    "canonical_cmd": "tuple(l)"
  },
  {
    "nl": "Convert tuple `level1` to list",
    "cmd": "level1 = map(list, level1)",
    "question_id": "16296643-20",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map"
    ],
    "clean_cmd": "level1 = map(list, level1)",
    "canonical_cmd": "VAR_STR = map(list, VAR_STR)"
  },
  {
    "nl": "send the output of pprint object `dataobject` to file `logFile`",
    "cmd": "pprint.pprint(dataobject, logFile)",
    "question_id": "3880399-74",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "pprint.pprint(dataobject, logFile)",
    "canonical_cmd": "pprint.pprint(VAR_STR, VAR_STR)"
  },
  {
    "nl": "get index of rows in column 'BoolCol'",
    "cmd": "df.loc[df['BoolCol']]",
    "question_id": "21800169-34",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.loc"
    ],
    "clean_cmd": "df.loc[df[]]",
    "canonical_cmd": "df.loc[df['VAR_STR']]"
  },
  {
    "nl": "Create a list containing the indexes of rows where the value of column 'BoolCol' in dataframe `df` are equal to True",
    "cmd": "df.iloc[np.flatnonzero(df['BoolCol'])]",
    "question_id": "21800169-19",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.flatnonzero",
      "pandas.reference.api.pandas.dataframe.loc"
    ],
    "clean_cmd": "df.iloc[np.flatnonzero(df[])]",
    "canonical_cmd": "VAR_STR.iloc[np.flatnonzero(VAR_STR['VAR_STR'])]"
  },
  {
    "nl": "get list of indexes of rows where column 'BoolCol' values match True",
    "cmd": "df[df['BoolCol'] == True].index.tolist()",
    "question_id": "21800169-92",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.index.tolist"
    ],
    "clean_cmd": "df[df[] == True].index.tolist()",
    "canonical_cmd": "df[df['VAR_STR'] == True].index.tolist()"
  },
  {
    "nl": "get index of rows in dataframe `df` which column 'BoolCol' matches value True",
    "cmd": "df[df['BoolCol']].index.tolist()",
    "question_id": "21800169-41",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.index.tolist"
    ],
    "clean_cmd": "df[df[]].index.tolist()",
    "canonical_cmd": "VAR_STR[VAR_STR['VAR_STR']].index.tolist()"
  },
  {
    "nl": "change working directory to the directory `owd`",
    "cmd": "os.chdir(owd)",
    "question_id": "299446-100",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.chdir"
    ],
    "clean_cmd": "os.chdir(owd)",
    "canonical_cmd": "os.chdir(VAR_STR)"
  },
  {
    "nl": "remove a div with a id `main-content` using beautifulsoup",
    "cmd": "soup.find('div', id='main-content').decompose()",
    "question_id": "32063985-36",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.find"
    ],
    "clean_cmd": "soup.find(, id=).decompose()",
    "canonical_cmd": "soup.find('div', id='VAR_STR').decompose()"
  },
  {
    "nl": "join each element in array `a` with element at the same index in array `b` as a tuple",
    "cmd": "np.array([zip(x, y) for x, y in zip(a, b)])",
    "question_id": "17960441-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "numpy.reference.generated.numpy.array"
    ],
    "clean_cmd": "np.array([zip(x, y)  x, y  zip(a, b)])",
    "canonical_cmd": "np.array([zip(x, y) for x, y in zip(VAR_STR, VAR_STR)])"
  },
  {
    "nl": "zip two 2-d arrays `a` and `b`",
    "cmd": "np.array(zip(a.ravel(), b.ravel()), dtype='i4,i4').reshape(a.shape)",
    "question_id": "17960441-39",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "numpy.reference.generated.numpy.array",
      "numpy.reference.generated.numpy.ravel"
    ],
    "clean_cmd": "np.array(zip(a.ravel(), b.ravel()), dtype=).reshape(a.shape)",
    "canonical_cmd": "np.array(zip(VAR_STR.ravel(), VAR_STR.ravel()), dtype='i4,i4').reshape(VAR_STR.shape)"
  },
  {
    "nl": "Find last occurrence of character '}' in string \"abcd}def}\"",
    "cmd": "'abcd}def}'.rfind('}')",
    "question_id": "26443308-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rfind"
    ],
    "clean_cmd": ".rfind()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".rfind('VAR_STR')"
  },
  {
    "nl": "shuffle columns of an numpy array 'r'",
    "cmd": "np.random.shuffle(np.transpose(r))",
    "question_id": "20546419-7",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.random.generated.numpy.random.shuffle",
      "numpy.reference.generated.numpy.transpose"
    ],
    "clean_cmd": "np.random.shuffle(np.transpose(r))",
    "canonical_cmd": "np.random.shuffle(np.transpose(VAR_STR))"
  },
  {
    "nl": "determine number of files on a drive with python",
    "cmd": "os.statvfs('/').f_files - os.statvfs('/').f_ffree",
    "question_id": "574236-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.statvfs"
    ],
    "clean_cmd": "os.statvfs().f_files - os.statvfs().f_ffree",
    "canonical_cmd": "os.statvfs('/').f_files - os.statvfs('/').f_ffree"
  },
  {
    "nl": "how to get a single result from a SQLite query in python?",
    "cmd": "cursor.fetchone()[0]",
    "question_id": "7011291-98",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.sqlite3#sqlite3.Cursor.fetchone"
    ],
    "clean_cmd": "cursor.fetchone()[]",
    "canonical_cmd": "cursor.fetchone()[0]"
  },
  {
    "nl": "prepend the line '#test firstline\\n' to the contents of file 'infile' and save as the file 'outfile'",
    "cmd": "open('outfile', 'w').write('#test firstline\\n' + open('infile').read())",
    "question_id": "4454298-91",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "open(, ).write( + open().read())",
    "canonical_cmd": "open('VAR_STR', 'w').write('VAR_STR' + open('VAR_STR').read())"
  },
  {
    "nl": "return `True` if string `foobarrrr` contains regex `ba[rzd]`",
    "cmd": "bool(re.search('ba[rzd]', 'foobarrrr'))",
    "question_id": "9012008-0",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.search",
      "python.library.functions#bool"
    ],
    "clean_cmd": "bool(re.search(, ))",
    "canonical_cmd": "bool(re.search('VAR_STR', 'VAR_STR'))"
  },
  {
    "nl": "Removing duplicates in list `t`",
    "cmd": "list(set(t))",
    "question_id": "7961363-85",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#set"
    ],
    "clean_cmd": "list(set(t))",
    "canonical_cmd": "list(set(VAR_STR))"
  },
  {
    "nl": "Removing duplicates in list `source_list`",
    "cmd": "list(set(source_list))",
    "question_id": "7961363-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#set"
    ],
    "clean_cmd": "list(set(source_list))",
    "canonical_cmd": "list(set(VAR_STR))"
  },
  {
    "nl": "Removing duplicates in list `abracadabra`",
    "cmd": "list(OrderedDict.fromkeys('abracadabra'))",
    "question_id": "7961363-7",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.fromkeys"
    ],
    "clean_cmd": "list(OrderedDict.fromkeys())",
    "canonical_cmd": "list(OrderedDict.fromkeys('VAR_STR'))"
  },
  {
    "nl": "Convert array `a` into a list",
    "cmd": "numpy.array(a).reshape(-1).tolist()",
    "question_id": "5183533-81",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.array",
      "python.library.array#array.array.tolist",
      "numpy.reference.generated.numpy.reshape"
    ],
    "clean_cmd": "numpy.array(a).reshape(-).tolist()",
    "canonical_cmd": "numpy.array(VAR_STR).reshape(-1).tolist()"
  },
  {
    "nl": "Convert the first row of numpy matrix `a` to a list",
    "cmd": "numpy.array(a)[0].tolist()",
    "question_id": "5183533-11",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.array",
      "python.library.array#array.array.tolist"
    ],
    "clean_cmd": "numpy.array(a)[].tolist()",
    "canonical_cmd": "numpy.array(VAR_STR)[0].tolist()"
  },
  {
    "nl": "In `soup`, get the content of the sibling of the `td`  tag with text content `Address:`",
    "cmd": "print(soup.find(text='Address:').findNext('td').contents[0])",
    "question_id": "5999747-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.find"
    ],
    "clean_cmd": "print(soup.find(text=).findNext().contents[])",
    "canonical_cmd": "print(VAR_STR.find(text='VAR_STR').findNext('VAR_STR').contents[0])"
  },
  {
    "nl": "encode string 'data to be encoded'",
    "cmd": "encoded = base64.b64encode('data to be encoded')",
    "question_id": "8908287-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.base64#base64.b64encode"
    ],
    "clean_cmd": "encoded = base64.b64encode()",
    "canonical_cmd": "encoded = base64.b64encode('VAR_STR')"
  },
  {
    "nl": "encode a string `data to be encoded` to `ascii` encoding",
    "cmd": "encoded = 'data to be encoded'.encode('ascii')",
    "question_id": "8908287-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "encoded = .encode()",
    "canonical_cmd": "encoded = 'VAR_STR'.encode('VAR_STR')"
  },
  {
    "nl": "Get attribute `my_str` of object `my_object`",
    "cmd": "getattr(my_object, my_str)",
    "question_id": "9035479-73",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#getattr"
    ],
    "clean_cmd": "getattr(my_object, my_str)",
    "canonical_cmd": "getattr(VAR_STR, VAR_STR)"
  },
  {
    "nl": "replace all elements in array `A` that are not present in array `[1, 3, 4]` with zeros",
    "cmd": "np.where(np.in1d(A, [1, 3, 4]).reshape(A.shape), A, 0)",
    "question_id": "34945274-35",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.in1d",
      "numpy.reference.generated.numpy.where",
      "numpy.reference.generated.numpy.reshape"
    ],
    "clean_cmd": "np.where(np.in1d(A, [, , ]).reshape(A.shape), A, )",
    "canonical_cmd": "np.where(np.in1d(VAR_STR, [VAR_STR]).reshape(VAR_STR.shape), VAR_STR, 0)"
  },
  {
    "nl": "calculate mean across dimension in a 2d array `a`",
    "cmd": "np.mean(a, axis=1)",
    "question_id": "15819980-52",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.mean"
    ],
    "clean_cmd": "np.mean(a, axis=)",
    "canonical_cmd": "np.mean(VAR_STR, axis=1)"
  },
  {
    "nl": "add a header to a csv file",
    "cmd": "writer.writeheader()",
    "question_id": "15907200-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.csv#csv.DictWriter.writeheader"
    ],
    "clean_cmd": "writer.writeheader()",
    "canonical_cmd": "writer.writeheader()"
  },
  {
    "nl": "selecting rows in Numpy ndarray 'a', where the value in the first column is 0 and value in the second column is 1",
    "cmd": "a[np.where((a[:, (0)] == 0) * (a[:, (1)] == 1))]",
    "question_id": "23359886-48",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.where"
    ],
    "clean_cmd": "a[np.where((a[:, ()] == ) * (a[:, ()] == ))]",
    "canonical_cmd": "VAR_STR[np.where((VAR_STR[:, (0)] == 0) * (VAR_STR[:, (1)] == 1))]"
  },
  {
    "nl": "convert a pandas series `sf` into a pandas dataframe `df` with columns `email` and `list`",
    "cmd": "pd.DataFrame({'email': sf.index, 'list': sf.values})",
    "question_id": "26097916-29",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe"
    ],
    "clean_cmd": "pd.DataFrame({: sf.index, : sf.values})",
    "canonical_cmd": "pd.DataFrame({'VAR_STR': VAR_STR.index, 'VAR_STR': VAR_STR.values})"
  },
  {
    "nl": "resized image `image` to width, height of `(x, y)` with filter of `ANTIALIAS`",
    "cmd": "image = image.resize((x, y), Image.ANTIALIAS)",
    "question_id": "1405602-48",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.resize"
    ],
    "clean_cmd": "image = image.resize((x, y), Image.ANTIALIAS)",
    "canonical_cmd": "VAR_STR = VAR_STR.resize((VAR_STR), Image.VAR_STR)"
  },
  {
    "nl": "find 10 largest differences between each respective elements of list `l1` and list `l2`",
    "cmd": "heapq.nlargest(10, range(len(l1)), key=lambda i: abs(l1[i] - l2[i]))",
    "question_id": "9323159-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.heapq#heapq.nlargest",
      "python.library.functions#len",
      "python.library.functions#range",
      "python.library.functions#abs"
    ],
    "clean_cmd": "heapq.nlargest(, range(len(l1)), key= i: abs(l1[i] - l2[i]))",
    "canonical_cmd": "heapq.nlargest(10, range(len(VAR_STR)), key=lambda i: abs(VAR_STR[i] - VAR_STR[i]))"
  },
  {
    "nl": "BeautifulSoup find all 'span' elements in HTML string `soup` with class of 'starGryB sp'",
    "cmd": "soup.find_all('span', {'class': 'starGryB sp'})",
    "question_id": "29877663-54",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "soup.find_all(, {: })",
    "canonical_cmd": "VAR_STR.find_all('VAR_STR', {'class': 'VAR_STR'})"
  },
  {
    "nl": "Sort lis `list5` in ascending order based on the degrees value of its elements",
    "cmd": "sorted(list5, lambda x: (degree(x), x))",
    "question_id": "16193578-72",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "numpy.reference.generated.numpy.polynomial.hermite_e.hermitee.degree"
    ],
    "clean_cmd": "sorted(list5,  x: (degree(x), x))",
    "canonical_cmd": "sorted(VAR_STR, lambda x: (degree(x), x))"
  },
  {
    "nl": "How do I perform secondary sorting in python?",
    "cmd": "sorted(list5, key=lambda vertex: (degree(vertex), vertex))",
    "question_id": "16193578-58",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "numpy.reference.generated.numpy.polynomial.hermite_e.hermitee.degree"
    ],
    "clean_cmd": "sorted(list5, key= vertex: (degree(vertex), vertex))",
    "canonical_cmd": "sorted(list5, key=lambda vertex: (degree(vertex), vertex))"
  },
  {
    "nl": "get attribute 'attr' from object `obj`",
    "cmd": "getattr(obj, 'attr')",
    "question_id": "7373219-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#getattr"
    ],
    "clean_cmd": "getattr(obj, )",
    "canonical_cmd": "getattr(VAR_STR, 'VAR_STR')"
  },
  {
    "nl": "encode `Decimal('3.9')` to a JSON string",
    "cmd": "json.dumps(Decimal('3.9'))",
    "question_id": "1960516-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.json#json.dumps",
      "python.library.decimal#decimal.Decimal"
    ],
    "clean_cmd": "json.dumps(Decimal())",
    "canonical_cmd": "json.dumps(Decimal('3.9'))"
  },
  {
    "nl": "filter dataframe `df` by values in column `A` that appear more than once",
    "cmd": "df.groupby('A').filter(lambda x: len(x) > 1)",
    "question_id": "29836836-87",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "python.library.functions#len",
      "python.library.functions#filter"
    ],
    "clean_cmd": "df.groupby().filter( x: len(x) &gt; )",
    "canonical_cmd": "VAR_STR.groupby('VAR_STR').filter(lambda x: len(x) > 1)"
  },
  {
    "nl": "add color bar with image `mappable` to plot `plt`",
    "cmd": "plt.colorbar(mappable=mappable, cax=ax3)",
    "question_id": "35420052-72",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.colorbar_api#matplotlib.colorbar.Colorbar"
    ],
    "clean_cmd": "plt.colorbar(mappable=mappable, cax=ax3)",
    "canonical_cmd": "VAR_STR.colorbar(VAR_STR=VAR_STR, cax=ax3)"
  },
  {
    "nl": "count most frequent 100 words in column 'text' of dataframe `df`",
    "cmd": "Counter(' '.join(df['text']).split()).most_common(100)",
    "question_id": "29903025-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.collections#collections.Counter.most_common",
      "python.library.collections#collections.Counter",
      "python.library.stdtypes#str.join",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "Counter(.join(df[]).split()).most_common()",
    "canonical_cmd": "Counter(' '.join(VAR_STR['VAR_STR']).split()).most_common(100)"
  },
  {
    "nl": "get a value of datetime.today() in the UTC time zone",
    "cmd": "datetime.now(pytz.utc)",
    "question_id": "4530069-65",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.now"
    ],
    "clean_cmd": "datetime.now(pytz.utc)",
    "canonical_cmd": "datetime.now(pytz.utc)"
  },
  {
    "nl": "Django response with JSON `data`",
    "cmd": "return HttpResponse(data, mimetype='application/json')",
    "question_id": "9262278-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.http.client#http.client.HTTPResponse"
    ],
    "clean_cmd": " HttpResponse(data, mimetype=)",
    "canonical_cmd": "return HttpResponse(VAR_STR, mimetype='application/json')"
  },
  {
    "nl": "Open a background process 'background-process' with arguments 'arguments'",
    "cmd": "subprocess.Popen(['background-process', 'arguments'])",
    "question_id": "3595685-42",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.Popen"
    ],
    "clean_cmd": "subprocess.Popen([, ])",
    "canonical_cmd": "subprocess.Popen(['VAR_STR', 'VAR_STR'])"
  },
  {
    "nl": "finding the index of elements containing substring 'how' and 'what' in a list of strings 'myList'.",
    "cmd": "[i for i, j in enumerate(myList) if 'how' in j.lower() or 'what' in j.lower()]",
    "question_id": "30729735-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate",
      "python.library.stdtypes#str.lower"
    ],
    "clean_cmd": "[i  i, j  enumerate(myList)    j.lower()    j.lower()]",
    "canonical_cmd": "[i for i, j in enumerate(VAR_STR) if 'VAR_STR' in j.lower() or 'VAR_STR' in j.lower()\n    ]"
  },
  {
    "nl": "unpack a series of tuples in pandas into a DataFrame with column names 'out-1' and 'out-2'",
    "cmd": "pd.DataFrame(out.tolist(), columns=['out-1', 'out-2'], index=out.index)",
    "question_id": "22799300-26",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe",
      "pandas.reference.api.pandas.index.tolist"
    ],
    "clean_cmd": "pd.DataFrame(out.tolist(), columns=[, ], index=out.index)",
    "canonical_cmd": "pd.DataFrame(out.tolist(), columns=['VAR_STR', 'VAR_STR'], index=out.index)"
  },
  {
    "nl": "rotate the xtick labels of matplotlib plot `ax` by `45` degrees to make long labels readable",
    "cmd": "ax.set_xticklabels(labels, rotation=45)",
    "question_id": "3464359-19",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib._as_gen.matplotlib.axes.axes.set_xticklabels"
    ],
    "clean_cmd": "ax.set_xticklabels(labels, rotation=)",
    "canonical_cmd": "VAR_STR.set_xticklabels(labels, rotation=45)"
  },
  {
    "nl": "get number of keys in dictionary `yourdict`",
    "cmd": "len(list(yourdict.keys()))",
    "question_id": "2212433-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#list",
      "python.library.stdtypes#dict.keys"
    ],
    "clean_cmd": "len(list(yourdict.keys()))",
    "canonical_cmd": "len(list(VAR_STR.keys()))"
  },
  {
    "nl": "count the number of keys in dictionary `yourdictfile`",
    "cmd": "len(set(open(yourdictfile).read().split()))",
    "question_id": "2212433-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.urllib.request#open",
      "python.library.stdtypes#set",
      "python.library.os#os.read",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "len(set(open(yourdictfile).read().split()))",
    "canonical_cmd": "len(set(open(VAR_STR).read().split()))"
  },
  {
    "nl": "split a list in first column  into multiple columns keeping other columns as well in pandas data frame",
    "cmd": "pd.concat([df[0].apply(pd.Series), df[1]], axis=1)",
    "question_id": "40924332-83",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.concat",
      "pandas.reference.api.pandas.series.apply"
    ],
    "clean_cmd": "pd.concat([df[].apply(pd.Series), df[]], axis=)",
    "canonical_cmd": "pd.concat([df[0].apply(pd.Series), df[1]], axis=1)"
  },
  {
    "nl": "Call a subprocess with arguments `c:\\\\Program Files\\\\VMware\\\\VMware Server\\\\vmware-cmd.bat` that may contain spaces",
    "cmd": "subprocess.Popen(['c:\\\\Program Files\\\\VMware\\\\VMware Server\\\\vmware-cmd.bat'])",
    "question_id": "804995-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.Popen"
    ],
    "clean_cmd": "subprocess.Popen([])",
    "canonical_cmd": "subprocess.Popen(['VAR_STR'])"
  },
  {
    "nl": "reverse a priority queue `q` in python without using classes",
    "cmd": "q.put((-n, n))",
    "question_id": "26441253-42",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.put"
    ],
    "clean_cmd": "q.put((-n, n))",
    "canonical_cmd": "VAR_STR.put((-n, n))"
  },
  {
    "nl": "numpy concatenate two arrays `a` and `b` along the first axis",
    "cmd": "print(concatenate((a, b), axis=0))",
    "question_id": "21887754-31",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.concatenate"
    ],
    "clean_cmd": "print(concatenate((a, b), axis=))",
    "canonical_cmd": "print(concatenate((VAR_STR, VAR_STR), axis=0))"
  },
  {
    "nl": "numpy concatenate two arrays `a` and `b` along the second axis",
    "cmd": "print(concatenate((a, b), axis=1))",
    "question_id": "21887754-88",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.concatenate"
    ],
    "clean_cmd": "print(concatenate((a, b), axis=))",
    "canonical_cmd": "print(concatenate((VAR_STR, VAR_STR), axis=1))"
  },
  {
    "nl": "numpy concatenate two arrays `a` and `b` along the first axis",
    "cmd": "c = np.r_[(a[None, :], b[None, :])]",
    "question_id": "21887754-21",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "c = np.r_[(a[None, :], b[None, :])]",
    "canonical_cmd": "c = np.r_[VAR_STR[(None), :], VAR_STR[(None), :]]"
  },
  {
    "nl": "numpy concatenate two arrays `a` and `b` along the first axis",
    "cmd": "np.array((a, b))",
    "question_id": "21887754-64",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.array"
    ],
    "clean_cmd": "np.array((a, b))",
    "canonical_cmd": "np.array((VAR_STR, VAR_STR))"
  },
  {
    "nl": "return a 401 unauthorized in django",
    "cmd": "return HttpResponse('Unauthorized', status=401)",
    "question_id": "4356842-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.http.client#http.client.HTTPResponse"
    ],
    "clean_cmd": " HttpResponse(, status=)",
    "canonical_cmd": "return HttpResponse('Unauthorized', status=401)"
  },
  {
    "nl": "Get the integer location of a key `bob` in a pandas data frame",
    "cmd": "df.index.get_loc('bob')",
    "question_id": "31793195-78",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.index.get_loc"
    ],
    "clean_cmd": "df.index.get_loc()",
    "canonical_cmd": "df.index.get_loc('VAR_STR')"
  },
  {
    "nl": "converting two lists `[1, 2, 3]` and `[4, 5, 6]` into a matrix",
    "cmd": "np.column_stack(([1, 2, 3], [4, 5, 6]))",
    "question_id": "18730044-52",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.column_stack"
    ],
    "clean_cmd": "np.column_stack(([, , ], [, , ]))",
    "canonical_cmd": "np.column_stack(([VAR_STR], [VAR_STR]))"
  },
  {
    "nl": "delete all occurrences of character 'i' in string 'it is icy'",
    "cmd": "\"\"\"\"\"\".join([char for char in 'it is icy' if char != 'i'])",
    "question_id": "22187233-98",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.char.join"
    ],
    "clean_cmd": ".join([char  char    char != ])",
    "canonical_cmd": "\"\"\"\"\"\".join([char for char in 'VAR_STR' if char != 'VAR_STR'])"
  },
  {
    "nl": "delete all instances of a character 'i' in a string 'it is icy'",
    "cmd": "re.sub('i', '', 'it is icy')",
    "question_id": "22187233-48",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , )",
    "canonical_cmd": "re.sub('VAR_STR', '', 'VAR_STR')"
  },
  {
    "nl": "delete all characters \"i\" in string \"it is icy\"",
    "cmd": "\"\"\"it is icy\"\"\".replace('i', '')",
    "question_id": "22187233-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": ".replace(, )",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".replace('VAR_STR', '')"
  },
  {
    "nl": "How to delete all instances of a character in a string in python?",
    "cmd": "\"\"\"\"\"\".join([char for char in 'it is icy' if char != 'i'])",
    "question_id": "22187233-63",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.char.join"
    ],
    "clean_cmd": ".join([char  char    char != ])",
    "canonical_cmd": "\"\"\"\"\"\".join([char for char in 'it is icy' if char != 'i'])"
  },
  {
    "nl": "converting list of strings `intstringlist` to list of integer `nums`",
    "cmd": "nums = [int(x) for x in intstringlist]",
    "question_id": "10351772-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "nums = [int(x)  x  intstringlist]",
    "canonical_cmd": "VAR_STR = [int(x) for x in VAR_STR]"
  },
  {
    "nl": "convert list of string numbers into list of integers",
    "cmd": "map(int, eval(input('Enter the unfriendly numbers: ')))",
    "question_id": "10351772-10",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#eval",
      "python.library.functions#input",
      "python.library.functions#map"
    ],
    "clean_cmd": "map(int, eval(input()))",
    "canonical_cmd": "map(int, eval(input('Enter the unfriendly numbers: ')))"
  },
  {
    "nl": "Find all files in directory \"/mydir\" with extension \".txt\"",
    "cmd": "os.chdir('/mydir')\nfor file in glob.glob('*.txt'):\n    pass",
    "question_id": "3964681-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.chdir"
    ],
    "clean_cmd": "os.chdir() file  glob.glob():",
    "canonical_cmd": "os.chdir('VAR_STR')\nfor file in glob.glob('*.txt'):\n    pass"
  },
  {
    "nl": "Find all files in directory \"/mydir\" with extension \".txt\"",
    "cmd": "for file in os.listdir('/mydir'):\n    if file.endswith('.txt'):\n        pass",
    "question_id": "3964681-18",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.listdir",
      "python.library.stdtypes#str.endswith"
    ],
    "clean_cmd": " file  os.listdir(): file.endswith():",
    "canonical_cmd": "for file in os.listdir('VAR_STR'):\n    if file.endswith('VAR_STR'):\n        pass"
  },
  {
    "nl": "Find all files in directory \"/mydir\" with extension \".txt\"",
    "cmd": "for (root, dirs, files) in os.walk('/mydir'):\n    for file in files:\n        if file.endswith('.txt'):\n            pass",
    "question_id": "3964681-62",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.walk",
      "python.library.stdtypes#str.endswith"
    ],
    "clean_cmd": " (root, dirs, files)  os.walk(): file  files: file.endswith():",
    "canonical_cmd": "for root, dirs, files in os.walk('VAR_STR'):\n    for file in files:\n        if file.endswith('VAR_STR'):\n            pass"
  },
  {
    "nl": "decode json string `request.body` to python dict",
    "cmd": "json.loads(request.body)",
    "question_id": "18979111-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.json#json.loads"
    ],
    "clean_cmd": "json.loads(request.body)",
    "canonical_cmd": "json.loads(request.body)"
  },
  {
    "nl": "How to sort a Dataframe by the ocurrences in a column in Python (pandas)",
    "cmd": "df.groupby('prots').sum().sort('scores', ascending=False)",
    "question_id": "36402748-46",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "python.library.functions#sum",
      "pandas.reference.api.pandas.index.sort"
    ],
    "clean_cmd": "df.groupby().sum().sort(, ascending=False)",
    "canonical_cmd": "df.groupby('prots').sum().sort('scores', ascending=False)"
  },
  {
    "nl": "get json data from restful service 'url'",
    "cmd": "json.load(urllib.request.urlopen('url'))",
    "question_id": "7750557-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#urllib.request.urlopen",
      "python.library.json#json.load"
    ],
    "clean_cmd": "json.load(urllib.request.urlopen())",
    "canonical_cmd": "json.load(urllib.request.urlopen('VAR_STR'))"
  },
  {
    "nl": "django filter by hour",
    "cmd": "Entry.objects.filter(pub_date__contains='08:00')",
    "question_id": "2984751-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.logging#logging.Filter.filter"
    ],
    "clean_cmd": "Entry.objects.filter(pub_date__contains=)",
    "canonical_cmd": "Entry.objects.filter(pub_date__contains='08:00')"
  },
  {
    "nl": "sort a list of dictionary `list` first by key `points` and then by `time`",
    "cmd": "list.sort(key=lambda item: (item['points'], item['time']))",
    "question_id": "5944630-52",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "list.sort(key= item: (item[], item[]))",
    "canonical_cmd": "VAR_STR.sort(key=lambda item: (item['VAR_STR'], item['VAR_STR']))"
  },
  {
    "nl": "Convert integer `number` into an unassigned integer",
    "cmd": "struct.unpack('H', struct.pack('h', number))",
    "question_id": "19546911-36",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.struct#struct.unpack",
      "python.library.struct#struct.pack"
    ],
    "clean_cmd": "struct.unpack(, struct.pack(, number))",
    "canonical_cmd": "struct.unpack('H', struct.pack('h', VAR_STR))"
  },
  {
    "nl": "write dataframe `df`, excluding index, to a csv file",
    "cmd": "df.to_csv(filename, index=False)",
    "question_id": "20107570-40",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.to_csv"
    ],
    "clean_cmd": "df.to_csv(filename, index=False)",
    "canonical_cmd": "VAR_STR.to_csv(filename, index=False)"
  },
  {
    "nl": "convert a urllib unquoted string `unescaped` to a json data `json_data`",
    "cmd": "json_data = json.loads(unescaped)",
    "question_id": "8740353-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.json#json.loads"
    ],
    "clean_cmd": "json_data = json.loads(unescaped)",
    "canonical_cmd": "VAR_STR = json.loads(VAR_STR)"
  },
  {
    "nl": "write `newFileBytes` to a binary file `newFile`",
    "cmd": "newFile.write(struct.pack('5B', *newFileBytes))",
    "question_id": "18367007-15",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.struct#struct.pack",
      "python.library.os#os.write"
    ],
    "clean_cmd": "newFile.write(struct.pack(, *newFileBytes))",
    "canonical_cmd": "VAR_STR.write(struct.pack('5B', *VAR_STR))"
  },
  {
    "nl": "get the last key of dictionary `dict`",
    "cmd": "list(dict.keys())[-1]",
    "question_id": "16125229-76",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.keys",
      "python.library.functions#list"
    ],
    "clean_cmd": "list(dict.keys())[-]",
    "canonical_cmd": "list(VAR_STR.keys())[-1]"
  },
  {
    "nl": "Django get maximum value associated with field 'added' in model `AuthorizedEmail`",
    "cmd": "AuthorizedEmail.objects.filter(group=group).order_by('-added')[0]",
    "question_id": "10668585-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.logging#logging.Filter.filter"
    ],
    "clean_cmd": "AuthorizedEmail.objects.filter(group=group).order_by()[]",
    "canonical_cmd": "VAR_STR.objects.filter(group=group).order_by('-added')[0]"
  },
  {
    "nl": "create a datetime with the current date & time",
    "cmd": "datetime.datetime.now()",
    "question_id": "10607688-46",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.now"
    ],
    "clean_cmd": "datetime.datetime.now()",
    "canonical_cmd": "datetime.datetime.now()"
  },
  {
    "nl": "get the highest element in absolute value in a numpy matrix `x`",
    "cmd": "max(x.min(), x.max(), key=abs)",
    "question_id": "17794266-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#max"
    ],
    "clean_cmd": "max(x.min(), x.max(), key=abs)",
    "canonical_cmd": "max(VAR_STR.min(), VAR_STR.max(), key=abs)"
  }
]