[
  {
    "nl": "Create list `instancelist` containing 29 objects of type MyClass",
    "cmd": "instancelist = [MyClass() for i in range(29)]",
    "question_id": "348196-52",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "instancelist = [MyClass()  i  range()]",
    "canonical_cmd": "VAR_STR = [MyClass() for i in range(29)]"
  },
  {
    "nl": "Taking the results of a bash command \"awk '{print $10, $11}' test.txt > test2.txt\"",
    "cmd": "os.system(\"awk '{print $10, $11}' test.txt > test2.txt\")",
    "question_id": "5744980-30",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system('VAR_STR')"
  },
  {
    "nl": "selenium wait for driver `driver` 60 seconds before throwing a NoSuchElementExceptions exception",
    "cmd": "driver.implicitly_wait(60)",
    "question_id": "16739319-24",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.implicitly_wait()",
    "canonical_cmd": "VAR_STR.implicitly_wait(60)"
  },
  {
    "nl": "selenium webdriver switch to frame 'frameName'",
    "cmd": "driver.switch_to_frame('frameName')",
    "question_id": "16739319-56",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.switch_to_frame()",
    "canonical_cmd": "driver.switch_to_frame('VAR_STR')"
  },
  {
    "nl": "Save plot `plt` as png file 'filename.png'",
    "cmd": "plt.savefig('filename.png')",
    "question_id": "39870642-93",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.figure_api#matplotlib.figure.Figure.savefig"
    ],
    "clean_cmd": "plt.savefig()",
    "canonical_cmd": "VAR_STR.savefig('VAR_STR')"
  },
  {
    "nl": "Save matplotlib graph to image file `filename.png` at a resolution of `300 dpi`",
    "cmd": "plt.savefig('filename.png', dpi=300)",
    "question_id": "39870642-22",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.figure_api#matplotlib.figure.Figure.savefig"
    ],
    "clean_cmd": "plt.savefig(, dpi=)",
    "canonical_cmd": "plt.savefig('VAR_STR', dpi=300)"
  },
  {
    "nl": "search for regex pattern 'Test(.*)print' in string `testStr` including new line character '\\n'",
    "cmd": "re.search('Test(.*)print', testStr, re.DOTALL)",
    "question_id": "20062565-45",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.search"
    ],
    "clean_cmd": "re.search(, testStr, re.DOTALL)",
    "canonical_cmd": "re.search('VAR_STR', VAR_STR, re.DOTALL)"
  },
  {
    "nl": "Enclose numbers in quotes in a string `This is number 1 and this is number 22`",
    "cmd": "re.sub('(\\\\d+)', '\"\\\\1\"', 'This is number 1 and this is number 22')",
    "question_id": "42364992-94",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , )",
    "canonical_cmd": "re.sub('(\\\\d+)', '\"\\\\1\"', 'VAR_STR')"
  },
  {
    "nl": "Importing file `file` from folder '/path/to/application/app/folder'",
    "cmd": "sys.path.insert(0, '/path/to/application/app/folder')\nimport file",
    "question_id": "4383571-75",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.insert"
    ],
    "clean_cmd": "sys.path.insert(, )import file",
    "canonical_cmd": "sys.path.insert(0, 'VAR_STR')\nimport VAR_STR"
  },
  {
    "nl": "append the sum of each tuple pair in the grouped list `list1` and list `list2` elements to list `list3`",
    "cmd": "list3 = [(a + b) for a, b in zip(list1, list2)]",
    "question_id": "11703064-52",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "list3 = [(a + b)  a, b  zip(list1, list2)]",
    "canonical_cmd": "VAR_STR = [(a + b) for a, b in zip(VAR_STR, VAR_STR)]"
  },
  {
    "nl": "remove frame of legend in plot `plt`",
    "cmd": "plt.legend(frameon=False)",
    "question_id": "25540259-67",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.legend_api#matplotlib.legend.Legend"
    ],
    "clean_cmd": "plt.legend(frameon=False)",
    "canonical_cmd": "VAR_STR.legend(frameon=False)"
  },
  {
    "nl": "remove the punctuation '!', '.', ':' from a string `asking`",
    "cmd": "out = ''.join(c for c in asking if c not in ('!', '.', ':'))",
    "question_id": "16050952-62",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "out = .join(c  c  asking  c   (, , ))",
    "canonical_cmd": "out = ''.join(c for c in VAR_STR if c not in ('VAR_STR', 'VAR_STR', 'VAR_STR'))"
  },
  {
    "nl": "use a list of values `[3,6]` to select rows from a pandas dataframe `df`'s column 'A'",
    "cmd": "df[df['A'].isin([3, 6])]",
    "question_id": "12096252-61",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.isin"
    ],
    "clean_cmd": "df[df[].isin([, ])]",
    "canonical_cmd": "VAR_STR[VAR_STR['A'].isin([3, 6])]"
  },
  {
    "nl": "Parse a file `sample.xml` using expat parsing in python 3",
    "cmd": "parser.ParseFile(open('sample.xml', 'rb'))",
    "question_id": "1179305-61",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "parser.ParseFile(open(, ))",
    "canonical_cmd": "parser.ParseFile(open('VAR_STR', 'rb'))"
  },
  {
    "nl": "match zero-or-more instances of lower case alphabet characters in a string `f233op `",
    "cmd": "re.findall('([a-z]*)', 'f233op')",
    "question_id": "22229255-65",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall('([a-z]*)', 'VAR_STR')"
  },
  {
    "nl": "match zero-or-more instances of lower case alphabet characters in a string `f233op `",
    "cmd": "re.findall('([a-z])*', 'f233op')",
    "question_id": "22229255-56",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall('([a-z])*', 'VAR_STR')"
  },
  {
    "nl": "Convert hex string \"deadbeef\" to integer",
    "cmd": "int('deadbeef', 16)",
    "question_id": "209513-92",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "int(, )",
    "canonical_cmd": "int('VAR_STR', 16)"
  },
  {
    "nl": "Convert hex string \"a\" to integer",
    "cmd": "int('a', 16)",
    "question_id": "209513-93",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "int(, )",
    "canonical_cmd": "int('VAR_STR', 16)"
  },
  {
    "nl": "Convert hex string \"0xa\" to integer",
    "cmd": "int('0xa', 16)",
    "question_id": "209513-94",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "int(, )",
    "canonical_cmd": "int('VAR_STR', 16)"
  },
  {
    "nl": "Convert hex string `s` to integer",
    "cmd": "int(s, 16)",
    "question_id": "209513-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "int(s, )",
    "canonical_cmd": "int(VAR_STR, 16)"
  },
  {
    "nl": "Convert hex string `hexString` to int",
    "cmd": "int(hexString, 16)",
    "question_id": "209513-79",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "int(hexString, )",
    "canonical_cmd": "int(VAR_STR, 16)"
  },
  {
    "nl": "empty a list `lst`",
    "cmd": "del lst[:]",
    "question_id": "1400608-13",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " lst[:]",
    "canonical_cmd": "del VAR_STR[:]"
  },
  {
    "nl": "empty a list `lst`",
    "cmd": "del lst1[:]",
    "question_id": "1400608-91",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " lst1[:]",
    "canonical_cmd": "del lst1[:]"
  },
  {
    "nl": "empty a list `lst`",
    "cmd": "lst[:] = []",
    "question_id": "1400608-86",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "lst[:] = []",
    "canonical_cmd": "VAR_STR[:] = []"
  },
  {
    "nl": "empty a list `alist`",
    "cmd": "alist[:] = []",
    "question_id": "1400608-67",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "alist[:] = []",
    "canonical_cmd": "VAR_STR[:] = []"
  },
  {
    "nl": "encode unicode string '\\xc5\\xc4\\xd6' to utf-8 code",
    "cmd": "print('\\xc5\\xc4\\xd6'.encode('UTF8'))",
    "question_id": "15740236-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "print(.encode())",
    "canonical_cmd": "print('VAR_STR'.encode('UTF8'))"
  },
  {
    "nl": "solve for the least squares' solution of matrices `a` and `b`",
    "cmd": "np.linalg.solve(np.dot(a.T, a), np.dot(a.T, b))",
    "question_id": "41648246-16",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.dot",
      "numpy.reference.generated.numpy.linalg.solve"
    ],
    "clean_cmd": "np.linalg.solve(np.dot(a.T, a), np.dot(a.T, b))",
    "canonical_cmd": "np.linalg.solve(np.dot(VAR_STR.T, VAR_STR), np.dot(VAR_STR.T, VAR_STR))"
  },
  {
    "nl": "create a file 'filename' with each tuple in the list `mylist` written to a line",
    "cmd": "open('filename', 'w').write('\\n'.join('%s %s' % x for x in mylist))",
    "question_id": "3820312-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.stdtypes#str.join",
      "python.library.os#os.write"
    ],
    "clean_cmd": "open(, ).write(.join( % x  x  mylist))",
    "canonical_cmd": "open('VAR_STR', 'w').write('\\n'.join('%s %s' % x for x in VAR_STR))"
  },
  {
    "nl": "print numbers in list `list` with precision of 3 decimal places",
    "cmd": "print('[%s]' % ', '.join('%.3f' % val for val in list))",
    "question_id": "7351270-100",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "print( % .join( % val  val  list))",
    "canonical_cmd": "print('[%s]' % ', '.join('%.3f' % val for val in VAR_STR))"
  },
  {
    "nl": "format print output of list of floats `l` to print only up to 3 decimal points",
    "cmd": "print('[' + ', '.join('%5.3f' % v for v in l) + ']')",
    "question_id": "7351270-60",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "print( + .join( % v  v  l) + )",
    "canonical_cmd": "print('[' + ', '.join('%5.3f' % v for v in VAR_STR) + ']')"
  },
  {
    "nl": "print a list of floating numbers `l` using string formatting",
    "cmd": "print([('%5.3f' % val) for val in l])",
    "question_id": "7351270-62",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print([( % val)  val  l])",
    "canonical_cmd": "print([('%5.3f' % val) for val in VAR_STR])"
  },
  {
    "nl": "delete letters from string '12454v'",
    "cmd": "\"\"\"\"\"\".join(filter(str.isdigit, '12454v'))",
    "question_id": "14750675-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#filter",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(filter(str.isdigit, ))",
    "canonical_cmd": "\"\"\"\"\"\".join(filter(str.isdigit, 'VAR_STR'))"
  },
  {
    "nl": "Get a md5 hash from string `thecakeisalie`",
    "cmd": "k = hashlib.md5('thecakeisalie').hexdigest()",
    "question_id": "4508155-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.hashlib#hashlib.hash.hexdigest"
    ],
    "clean_cmd": "k = hashlib.md5().hexdigest()",
    "canonical_cmd": "k = hashlib.md5('VAR_STR').hexdigest()"
  },
  {
    "nl": "replace string 'in.' with ' in. ' in dataframe `df` column 'a'",
    "cmd": "df['a'] = df['a'].str.replace('in.', ' in. ')",
    "question_id": "36296993-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "df[] = df[].str.replace(, )",
    "canonical_cmd": "VAR_STR['VAR_STR'] = VAR_STR['VAR_STR'].str.replace('VAR_STR', ' in. ')"
  },
  {
    "nl": "convert string `x'  to dictionary splitted by `=` using list comprehension",
    "cmd": "dict([x.split('=') for x in s.split()])",
    "question_id": "1246444-16",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "dict([x.split()  x  s.split()])",
    "canonical_cmd": "dict([x.split('=') for x in s.split()])"
  },
  {
    "nl": "add a column 'new_col' to dataframe `df` for index in range",
    "cmd": "df['new_col'] = list(range(1, len(df) + 1))",
    "question_id": "12168648-98",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range",
      "python.library.functions#list"
    ],
    "clean_cmd": "df[] = list(range(, len(df) + ))",
    "canonical_cmd": "VAR_STR['VAR_STR'] = list(range(1, len(VAR_STR) + 1))"
  },
  {
    "nl": "apply logical operator 'AND' to all elements in list `a_list`",
    "cmd": "all(a_list)",
    "question_id": "1790520-32",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#all"
    ],
    "clean_cmd": "all(a_list)",
    "canonical_cmd": "all(VAR_STR)"
  },
  {
    "nl": "get a list of booleans `z` that shows wether the corresponding items in list `x` and `y` are equal",
    "cmd": "z = [(i == j) for i, j in zip(x, y)]",
    "question_id": "32996293-30",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "z = [(i == j)  i, j  zip(x, y)]",
    "canonical_cmd": "VAR_STR = [(i == j) for i, j in zip(VAR_STR, VAR_STR)]"
  },
  {
    "nl": "create a list which indicates whether each element in `x` and `y` is identical",
    "cmd": "[(x[i] == y[i]) for i in range(len(x))]",
    "question_id": "32996293-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range"
    ],
    "clean_cmd": "[(x[i] == y[i])  i  range(len(x))]",
    "canonical_cmd": "[(VAR_STR[i] == VAR_STR[i]) for i in range(len(VAR_STR))]"
  },
  {
    "nl": "convert currency string `dollars` to decimal `cents_int`",
    "cmd": "cents_int = int(round(float(dollars.strip('$')) * 100))",
    "question_id": "3887469-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#float",
      "python.library.functions#int",
      "python.library.functions#round",
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": "cents_int = int(round(float(dollars.strip()) * ))",
    "canonical_cmd": "VAR_STR = int(round(float(VAR_STR.strip('$')) * 100))"
  },
  {
    "nl": "sort list `users` using values associated with key 'id' according to elements in list `order`",
    "cmd": "users.sort(key=lambda x: order.index(x['id']))",
    "question_id": "17734779-42",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.index.sort"
    ],
    "clean_cmd": "users.sort(key= x: order.index(x[]))",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: VAR_STR.index(x['VAR_STR']))"
  },
  {
    "nl": "sort a python list of dictionaries `users` by a given list `order` of ids 'id' with the desired order",
    "cmd": "users.sort(key=lambda x: order.index(x['id']))",
    "question_id": "17734779-50",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.index.sort"
    ],
    "clean_cmd": "users.sort(key= x: order.index(x[]))",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: VAR_STR.index(x['VAR_STR']))"
  },
  {
    "nl": "Get all indexes of a letter `e` from a string `word`",
    "cmd": "[index for index, letter in enumerate(word) if letter == 'e']",
    "question_id": "7658932-91",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate"
    ],
    "clean_cmd": "[index  index, letter  enumerate(word)  letter == ]",
    "canonical_cmd": "[index for index, letter in enumerate(VAR_STR) if letter == 'VAR_STR']"
  },
  {
    "nl": "format parameters 'b' and 'a' into plcaeholders in string \"{0}\\\\w{{2}}b{1}\\\\w{{2}}quarter\"",
    "cmd": "\"\"\"{0}\\\\w{{2}}b{1}\\\\w{{2}}quarter\"\"\".format('b', 'a')",
    "question_id": "18609153-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format(, )",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".format('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "get line count of file 'myfile.txt'",
    "cmd": "sum((1 for line in open('myfile.txt')))",
    "question_id": "845058-51",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum",
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "sum((  line  open()))",
    "canonical_cmd": "sum(1 for line in open('VAR_STR'))"
  },
  {
    "nl": "get line count of file `filename`",
    "cmd": "def bufcount(filename):\n    f = open(filename)\n    lines = 0\n    buf_size = (1024 * 1024)\n    read_f = f.read\n    buf = read_f(buf_size)\n    while buf:\n        lines += buf.count('\\n')\n        buf = read_f(buf_size)\n    return lines",
    "question_id": "845058-70",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open"
    ],
    "clean_cmd": " bufcount(filename):f = open(filename)lines = buf_size = ( * )read_f = f.readbuf = read_f(buf_size) buf:lines += buf.count()buf = read_f(buf_size) lines",
    "canonical_cmd": "def bufcount(VAR_STR):\n    f = open(VAR_STR)\n    lines = 0\n    buf_size = 1024 * 1024\n    read_f = f.read\n    buf = read_f(buf_size)\n    while buf:\n        lines += buf.count('\\n')\n        buf = read_f(buf_size)\n    return lines"
  },
  {
    "nl": "Determine the byte length of a utf-8 encoded string `s`",
    "cmd": "return len(s.encode('utf-8'))",
    "question_id": "6714826-24",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": " len(s.encode())",
    "canonical_cmd": "return len(VAR_STR.encode('utf-8'))"
  },
  {
    "nl": "specify multiple positional arguments with argparse",
    "cmd": "parser.add_argument('input', nargs='+')",
    "question_id": "5373474-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.argparse#argparse.ArgumentParser.add_argument"
    ],
    "clean_cmd": "parser.add_argument(, nargs=)",
    "canonical_cmd": "parser.add_argument('input', nargs='+')"
  },
  {
    "nl": "delete every 8th column in a numpy array 'a'.",
    "cmd": "np.delete(a, list(range(0, a.shape[1], 8)), axis=1)",
    "question_id": "28925267-60",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.delete",
      "python.library.functions#range",
      "python.library.functions#list"
    ],
    "clean_cmd": "np.delete(a, list(range(, a.shape[], )), axis=)",
    "canonical_cmd": "np.delete(VAR_STR, list(range(0, VAR_STR.shape[1], 8)), axis=1)"
  },
  {
    "nl": "Replace repeated instances of a character '*' with a single instance in a string 'text'",
    "cmd": "re.sub('\\\\*\\\\*+', '*', text)",
    "question_id": "3878555-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , text)",
    "canonical_cmd": "re.sub('\\\\*\\\\*+', 'VAR_STR', VAR_STR)"
  },
  {
    "nl": "replace repeated instances of \"*\" with a single instance of \"*\"",
    "cmd": "re.sub('\\\\*+', '*', text)",
    "question_id": "3878555-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , text)",
    "canonical_cmd": "re.sub('\\\\*+', 'VAR_STR', text)"
  },
  {
    "nl": "split elements of a list `l` by '\\t'",
    "cmd": "[i.partition('\\t')[-1] for i in l if '\\t' in i]",
    "question_id": "23145240-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.partition"
    ],
    "clean_cmd": "[i.partition()[-]  i  l    i]",
    "canonical_cmd": "[i.partition('VAR_STR')[-1] for i in VAR_STR if 'VAR_STR' in i]"
  },
  {
    "nl": "get two random records from model 'MyModel' in Django",
    "cmd": "MyModel.objects.order_by('?')[:2]",
    "question_id": "1731346-40",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "MyModel.objects.order_by()[:]",
    "canonical_cmd": "VAR_STR.objects.order_by('?')[:2]"
  },
  {
    "nl": "get value of first child of xml node `name`",
    "cmd": "name[0].firstChild.nodeValue",
    "question_id": "317413-56",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "name[].firstChild.nodeValue",
    "canonical_cmd": "VAR_STR[0].firstChild.nodeValue"
  },
  {
    "nl": "Convert a hex string `437c2123 ` according to ascii value.",
    "cmd": "\"\"\"437c2123\"\"\".decode('hex')",
    "question_id": "10618586-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": ".decode()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".decode('hex')"
  },
  {
    "nl": "Get all `a` tags where the text starts with value `some text` using regex",
    "cmd": "doc.xpath(\"//a[starts-with(text(),'some text')]\")",
    "question_id": "2755950-13",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "doc.xpath()",
    "canonical_cmd": "doc.xpath(\"//a[starts-with(text(),'some text')]\")"
  },
  {
    "nl": "split string `str1` on one or more spaces with a regular expression",
    "cmd": "re.split(' +', str1)",
    "question_id": "10974932-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, str1)",
    "canonical_cmd": "re.split(' +', VAR_STR)"
  },
  {
    "nl": "python split string based on regular expression",
    "cmd": "re.findall('\\\\S+', str1)",
    "question_id": "10974932-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, str1)",
    "canonical_cmd": "re.findall('\\\\S+', str1)"
  },
  {
    "nl": "BeautifulSoup find all tags with attribute 'name' equal to 'description'",
    "cmd": "soup.findAll(attrs={'name': 'description'})",
    "question_id": "3774571-10",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "soup.findAll(attrs={: })",
    "canonical_cmd": "soup.findAll(attrs={'VAR_STR': 'VAR_STR'})"
  },
  {
    "nl": "get the dot product of two one dimensional numpy arrays",
    "cmd": "np.dot(a[:, (None)], b[(None), :])",
    "question_id": "23566515-25",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.dot"
    ],
    "clean_cmd": "np.dot(a[:, (None)], b[(None), :])",
    "canonical_cmd": "np.dot(a[:, (None)], b[(None), :])"
  },
  {
    "nl": "multiplication of two 1-dimensional arrays  in numpy",
    "cmd": "np.outer(a, b)",
    "question_id": "23566515-22",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.outer"
    ],
    "clean_cmd": "np.outer(a, b)",
    "canonical_cmd": "np.outer(a, b)"
  },
  {
    "nl": "insert a list `k` at the front of list `a`",
    "cmd": "a.insert(0, k)",
    "question_id": "8785554-77",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.insert"
    ],
    "clean_cmd": "a.insert(, k)",
    "canonical_cmd": "VAR_STR.insert(0, VAR_STR)"
  },
  {
    "nl": "insert elements of list `k` into list `a` at position `n`",
    "cmd": "a = a[:n] + k + a[n:]",
    "question_id": "8785554-42",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "a = a[:n] + k + a[n:]",
    "canonical_cmd": "VAR_STR = VAR_STR[:VAR_STR] + VAR_STR + VAR_STR[VAR_STR:]"
  },
  {
    "nl": "get values from a dictionary `my_dict` whose key contains the string `Date`",
    "cmd": "[v for k, v in list(my_dict.items()) if 'Date' in k]",
    "question_id": "17106819-22",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "[v  k, v  list(my_dict.items())    k]",
    "canonical_cmd": "[v for k, v in list(VAR_STR.items()) if 'VAR_STR' in k]"
  },
  {
    "nl": "Print variable `count` and variable `conv` with space string '    ' in between",
    "cmd": "print(str(count) + '    ' + str(conv))",
    "question_id": "9969684-48",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str"
    ],
    "clean_cmd": "print(str(count) +  + str(conv))",
    "canonical_cmd": "print(str(VAR_STR) + '    ' + str(VAR_STR))"
  },
  {
    "nl": "convert JSON string '2012-05-29T19:30:03.283Z' into a DateTime object using format '%Y-%m-%dT%H:%M:%S.%fZ'",
    "cmd": "datetime.datetime.strptime('2012-05-29T19:30:03.283Z', '%Y-%m-%dT%H:%M:%S.%fZ')",
    "question_id": "10805589-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime"
    ],
    "clean_cmd": "datetime.datetime.strptime(, )",
    "canonical_cmd": "datetime.datetime.strptime('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "decode string `content` to UTF-8 code",
    "cmd": "print(content.decode('utf8'))",
    "question_id": "17577727-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "print(content.decode())",
    "canonical_cmd": "print(VAR_STR.decode('utf8'))"
  },
  {
    "nl": "convert list `data` into a string of its elements",
    "cmd": "print(''.join(map(str, data)))",
    "question_id": "17757450-20",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "print(.join(map(str, data)))",
    "canonical_cmd": "print(''.join(map(str, VAR_STR)))"
  },
  {
    "nl": "sort list `lst` in descending order based on the second item of each tuple in it",
    "cmd": "lst.sort(key=lambda x: x[2], reverse=True)",
    "question_id": "11584773-84",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "lst.sort(key= x: x[], reverse=True)",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: x[2], reverse=True)"
  },
  {
    "nl": "Replace non-ASCII characters in string `text` with a single space",
    "cmd": "re.sub('[^\\\\x00-\\\\x7F]+', ' ', text)",
    "question_id": "20078816-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , text)",
    "canonical_cmd": "re.sub('[^\\\\x00-\\\\x7F]+', ' ', VAR_STR)"
  },
  {
    "nl": "get all digits in a string `s` after a '[' character",
    "cmd": "re.findall('\\\\d+(?=[^[]+$)', s)",
    "question_id": "34338341-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, s)",
    "canonical_cmd": "re.findall('\\\\d+(?=[^[]+$)', VAR_STR)"
  },
  {
    "nl": "create a list of tuples which contains number 9 and the number before it, for each occurrence of 9 in the list 'myList'",
    "cmd": "[(x, y) for x, y in zip(myList, myList[1:]) if y == 9]",
    "question_id": "38251245-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "[(x, y)  x, y  zip(myList, myList[:])  y == ]",
    "canonical_cmd": "[(x, y) for x, y in zip(VAR_STR, VAR_STR[1:]) if y == 9]"
  },
  {
    "nl": "remove all instances of [1, 1] from list `a`",
    "cmd": "a[:] = [x for x in a if x != [1, 1]]",
    "question_id": "2186656-7",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "a[:] = [x  x  a  x != [, ]]",
    "canonical_cmd": "VAR_STR[:] = [x for x in VAR_STR if x != [1, 1]]"
  },
  {
    "nl": "remove all instances of `[1, 1]` from a list `a`",
    "cmd": "[x for x in a if x != [1, 1]]",
    "question_id": "2186656-86",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[x  x  a  x != [, ]]",
    "canonical_cmd": "[x for x in VAR_STR if x != [VAR_STR]]"
  },
  {
    "nl": "Convert nested list `x` into a flat list",
    "cmd": "[j for i in x for j in i]",
    "question_id": "716477-33",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[j  i  x  j  i]",
    "canonical_cmd": "[j for i in VAR_STR for j in i]"
  },
  {
    "nl": "get each value from a list of lists `a` using itertools",
    "cmd": "print(list(itertools.chain.from_iterable(a)))",
    "question_id": "716477-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.chain.from_iterable",
      "python.library.functions#list"
    ],
    "clean_cmd": "print(list(itertools.chain.from_iterable(a)))",
    "canonical_cmd": "print(list(itertools.chain.from_iterable(VAR_STR)))"
  },
  {
    "nl": "get the indices of tuples in list of tuples `L` where the first value is 53",
    "cmd": "[i for i, v in enumerate(L) if v[0] == 53]",
    "question_id": "2917372-41",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate"
    ],
    "clean_cmd": "[i  i, v  enumerate(L)  v[] == ]",
    "canonical_cmd": "[i for i, v in enumerate(VAR_STR) if v[0] == 53]"
  },
  {
    "nl": "convert string '2011221' into a DateTime object using format '%Y%W%w'",
    "cmd": "datetime.strptime('2011221', '%Y%W%w')",
    "question_id": "5882405-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime"
    ],
    "clean_cmd": "datetime.strptime(, )",
    "canonical_cmd": "datetime.strptime('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "Create a dictionary from string `e` separated by `-` and `,`",
    "cmd": "dict((k, int(v)) for k, v in (e.split(' - ') for e in s.split(',')))",
    "question_id": "4627981-39",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#dict",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "dict((k, int(v))  k, v  (e.split()  e  s.split()))",
    "canonical_cmd": "dict((k, int(v)) for k, v in (VAR_STR.split(' - ') for VAR_STR in s.split('VAR_STR'))\n    )"
  },
  {
    "nl": "insert directory './path/to/your/modules/' to current directory",
    "cmd": "sys.path.insert(0, './path/to/your/modules/')",
    "question_id": "24492327-75",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.insert"
    ],
    "clean_cmd": "sys.path.insert(, )",
    "canonical_cmd": "sys.path.insert(0, 'VAR_STR')"
  },
  {
    "nl": "Sort a list of strings 'words' such that items starting with 's' come first.",
    "cmd": "sorted(words, key=lambda x: 'a' + x if x.startswith('s') else 'b' + x)",
    "question_id": "17608210-91",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.stdtypes#str.startswith"
    ],
    "clean_cmd": "sorted(words, key= x:  + x  x.startswith()   + x)",
    "canonical_cmd": "sorted(VAR_STR, key=lambda x: 'a' + x if x.startswith('VAR_STR') else 'b' + x)"
  },
  {
    "nl": "execute a mv command `mv /home/somedir/subdir/* somedir/` in subprocess",
    "cmd": "subprocess.call('mv /home/somedir/subdir/* somedir/', shell=True)",
    "question_id": "21804935-73",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call(, shell=True)",
    "canonical_cmd": "subprocess.call('VAR_STR', shell=True)"
  },
  {
    "nl": "How to use the mv command in Python with subprocess",
    "cmd": "subprocess.call('mv /home/somedir/subdir/* somedir/', shell=True)",
    "question_id": "21804935-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call(, shell=True)",
    "canonical_cmd": "subprocess.call('mv /home/somedir/subdir/* somedir/', shell=True)"
  },
  {
    "nl": "use regex pattern '^12(?=.{4}$)' to remove digit 12 if followed by 4 other digits in column `c_contofficeID` of dataframe `df`",
    "cmd": "df.c_contofficeID.str.replace('^12(?=.{4}$)', '')",
    "question_id": "40273313-48",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.replace"
    ],
    "clean_cmd": "df.c_contofficeID.str.replace(, )",
    "canonical_cmd": "VAR_STR.VAR_STR.str.replace('VAR_STR', '')"
  },
  {
    "nl": "get the platform OS name",
    "cmd": "platform.system()",
    "question_id": "30015665-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.platform#platform.system"
    ],
    "clean_cmd": "platform.system()",
    "canonical_cmd": "platform.system()"
  },
  {
    "nl": "find all digits between two characters `\\xab` and `\\xbb`in a string `text`",
    "cmd": "print(re.findall('\\\\d+', '\\n'.join(re.findall('\\xab([\\\\s\\\\S]*?)\\xbb', text))))",
    "question_id": "31650399-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "print(re.findall(, .join(re.findall(, text))))",
    "canonical_cmd": "print(re.findall('\\\\d+', '\\n'.join(re.findall('\u00ab([\\\\s\\\\S]*?)\u00bb', VAR_STR))))"
  },
  {
    "nl": "convert utf-8 with bom string `s` to utf-8 with no bom `u`",
    "cmd": "u = s.decode('utf-8-sig')",
    "question_id": "8898294-36",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "u = s.decode()",
    "canonical_cmd": "VAR_STR = VAR_STR.decode('utf-8-sig')"
  },
  {
    "nl": "convert unicode string '\\xd0\\xbc\\xd0\\xb0\\xd1\\x80\\xd0\\xba\\xd0\\xb0' to byte string",
    "cmd": "'\\xd0\\xbc\\xd0\\xb0\\xd1\\x80\\xd0\\xba\\xd0\\xb0'.encode('latin-1')",
    "question_id": "11174790-90",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": ".encode()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".encode('latin-1')"
  },
  {
    "nl": "get a list of items from the list `some_list` that contain string 'abc'",
    "cmd": "matching = [s for s in some_list if 'abc' in s]",
    "question_id": "4843158-40",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "matching = [s  s  some_list    s]",
    "canonical_cmd": "matching = [s for s in VAR_STR if 'VAR_STR' in s]"
  },
  {
    "nl": "swap each pair of characters in string `s`",
    "cmd": "\"\"\"\"\"\".join([s[x:x + 2][::-1] for x in range(0, len(s), 2)])",
    "question_id": "4605439-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join([s[x:x + ][::-]  x  range(, len(s), )])",
    "canonical_cmd": "\"\"\"\"\"\".join([VAR_STR[x:x + 2][::-1] for x in range(0, len(VAR_STR), 2)])"
  },
  {
    "nl": "django return a QuerySet list containing the values of field 'eng_name' in model `Employees`",
    "cmd": "Employees.objects.values_list('eng_name', flat=True)",
    "question_id": "7503241-31",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "Employees.objects.values_list(, flat=True)",
    "canonical_cmd": "VAR_STR.objects.values_list('VAR_STR', flat=True)"
  },
  {
    "nl": "sum elements at the same index of each list in list `lists`",
    "cmd": "map(sum, zip(*lists))",
    "question_id": "11280536-19",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#map"
    ],
    "clean_cmd": "map(sum, zip(*lists))",
    "canonical_cmd": "map(sum, zip(*VAR_STR))"
  },
  {
    "nl": "writing items in list `thelist` to file `thefile`",
    "cmd": "for item in thelist:\n    thefile.write(('%s\\n' % item))",
    "question_id": "899103-24",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.write"
    ],
    "clean_cmd": " item  thelist:thefile.write(( % item))",
    "canonical_cmd": "for item in VAR_STR:\n    VAR_STR.write('%s\\n' % item)"
  },
  {
    "nl": "writing items in list `thelist` to file `thefile`",
    "cmd": "for item in thelist:\n    pass",
    "question_id": "899103-78",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " item  thelist:",
    "canonical_cmd": "for item in VAR_STR:\n    pass"
  },
  {
    "nl": "serialize `itemlist` to file `outfile`",
    "cmd": "pickle.dump(itemlist, outfile)",
    "question_id": "899103-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.pickle#pickle.dump"
    ],
    "clean_cmd": "pickle.dump(itemlist, outfile)",
    "canonical_cmd": "pickle.dump(VAR_STR, VAR_STR)"
  },
  {
    "nl": "writing items in list `itemlist` to file `outfile`",
    "cmd": "outfile.write('\\n'.join(itemlist))",
    "question_id": "899103-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join",
      "python.library.os#os.write"
    ],
    "clean_cmd": "outfile.write(.join(itemlist))",
    "canonical_cmd": "VAR_STR.write('\\n'.join(VAR_STR))"
  },
  {
    "nl": "replace all the nan values with 0 in a pandas dataframe `df`",
    "cmd": "df.fillna(0)",
    "question_id": "13295735-56",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.fillna"
    ],
    "clean_cmd": "df.fillna()",
    "canonical_cmd": "VAR_STR.fillna(0)"
  },
  {
    "nl": "convert a 1d `A` array to a 2d array `B`",
    "cmd": "B = np.reshape(A, (-1, 2))",
    "question_id": "12575421-53",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.reshape"
    ],
    "clean_cmd": "B = np.reshape(A, (-, ))",
    "canonical_cmd": "VAR_STR = np.reshape(VAR_STR, (-1, 2))"
  },
  {
    "nl": "a sequence of empty lists of length `n`",
    "cmd": "[[] for _ in range(n)]",
    "question_id": "23612271-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "[[]  _  range(n)]",
    "canonical_cmd": "[[] for _ in range(VAR_STR)]"
  },
  {
    "nl": "update a list `l1` dictionaries with a key `count` and value from list `l2`",
    "cmd": "[dict(d, count=n) for d, n in zip(l1, l2)]",
    "question_id": "10592674-55",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "[dict(d, count=n)  d, n  zip(l1, l2)]",
    "canonical_cmd": "[dict(d, VAR_STR=n) for d, n in zip(VAR_STR, VAR_STR)]"
  },
  {
    "nl": "merge rows from dataframe `df1` with rows from dataframe `df2` and calculate the mean for rows that have the same value of axis 1",
    "cmd": "pd.concat((df1, df2), axis=1).mean(axis=1)",
    "question_id": "19490064-50",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.concat",
      "pandas.reference.api.pandas.dataframe.mean"
    ],
    "clean_cmd": "pd.concat((df1, df2), axis=).mean(axis=)",
    "canonical_cmd": "pd.concat((VAR_STR, VAR_STR), axis=1).mean(axis=1)"
  },
  {
    "nl": "sum of all values in a python dict `d`",
    "cmd": "sum(d.values())",
    "question_id": "4880960-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "sum(d.values())",
    "canonical_cmd": "sum(VAR_STR.values())"
  },
  {
    "nl": "Sum of all values in a Python dict",
    "cmd": "sum(d.values())",
    "question_id": "4880960-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "sum(d.values())",
    "canonical_cmd": "sum(d.values())"
  },
  {
    "nl": "remove the last element in list `a`",
    "cmd": "del a[(-1)]",
    "question_id": "627435-73",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " a[(-)]",
    "canonical_cmd": "del VAR_STR[-1]"
  },
  {
    "nl": "remove the element in list `a` with index 1",
    "cmd": "a.pop(1)",
    "question_id": "627435-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#frozenset.pop"
    ],
    "clean_cmd": "a.pop()",
    "canonical_cmd": "VAR_STR.pop(1)"
  },
  {
    "nl": "remove the last element in list `a`",
    "cmd": "a.pop()",
    "question_id": "627435-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#frozenset.pop"
    ],
    "clean_cmd": "a.pop()",
    "canonical_cmd": "VAR_STR.pop()"
  },
  {
    "nl": "remove the element in list `a` at index `index`",
    "cmd": "a.pop(index)",
    "question_id": "627435-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#frozenset.pop"
    ],
    "clean_cmd": "a.pop(index)",
    "canonical_cmd": "VAR_STR.pop(VAR_STR)"
  },
  {
    "nl": "remove the element in list `a` at index `index`",
    "cmd": "del a[index]",
    "question_id": "627435-36",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " a[index]",
    "canonical_cmd": "del VAR_STR[VAR_STR]"
  },
  {
    "nl": "sort a dictionary `d` by length of its values and print as string",
    "cmd": "print(' '.join(sorted(d, key=lambda k: len(d[k]), reverse=True)))",
    "question_id": "16868457-20",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#len",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "print(.join(sorted(d, key= k: len(d[k]), reverse=True)))",
    "canonical_cmd": "print(' '.join(sorted(VAR_STR, key=lambda k: len(VAR_STR[k]), reverse=True)))"
  },
  {
    "nl": "Replace comma with dot in a string `original_string` using regex",
    "cmd": "new_string = re.sub('\"(\\\\d+),(\\\\d+)\"', '\\\\1.\\\\2', original_string)",
    "question_id": "8172861-52",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "new_string = re.sub(, , original_string)",
    "canonical_cmd": "new_string = re.sub('\"(\\\\d+),(\\\\d+)\"', '\\\\1.\\\\2', VAR_STR)"
  },
  {
    "nl": "plot data of column 'index' versus column 'A' of dataframe `monthly_mean` after resetting its index",
    "cmd": "monthly_mean.reset_index().plot(x='index', y='A')",
    "question_id": "20084487-20",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.reset_index",
      "pandas.reference.api.pandas.dataframe.plot"
    ],
    "clean_cmd": "monthly_mean.reset_index().plot(x=, y=)",
    "canonical_cmd": "VAR_STR.reset_index().plot(x='VAR_STR', y='VAR_STR')"
  },
  {
    "nl": "set environment variable 'DEBUSSY' equal to 1",
    "cmd": "os.environ['DEBUSSY'] = '1'",
    "question_id": "5971312-4",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "os.environ[] = ",
    "canonical_cmd": "os.environ['VAR_STR'] = '1'"
  },
  {
    "nl": "Get a environment variable `DEBUSSY`",
    "cmd": "print(os.environ['DEBUSSY'])",
    "question_id": "5971312-29",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print(os.environ[])",
    "canonical_cmd": "print(os.environ['VAR_STR'])"
  },
  {
    "nl": "set environment variable 'DEBUSSY' to '1'",
    "cmd": "os.environ['DEBUSSY'] = '1'",
    "question_id": "5971312-73",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "os.environ[] = ",
    "canonical_cmd": "os.environ['VAR_STR'] = 'VAR_STR'"
  },
  {
    "nl": "flask-sqlalchemy delete row `page`",
    "cmd": "db.session.delete(page)",
    "question_id": "4921038-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.ast#ast.Delete"
    ],
    "clean_cmd": "db.session.delete(page)",
    "canonical_cmd": "db.session.delete(VAR_STR)"
  },
  {
    "nl": "convert pandas group by object to multi-indexed dataframe with indices 'Name' and 'Destination'",
    "cmd": "df.set_index(['Name', 'Destination'])",
    "question_id": "14301913-69",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.set_index"
    ],
    "clean_cmd": "df.set_index([, ])",
    "canonical_cmd": "df.set_index(['VAR_STR', 'VAR_STR'])"
  },
  {
    "nl": "decode unicode string `s` into a readable unicode literal",
    "cmd": "s.decode('unicode_escape')",
    "question_id": "6504200-56",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "s.decode()",
    "canonical_cmd": "VAR_STR.decode('unicode_escape')"
  },
  {
    "nl": "get the non-masked values of array `m`",
    "cmd": "m[~m.mask]",
    "question_id": "3262437-3",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "m[~m.mask]",
    "canonical_cmd": "VAR_STR[~VAR_STR.mask]"
  },
  {
    "nl": "get a random key `country` and value `capital` form a dictionary `d`",
    "cmd": "country, capital = random.choice(list(d.items()))",
    "question_id": "4859292-70",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.random#random.choice",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "country, capital = random.choice(list(d.items()))",
    "canonical_cmd": "VAR_STR, VAR_STR = random.choice(list(VAR_STR.items()))"
  },
  {
    "nl": "zip file `pdffile` using its basename as directory name",
    "cmd": "archive.write(pdffile, os.path.basename(pdffile))",
    "question_id": "12777222-51",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.basename",
      "python.library.os#os.write"
    ],
    "clean_cmd": "archive.write(pdffile, os.path.basename(pdffile))",
    "canonical_cmd": "archive.write(VAR_STR, os.path.basename(VAR_STR))"
  },
  {
    "nl": "call bash command 'tar c my_dir | md5sum' with pipe",
    "cmd": "subprocess.call('tar c my_dir | md5sum', shell=True)",
    "question_id": "7323859-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call(, shell=True)",
    "canonical_cmd": "subprocess.call('VAR_STR', shell=True)"
  },
  {
    "nl": "trim whitespace in string `s`",
    "cmd": "s.strip()",
    "question_id": "1185524-91",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": "s.strip()",
    "canonical_cmd": "VAR_STR.strip()"
  },
  {
    "nl": "trim whitespace (including tabs) in `s` on the left side",
    "cmd": "s = s.lstrip()",
    "question_id": "1185524-91",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": "s = s.lstrip()",
    "canonical_cmd": "VAR_STR = VAR_STR.lstrip()"
  },
  {
    "nl": "trim whitespace (including tabs) in `s` on the right side",
    "cmd": "s = s.rstrip()",
    "question_id": "1185524-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rstrip"
    ],
    "clean_cmd": "s = s.rstrip()",
    "canonical_cmd": "VAR_STR = VAR_STR.rstrip()"
  },
  {
    "nl": "trim characters ' \\t\\n\\r' in `s`",
    "cmd": "s = s.strip(' \\t\\n\\r')",
    "question_id": "1185524-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": "s = s.strip()",
    "canonical_cmd": "VAR_STR = VAR_STR.strip(' \\t\\n\\r')"
  },
  {
    "nl": "trim whitespaces (including tabs) in string `s`",
    "cmd": "print(re.sub('[\\\\s+]', '', s))",
    "question_id": "1185524-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "print(re.sub(, , s))",
    "canonical_cmd": "print(re.sub('[\\\\s+]', '', VAR_STR))"
  },
  {
    "nl": "set color marker styles `--bo` in matplotlib",
    "cmd": "plt.plot(list(range(10)), '--bo')",
    "question_id": "8409095-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range",
      "python.library.functions#list",
      "pandas.reference.api.pandas.dataframe.plot"
    ],
    "clean_cmd": "plt.plot(list(range()), )",
    "canonical_cmd": "plt.plot(list(range(10)), 'VAR_STR')"
  },
  {
    "nl": "set circle markers on plot for individual points  defined in list   `[1,2,3,4,5,6,7,8,9,10]` created by range(10)",
    "cmd": "plt.plot(list(range(10)), linestyle='--', marker='o', color='b')",
    "question_id": "8409095-48",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range",
      "python.library.functions#list",
      "pandas.reference.api.pandas.dataframe.plot"
    ],
    "clean_cmd": "plt.plot(list(range()), linestyle=, marker=, color=)",
    "canonical_cmd": "plt.plot(list(range(10)), linestyle='--', marker='o', color='b')"
  },
  {
    "nl": "sort list `results` by keys value 'year'",
    "cmd": "sorted(results, key=itemgetter('year'))",
    "question_id": "13438574-91",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.operator#operator.itemgetter"
    ],
    "clean_cmd": "sorted(results, key=itemgetter())",
    "canonical_cmd": "sorted(VAR_STR, key=itemgetter('VAR_STR'))"
  },
  {
    "nl": "sort array `arr` in ascending order by values of the 3rd column",
    "cmd": "arr[arr[:, (2)].argsort()]",
    "question_id": "10078470-99",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.argsort"
    ],
    "clean_cmd": "arr[arr[:, ()].argsort()]",
    "canonical_cmd": "VAR_STR[VAR_STR[:, (2)].argsort()]"
  },
  {
    "nl": "sort rows of numpy matrix `arr` in ascending order according to all column values",
    "cmd": "numpy.sort(arr, axis=0)",
    "question_id": "10078470-100",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.sort"
    ],
    "clean_cmd": "numpy.sort(arr, axis=)",
    "canonical_cmd": "numpy.sort(VAR_STR, axis=0)"
  },
  {
    "nl": "Format a string `u'Andr\\xc3\\xa9'` that has unicode characters",
    "cmd": "\"\"\"\"\"\".join(chr(ord(c)) for c in 'Andr\\xc3\\xa9')",
    "question_id": "2783079-30",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#chr",
      "python.library.functions#ord",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(chr(ord(c))  c  )",
    "canonical_cmd": "\"\"\"\"\"\".join(chr(ord(c)) for c in 'Andr\u00c3\u00a9')"
  },
  {
    "nl": "convert a unicode 'Andr\\xc3\\xa9' to a string",
    "cmd": "\"\"\"\"\"\".join(chr(ord(c)) for c in 'Andr\\xc3\\xa9').decode('utf8')",
    "question_id": "2783079-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#chr",
      "python.library.functions#ord",
      "python.library.stdtypes#str.join",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": ".join(chr(ord(c))  c  ).decode()",
    "canonical_cmd": "\"\"\"\"\"\".join(chr(ord(c)) for c in 'VAR_STR').decode('utf8')"
  },
  {
    "nl": "remove white spaces from the end of string \"    xyz     \"",
    "cmd": "\"\"\"    xyz     \"\"\".rstrip()",
    "question_id": "2372573-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rstrip"
    ],
    "clean_cmd": ".rstrip()",
    "canonical_cmd": "\"\"\"    xyz     \"\"\".rstrip()"
  },
  {
    "nl": "Convert string '03:55' into datetime.time object",
    "cmd": "datetime.datetime.strptime('03:55', '%H:%M').time()",
    "question_id": "14295673-41",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime",
      "python.library.datetime#datetime.datetime.time"
    ],
    "clean_cmd": "datetime.datetime.strptime(, ).time()",
    "canonical_cmd": "datetime.datetime.strptime('VAR_STR', '%H:%M').time()"
  },
  {
    "nl": "generate all possible string permutations of each two elements in list `['hel', 'lo', 'bye']`",
    "cmd": "print([''.join(a) for a in combinations(['hel', 'lo', 'bye'], 2)])",
    "question_id": "4059550-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.combinations",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "print([.join(a)  a  combinations([, , ], )])",
    "canonical_cmd": "print([''.join(a) for a in combinations([VAR_STR], 2)])"
  },
  {
    "nl": "print a list of integers `list_of_ints` using string formatting",
    "cmd": "print(', '.join(str(x) for x in list_of_ints))",
    "question_id": "3590165-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "print(.join(str(x)  x  list_of_ints))",
    "canonical_cmd": "print(', '.join(str(x) for x in VAR_STR))"
  },
  {
    "nl": "un-escaping characters in a string with python",
    "cmd": "\"\"\"\\\\u003Cp\\\\u003E\"\"\".decode('unicode-escape')",
    "question_id": "5555063-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": ".decode()",
    "canonical_cmd": "\"\"\"\\\\u003Cp\\\\u003E\"\"\".decode('unicode-escape')"
  },
  {
    "nl": "save current figure to file 'graph.png' with resolution of 1000 dpi",
    "cmd": "plt.savefig('graph.png', dpi=1000)",
    "question_id": "9402255-38",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.figure_api#matplotlib.figure.Figure.savefig"
    ],
    "clean_cmd": "plt.savefig(, dpi=)",
    "canonical_cmd": "plt.savefig('VAR_STR', dpi=1000)"
  },
  {
    "nl": "Print a emoji from a string `\\\\ud83d\\\\ude4f` having surrogate pairs",
    "cmd": "\"\"\"\\\\ud83d\\\\ude4f\"\"\".encode('utf-16', 'surrogatepass').decode('utf-16')",
    "question_id": "38147259-48",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": ".encode(, ).decode()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".encode('utf-16', 'surrogatepass').decode('utf-16')"
  },
  {
    "nl": "apply two different aggregating functions `mean` and `sum` to the same column `dummy` in pandas data frame `df`",
    "cmd": "df.groupby('dummy').agg({'returns': [np.mean, np.sum]})",
    "question_id": "12589481-54",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.dataframe.agg"
    ],
    "clean_cmd": "df.groupby().agg({: [np.mean, np.sum]})",
    "canonical_cmd": "VAR_STR.groupby('VAR_STR').agg({'returns': [np.VAR_STR, np.VAR_STR]})"
  },
  {
    "nl": "map two lists `keys` and `values` into a dictionary",
    "cmd": "new_dict = {k: v for k, v in zip(keys, values)}",
    "question_id": "209840-22",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "new_dict = {k: v  k, v  zip(keys, values)}",
    "canonical_cmd": "new_dict = {k: v for k, v in zip(VAR_STR, VAR_STR)}"
  },
  {
    "nl": "map two lists `keys` and `values` into a dictionary",
    "cmd": "dict((k, v) for k, v in zip(keys, values))",
    "question_id": "209840-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict((k, v)  k, v  zip(keys, values))",
    "canonical_cmd": "dict((k, v) for k, v in zip(VAR_STR, VAR_STR))"
  },
  {
    "nl": "map two lists `keys` and `values` into a dictionary",
    "cmd": "dict([(k, v) for k, v in zip(keys, values)])",
    "question_id": "209840-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict([(k, v)  k, v  zip(keys, values)])",
    "canonical_cmd": "dict([(k, v) for k, v in zip(VAR_STR, VAR_STR)])"
  },
  {
    "nl": "get a list of substrings consisting of the first 5 characters of every string in list `buckets`",
    "cmd": "[s[:5] for s in buckets]",
    "question_id": "38379453-42",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[s[:]  s  buckets]",
    "canonical_cmd": "[s[:5] for s in VAR_STR]"
  },
  {
    "nl": "Rearrange the columns 'a','b','x','y' of pandas DataFrame `df` in mentioned sequence 'x' ,'y','a' ,'b'",
    "cmd": "df = df[['x', 'y', 'a', 'b']]",
    "question_id": "12329853-55",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df = df[[, , , ]]",
    "canonical_cmd": "VAR_STR = VAR_STR[['VAR_STR', 'VAR_STR', 'VAR_STR', 'VAR_STR']]"
  },
  {
    "nl": "Get a list of all fields in class `User` that are marked `required`",
    "cmd": "[k for k, v in User._fields.items() if v.required]",
    "question_id": "8586738-95",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "[k  k, v  User._fields.items()  v.required]",
    "canonical_cmd": "[k for k, v in VAR_STR._fields.items() if v.VAR_STR]"
  },
  {
    "nl": "create a dictionary `{'spam': 5, 'ham': 6}` into another dictionary `d` field 'dict3'",
    "cmd": "d['dict3'] = {'spam': 5, 'ham': 6}",
    "question_id": "3817529-35",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "d[] = {: , : }",
    "canonical_cmd": "VAR_STR['VAR_STR'] = {VAR_STR}"
  },
  {
    "nl": "Get rank of rows from highest to lowest of dataframe `df`, grouped by value in column `group`, according to value in column `value`",
    "cmd": "df.groupby('group')['value'].rank(ascending=False)",
    "question_id": "26720916-8",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.dataframe.rank"
    ],
    "clean_cmd": "df.groupby()[].rank(ascending=False)",
    "canonical_cmd": "VAR_STR.groupby('VAR_STR')['VAR_STR'].rank(ascending=False)"
  },
  {
    "nl": "remove column by index `[:, 0:2]` in dataframe `df`",
    "cmd": "df = df.ix[:, 0:2]",
    "question_id": "19973489-29",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df = df.ix[:, :]",
    "canonical_cmd": "VAR_STR = VAR_STR.ix[VAR_STR]"
  },
  {
    "nl": "convert a list of hex byte strings `['BB', 'A7', 'F6', '9E']` to a list of hex integers",
    "cmd": "[int(x, 16) for x in ['BB', 'A7', 'F6', '9E']]",
    "question_id": "2397687-56",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "[int(x, )  x  [, , , ]]",
    "canonical_cmd": "[int(x, 16) for x in [VAR_STR]]"
  },
  {
    "nl": "convert the elements of list `L` from hex byte strings to hex integers",
    "cmd": "[int(x, 16) for x in L]",
    "question_id": "2397687-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "[int(x, )  x  L]",
    "canonical_cmd": "[int(x, 16) for x in VAR_STR]"
  },
  {
    "nl": "Create a dictionary `d` from list `iterable`",
    "cmd": "d = dict(((key, value) for (key, value) in iterable))",
    "question_id": "1747817-96",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "d = dict(((key, value)  (key, value)  iterable))",
    "canonical_cmd": "VAR_STR = dict((key, value) for key, value in VAR_STR)"
  },
  {
    "nl": "Create a dictionary `d` from list `iterable`",
    "cmd": "d = {key: value for (key, value) in iterable}",
    "question_id": "1747817-8",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "d = {key: value  (key, value)  iterable}",
    "canonical_cmd": "VAR_STR = {key: value for key, value in VAR_STR}"
  },
  {
    "nl": "Create a dictionary `d` from list of key value pairs `iterable`",
    "cmd": "d = {k: v for (k, v) in iterable}",
    "question_id": "1747817-34",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "d = {k: v  (k, v)  iterable}",
    "canonical_cmd": "VAR_STR = {k: v for k, v in VAR_STR}"
  },
  {
    "nl": "drop a single subcolumn 'a' in column 'col1' from a dataframe `df`",
    "cmd": "df.drop(('col1', 'a'), axis=1)",
    "question_id": "22397058-19",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.drop"
    ],
    "clean_cmd": "df.drop((, ), axis=)",
    "canonical_cmd": "VAR_STR.drop(('VAR_STR', 'VAR_STR'), axis=1)"
  },
  {
    "nl": "dropping all columns named 'a' from a multiindex 'df', across all level.",
    "cmd": "df.drop('a', level=1, axis=1)",
    "question_id": "22397058-80",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.drop"
    ],
    "clean_cmd": "df.drop(, level=, axis=)",
    "canonical_cmd": "VAR_STR.drop('VAR_STR', level=1, axis=1)"
  },
  {
    "nl": "return list `result` of sum of elements of each list `b` in list of lists `a`",
    "cmd": "result = [sum(b) for b in a]",
    "question_id": "13283689-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "result = [sum(b)  b  a]",
    "canonical_cmd": "VAR_STR = [sum(VAR_STR) for VAR_STR in VAR_STR]"
  },
  {
    "nl": "make a line plot with errorbars, `ebar`, from data `x, y, err` and set color of the errorbars to `y` (yellow)",
    "cmd": "ebar = plt.errorbar(x, y, yerr=err, ecolor='y')",
    "question_id": "13395888-35",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib._as_gen.mpl_toolkits.mplot3d.axes3d.axes3d#mpl_toolkits.mplot3d.axes3d.Axes3D.errorbar"
    ],
    "clean_cmd": "ebar = plt.errorbar(x, y, yerr=err, ecolor=)",
    "canonical_cmd": "VAR_STR = plt.errorbar(x, VAR_STR, yerr=err, ecolor='VAR_STR')"
  },
  {
    "nl": "open a file `/home/user/test/wsservice/data.pkl` in binary write mode",
    "cmd": "output = open('/home/user/test/wsservice/data.pkl', 'wb')",
    "question_id": "5285181-18",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "output = open(, )",
    "canonical_cmd": "output = open('VAR_STR', 'wb')"
  },
  {
    "nl": "compare two lists in python `a` and `b` and return matches",
    "cmd": "set(a).intersection(b)",
    "question_id": "1388818-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#set",
      "python.library.stdtypes#frozenset.intersection"
    ],
    "clean_cmd": "set(a).intersection(b)",
    "canonical_cmd": "set(VAR_STR).intersection(VAR_STR)"
  },
  {
    "nl": "How can I compare two lists in python and return matches",
    "cmd": "[i for i, j in zip(a, b) if i == j]",
    "question_id": "1388818-27",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "[i  i, j  zip(a, b)  i == j]",
    "canonical_cmd": "[i for i, j in zip(a, b) if i == j]"
  },
  {
    "nl": "sort a dictionary `a` by values that are list type",
    "cmd": "t = sorted(list(a.items()), key=lambda x: x[1])",
    "question_id": "20230211-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "t = sorted(list(a.items()), key= x: x[])",
    "canonical_cmd": "t = sorted(list(VAR_STR.items()), key=lambda x: x[1])"
  },
  {
    "nl": "Replace all non-alphanumeric characters in a string",
    "cmd": "re.sub('[^0-9a-zA-Z]+', '*', 'h^&ell`.,|o w]{+orld')",
    "question_id": "12985456-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , )",
    "canonical_cmd": "re.sub('[^0-9a-zA-Z]+', '*', 'h^&ell`.,|o w]{+orld')"
  },
  {
    "nl": "find all possible sequences of elements in a list `[2, 3, 4]`",
    "cmd": "map(list, permutations([2, 3, 4]))",
    "question_id": "9040939-18",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.itertools#itertools.permutations"
    ],
    "clean_cmd": "map(list, permutations([, , ]))",
    "canonical_cmd": "map(list, permutations([VAR_STR]))"
  },
  {
    "nl": "create a list by appending components from list `a` and reversed list `b` interchangeably",
    "cmd": "[value for pair in zip(a, b[::-1]) for value in pair]",
    "question_id": "35797523-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "[value  pair  zip(a, b[::-])  value  pair]",
    "canonical_cmd": "[value for pair in zip(VAR_STR, VAR_STR[::-1]) for value in pair]"
  },
  {
    "nl": "get http header of the key 'your-header-name' in flask",
    "cmd": "request.headers['your-header-name']",
    "question_id": "29386995-92",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "request.headers[]",
    "canonical_cmd": "request.headers['VAR_STR']"
  },
  {
    "nl": "Create list `listy` containing 3 empty lists",
    "cmd": "listy = [[] for i in range(3)]",
    "question_id": "7745562-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "listy = [[]  i  range()]",
    "canonical_cmd": "VAR_STR = [[] for i in range(3)]"
  },
  {
    "nl": "convert datetime.date `dt` to utc timestamp",
    "cmd": "timestamp = (dt - datetime(1970, 1, 1)).total_seconds()",
    "question_id": "8777753-52",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.timedelta.total_seconds",
      "python.library.datetime#datetime.datetime"
    ],
    "clean_cmd": "timestamp = (dt - datetime(, , )).total_seconds()",
    "canonical_cmd": "timestamp = (VAR_STR - datetime(1970, 1, 1)).total_seconds()"
  },
  {
    "nl": "find float number proceeding sub-string `par` in string `dir`",
    "cmd": "float(re.findall('(?:^|_)' + par + '(\\\\d+\\\\.\\\\d*)', dir)[0])",
    "question_id": "12211944-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall",
      "python.library.functions#float"
    ],
    "clean_cmd": "float(re.findall( + par + , dir)[])",
    "canonical_cmd": "float(re.findall('(?:^|_)' + VAR_STR + '(\\\\d+\\\\.\\\\d*)', VAR_STR)[0])"
  },
  {
    "nl": "Get all the matches from a string `abcd` if it begins with a character `a`",
    "cmd": "re.findall('[^a]', 'abcd')",
    "question_id": "12211944-0",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall('[^a]', 'VAR_STR')"
  },
  {
    "nl": "get a relative path of file 'my_file' into variable `fn`",
    "cmd": "fn = os.path.join(os.path.dirname(__file__), 'my_file')",
    "question_id": "1270951-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.dirname",
      "python.library.os.path#os.path.join"
    ],
    "clean_cmd": "fn = os.path.join(os.path.dirname(__file__), )",
    "canonical_cmd": "VAR_STR = os.path.join(os.path.dirname(__file__), 'VAR_STR')"
  },
  {
    "nl": "Can I sort text by its numeric value in Python?",
    "cmd": "sorted(list(mydict.items()), key=lambda a: map(int, a[0].split('.')))",
    "question_id": "1534542-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#map",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "sorted(list(mydict.items()), key= a: map(int, a[].split()))",
    "canonical_cmd": "sorted(list(mydict.items()), key=lambda a: map(int, a[0].split('.')))"
  },
  {
    "nl": "execute python code `myscript.py` in a virtualenv `/path/to/my/venv` from matlab",
    "cmd": "system('/path/to/my/venv/bin/python myscript.py')",
    "question_id": "39538010-46",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "system()",
    "canonical_cmd": "system('/path/to/my/venv/bin/python myscript.py')"
  },
  {
    "nl": "remove dictionary from list `a` if the value associated with its key 'link' is in list `b`",
    "cmd": "a = [x for x in a if x['link'] not in b]",
    "question_id": "42260840-56",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "a = [x  x  a  x[]   b]",
    "canonical_cmd": "VAR_STR = [x for x in VAR_STR if x['VAR_STR'] not in VAR_STR]"
  },
  {
    "nl": "Convert a string of numbers `example_string` separated by `,` into a list of integers",
    "cmd": "map(int, example_string.split(','))",
    "question_id": "19334374-30",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "map(int, example_string.split())",
    "canonical_cmd": "map(int, VAR_STR.split('VAR_STR'))"
  },
  {
    "nl": "Convert a string of numbers 'example_string' separated by comma into a list of numbers",
    "cmd": "[int(s) for s in example_string.split(',')]",
    "question_id": "19334374-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "[int(s)  s  example_string.split()]",
    "canonical_cmd": "[int(s) for s in VAR_STR.split(',')]"
  },
  {
    "nl": "remove newlines and whitespace from string `yourstring`",
    "cmd": "re.sub('[\\\\ \\\\n]{2,}', '', yourstring)",
    "question_id": "4270742-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , yourstring)",
    "canonical_cmd": "re.sub('[\\\\ \\\\n]{2,}', '', VAR_STR)"
  },
  {
    "nl": "sort dict `data` by value",
    "cmd": "sorted(data, key=data.get)",
    "question_id": "16772071-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(data, key=data.get)",
    "canonical_cmd": "sorted(VAR_STR, key=VAR_STR.get)"
  },
  {
    "nl": "Sort a dictionary `data` by its values",
    "cmd": "sorted(data.values())",
    "question_id": "16772071-53",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "sorted(data.values())",
    "canonical_cmd": "sorted(VAR_STR.values())"
  },
  {
    "nl": "Get a list of pairs of key-value sorted by values in dictionary `data`",
    "cmd": "sorted(list(data.items()), key=lambda x: x[1])",
    "question_id": "16772071-42",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(list(data.items()), key= x: x[])",
    "canonical_cmd": "sorted(list(VAR_STR.items()), key=lambda x: x[1])"
  },
  {
    "nl": "sort dict by value python",
    "cmd": "sorted(list(data.items()), key=lambda x: x[1])",
    "question_id": "16772071-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(list(data.items()), key= x: x[])",
    "canonical_cmd": "sorted(list(data.items()), key=lambda x: x[1])"
  },
  {
    "nl": "update all values associated with key `i` to string 'updated' if value `j` is not equal to 'None' in dictionary `d`",
    "cmd": "{i: 'updated' for i, j in list(d.items()) if j != 'None'}",
    "question_id": "4484690-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "{i:   i, j  list(d.items())  j != }",
    "canonical_cmd": "{VAR_STR: 'VAR_STR' for VAR_STR, VAR_STR in list(VAR_STR.items()) if VAR_STR != 'VAR_STR'}"
  },
  {
    "nl": "Filter a dictionary `d` to remove keys with value None and replace other values with 'updated'",
    "cmd": "dict((k, 'updated') for k, v in d.items() if v is None)",
    "question_id": "4484690-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "dict((k, )  k, v  d.items()  v  None)",
    "canonical_cmd": "dict((k, 'VAR_STR') for k, v in VAR_STR.items() if v is None)"
  },
  {
    "nl": "Filter a dictionary `d` to remove keys with value 'None' and replace other values with 'updated'",
    "cmd": "dict((k, 'updated') for k, v in d.items() if v != 'None')",
    "question_id": "4484690-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "dict((k, )  k, v  d.items()  v != )",
    "canonical_cmd": "dict((k, 'VAR_STR') for k, v in VAR_STR.items() if v != 'VAR_STR')"
  },
  {
    "nl": "create a list `listofzeros` of `n` zeros",
    "cmd": "listofzeros = [0] * n",
    "question_id": "8528178-96",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "listofzeros = [] * n",
    "canonical_cmd": "VAR_STR = [0] * VAR_STR"
  },
  {
    "nl": "sort a list `s` by first and second attributes",
    "cmd": "s = sorted(s, key=lambda x: (x[1], x[2]))",
    "question_id": "4233476-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "s = sorted(s, key= x: (x[], x[]))",
    "canonical_cmd": "VAR_STR = sorted(VAR_STR, key=lambda x: (x[1], x[2]))"
  },
  {
    "nl": "sort a list of lists `s` by second and third element in each list.",
    "cmd": "s.sort(key=operator.itemgetter(1, 2))",
    "question_id": "4233476-98",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.operator#operator.itemgetter",
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "s.sort(key=operator.itemgetter(, ))",
    "canonical_cmd": "VAR_STR.sort(key=operator.itemgetter(1, 2))"
  },
  {
    "nl": "sort dictionary of lists `myDict` by the third item in each list",
    "cmd": "sorted(list(myDict.items()), key=lambda e: e[1][2])",
    "question_id": "1217251-72",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(list(myDict.items()), key= e: e[][])",
    "canonical_cmd": "sorted(list(VAR_STR.items()), key=lambda e: e[1][2])"
  },
  {
    "nl": "get the first element of each tuple in a list `rows`",
    "cmd": "[x[0] for x in rows]",
    "question_id": "22412258-62",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[x[]  x  rows]",
    "canonical_cmd": "[x[0] for x in VAR_STR]"
  },
  {
    "nl": "get a list `res_list` of the first elements of each tuple in a list of tuples `rows`",
    "cmd": "res_list = [x[0] for x in rows]",
    "question_id": "22412258-39",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "res_list = [x[]  x  rows]",
    "canonical_cmd": "VAR_STR = [x[0] for x in VAR_STR]"
  },
  {
    "nl": "append the first element of array `a` to array `a`",
    "cmd": "numpy.append(a, a[0])",
    "question_id": "7332841-60",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.append"
    ],
    "clean_cmd": "numpy.append(a, a[])",
    "canonical_cmd": "numpy.append(VAR_STR, VAR_STR[0])"
  },
  {
    "nl": "reset index of series `s`",
    "cmd": "s.reset_index(0).reset_index(drop=True)",
    "question_id": "18624039-75",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.reset_index"
    ],
    "clean_cmd": "s.reset_index().reset_index(drop=True)",
    "canonical_cmd": "VAR_STR.reset_index(0).reset_index(drop=True)"
  },
  {
    "nl": "open a file 'bundled-resource.jpg' in the same directory as a python script",
    "cmd": "f = open(os.path.join(__location__, 'bundled-resource.jpg'))",
    "question_id": "4060221-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.join",
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "f = open(os.path.join(__location__, ))",
    "canonical_cmd": "f = open(os.path.join(__location__, 'VAR_STR'))"
  },
  {
    "nl": "Set value for key `a` in dict `count` to `0` if key `a` does not exist or if value is `none`",
    "cmd": "count.setdefault('a', 0)",
    "question_id": "18663026-72",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.setdefault"
    ],
    "clean_cmd": "count.setdefault(, )",
    "canonical_cmd": "VAR_STR.setdefault('VAR_STR', 0)"
  },
  {
    "nl": "serialise SqlAlchemy RowProxy object `row` to a json object",
    "cmd": "json.dumps([dict(list(row.items())) for row in rs])",
    "question_id": "5022066-15",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.json#json.dumps",
      "python.library.stdtypes#dict",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "json.dumps([dict(list(row.items()))  row  rs])",
    "canonical_cmd": "json.dumps([dict(list(VAR_STR.items())) for VAR_STR in rs])"
  },
  {
    "nl": "check if dictionary `L[0].f.items()` is in dictionary `a3.f.items()`",
    "cmd": "set(L[0].f.items()).issubset(set(a3.f.items()))",
    "question_id": "18170459-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#set",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "set(L[].f.items()).issubset(set(a3.f.items()))",
    "canonical_cmd": "set(L[0].f.items()).issubset(set(a3.f.items()))"
  },
  {
    "nl": "convert a python dictionary `d` to a list of tuples",
    "cmd": "[(v, k) for k, v in list(d.items())]",
    "question_id": "674519-53",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "[(v, k)  k, v  list(d.items())]",
    "canonical_cmd": "[(v, k) for k, v in list(VAR_STR.items())]"
  },
  {
    "nl": "convert dictionary of pairs `d` to a list of tuples",
    "cmd": "[(v, k) for k, v in d.items()]",
    "question_id": "674519-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "[(v, k)  k, v  d.items()]",
    "canonical_cmd": "[(v, k) for k, v in VAR_STR.items()]"
  },
  {
    "nl": "convert python 2 dictionary `a` to a list of tuples where the value is the first tuple element and the key is the second tuple element",
    "cmd": "[(v, k) for k, v in a.items()]",
    "question_id": "674519-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "[(v, k)  k, v  a.items()]",
    "canonical_cmd": "[(v, k) for k, v in VAR_STR.items()]"
  },
  {
    "nl": "convert a python dictionary 'a' to a list of tuples",
    "cmd": "[(k, v) for k, v in a.items()]",
    "question_id": "674519-55",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "[(k, v)  k, v  a.items()]",
    "canonical_cmd": "[(k, v) for k, v in VAR_STR.items()]"
  },
  {
    "nl": "split list `l` into `n` sized lists",
    "cmd": "[l[i:i + n] for i in range(0, len(l), n)]",
    "question_id": "312443-22",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range"
    ],
    "clean_cmd": "[l[i:i + n]  i  range(, len(l), n)]",
    "canonical_cmd": "[VAR_STR[i:i + VAR_STR] for i in range(0, len(VAR_STR), VAR_STR)]"
  },
  {
    "nl": "split a list `l` into evenly sized chunks `n`",
    "cmd": "[l[i:i + n] for i in range(0, len(l), n)]",
    "question_id": "312443-22",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range"
    ],
    "clean_cmd": "[l[i:i + n]  i  range(, len(l), n)]",
    "canonical_cmd": "[VAR_STR[i:i + VAR_STR] for i in range(0, len(VAR_STR), VAR_STR)]"
  },
  {
    "nl": "Sort Pandas Dataframe by Date",
    "cmd": "df.sort_values(by='Date')",
    "question_id": "28161356-78",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.sort_values"
    ],
    "clean_cmd": "df.sort_values(by=)",
    "canonical_cmd": "df.sort_values(by='Date')"
  },
  {
    "nl": "print 'here is your checkmark: ' plus unicode character u'\\u2713'",
    "cmd": "print('here is your checkmark: ' + '\\u2713')",
    "question_id": "10569438-9",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print( + )",
    "canonical_cmd": "print('here is your checkmark: ' + 'VAR_STR')"
  },
  {
    "nl": "print unicode characters in a string `\\u0420\\u043e\\u0441\\u0441\\u0438\\u044f`",
    "cmd": "print('\\u0420\\u043e\\u0441\\u0441\\u0438\\u044f')",
    "question_id": "10569438-100",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print()",
    "canonical_cmd": "print('VAR_STR')"
  },
  {
    "nl": "append 3 lists in one list",
    "cmd": "[[] for i in range(3)]",
    "question_id": "11219949-92",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "[[]  i  range()]",
    "canonical_cmd": "[[] for i in range(3)]"
  },
  {
    "nl": "Initialize a list of empty lists `a` of size 3",
    "cmd": "a = [[] for i in range(3)]",
    "question_id": "11219949-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "a = [[]  i  range()]",
    "canonical_cmd": "VAR_STR = [[] for i in range(3)]"
  },
  {
    "nl": "remove letters from string `example_line` if the letter exist in list `bad_chars`",
    "cmd": "\"\"\"\"\"\".join(dropwhile(lambda x: x in bad_chars, example_line[::-1]))[::-1]",
    "question_id": "39532974-100",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.dropwhile",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(dropwhile( x: x  bad_chars, example_line[::-]))[::-]",
    "canonical_cmd": "\"\"\"\"\"\".join(dropwhile(lambda x: x in VAR_STR, VAR_STR[::-1]))[::-1]"
  },
  {
    "nl": "get every thing after last `/`",
    "cmd": "url.rsplit('/', 1)",
    "question_id": "7253803-95",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rsplit"
    ],
    "clean_cmd": "url.rsplit(, )",
    "canonical_cmd": "url.rsplit('VAR_STR', 1)"
  },
  {
    "nl": "get everything after last slash in a url stored in variable 'url'",
    "cmd": "url.rsplit('/', 1)[-1]",
    "question_id": "7253803-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rsplit"
    ],
    "clean_cmd": "url.rsplit(, )[-]",
    "canonical_cmd": "VAR_STR.rsplit('/', 1)[-1]"
  },
  {
    "nl": "Find the list in a list of lists `alkaline_earth_values` with the max value of the second element.",
    "cmd": "max(alkaline_earth_values, key=lambda x: x[1])",
    "question_id": "4800419-29",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#max"
    ],
    "clean_cmd": "max(alkaline_earth_values, key= x: x[])",
    "canonical_cmd": "max(VAR_STR, key=lambda x: x[1])"
  },
  {
    "nl": "remove elements from list `centroids` the indexes of which are in array `index`",
    "cmd": "[element for i, element in enumerate(centroids) if i not in index]",
    "question_id": "31267493-65",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate"
    ],
    "clean_cmd": "[element  i, element  enumerate(centroids)  i   index]",
    "canonical_cmd": "[element for i, element in enumerate(VAR_STR) if i not in VAR_STR]"
  },
  {
    "nl": "convert a list of dictionaries `listofdict into a dictionary of dictionaries",
    "cmd": "dict((d['name'], d) for d in listofdict)",
    "question_id": "8303993-4",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict((d[], d)  d  listofdict)",
    "canonical_cmd": "dict((d['name'], d) for d in listofdict)"
  },
  {
    "nl": "sort a list `unsorted_list` based on another sorted list `presorted_list`",
    "cmd": "sorted(unsorted_list, key=presorted_list.index)",
    "question_id": "36518800-16",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(unsorted_list, key=presorted_list.index)",
    "canonical_cmd": "sorted(VAR_STR, key=VAR_STR.index)"
  },
  {
    "nl": "For each index `x` from 0 to 3, append the element at index `x` of list `b` to the list at index `x` of list a.",
    "cmd": "[a[x].append(b[x]) for x in range(3)]",
    "question_id": "18872717-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range",
      "numpy.reference.generated.numpy.append"
    ],
    "clean_cmd": "[a[x].append(b[x])  x  range()]",
    "canonical_cmd": "[a[VAR_STR].append(VAR_STR[VAR_STR]) for VAR_STR in range(3)]"
  },
  {
    "nl": "convert dictionary `dict` into a flat list",
    "cmd": "print([y for x in list(dict.items()) for y in x])",
    "question_id": "11351874-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.items",
      "python.library.functions#list"
    ],
    "clean_cmd": "print([y  x  list(dict.items())  y  x])",
    "canonical_cmd": "print([y for x in list(VAR_STR.items()) for y in x])"
  },
  {
    "nl": "Convert a dictionary `dict` into a list with key and values as list items.",
    "cmd": "[y for x in list(dict.items()) for y in x]",
    "question_id": "11351874-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.items",
      "python.library.functions#list"
    ],
    "clean_cmd": "[y  x  list(dict.items())  y  x]",
    "canonical_cmd": "[y for x in list(VAR_STR.items()) for y in x]"
  },
  {
    "nl": "split a string 's' by space while ignoring spaces within square braces and quotes.",
    "cmd": "re.findall('\\\\[[^\\\\]]*\\\\]|\"[^\"]*\"|\\\\S+', s)",
    "question_id": "234512-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, s)",
    "canonical_cmd": "re.findall('\\\\[[^\\\\]]*\\\\]|\"[^\"]*\"|\\\\S+', VAR_STR)"
  },
  {
    "nl": "Sort dictionary `x` by value in ascending order",
    "cmd": "sorted(list(x.items()), key=operator.itemgetter(1))",
    "question_id": "613183-7",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.operator#operator.itemgetter",
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(list(x.items()), key=operator.itemgetter())",
    "canonical_cmd": "sorted(list(VAR_STR.items()), key=operator.itemgetter(1))"
  },
  {
    "nl": "Sort dictionary `dict1` by value in ascending order",
    "cmd": "sorted(dict1, key=dict1.get)",
    "question_id": "613183-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(dict1, key=dict1.get)",
    "canonical_cmd": "sorted(VAR_STR, key=VAR_STR.get)"
  },
  {
    "nl": "Sort dictionary `d` by value in descending order",
    "cmd": "sorted(d, key=d.get, reverse=True)",
    "question_id": "613183-93",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(d, key=d.get, reverse=True)",
    "canonical_cmd": "sorted(VAR_STR, key=VAR_STR.get, reverse=True)"
  },
  {
    "nl": "Sort dictionary `d` by value in ascending order",
    "cmd": "sorted(list(d.items()), key=(lambda x: x[1]))",
    "question_id": "613183-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(list(d.items()), key=( x: x[]))",
    "canonical_cmd": "sorted(list(VAR_STR.items()), key=lambda x: x[1])"
  },
  {
    "nl": "get the size of list `items`",
    "cmd": "len(items)",
    "question_id": "1712227-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len"
    ],
    "clean_cmd": "len(items)",
    "canonical_cmd": "len(VAR_STR)"
  },
  {
    "nl": "get the size of a list `[1,2,3]`",
    "cmd": "len([1, 2, 3])",
    "question_id": "1712227-52",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len"
    ],
    "clean_cmd": "len([, , ])",
    "canonical_cmd": "len([1, 2, 3])"
  },
  {
    "nl": "get the size of object `items`",
    "cmd": "items.__len__()",
    "question_id": "1712227-61",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.ndarray.__len__"
    ],
    "clean_cmd": "items.__len__()",
    "canonical_cmd": "VAR_STR.__len__()"
  },
  {
    "nl": "function to get the size of object",
    "cmd": "len()",
    "question_id": "1712227-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len"
    ],
    "clean_cmd": "len()",
    "canonical_cmd": "len()"
  },
  {
    "nl": "get the size of list `s`",
    "cmd": "len(s)",
    "question_id": "1712227-90",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len"
    ],
    "clean_cmd": "len(s)",
    "canonical_cmd": "len(VAR_STR)"
  },
  {
    "nl": "Fit Kmeans function to a one-dimensional array `x` by reshaping it to be a multidimensional array of single values",
    "cmd": "km.fit(x.reshape(-1, 1))",
    "question_id": "28416408-96",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.reshape",
      "pygame.ref.rect#pygame.Rect.fit"
    ],
    "clean_cmd": "km.fit(x.reshape(-, ))",
    "canonical_cmd": "km.fit(VAR_STR.reshape(-1, 1))"
  },
  {
    "nl": "django create a foreign key column `user` and link it to table 'User'",
    "cmd": "user = models.ForeignKey('User', unique=True)",
    "question_id": "19433630-36",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "user = models.ForeignKey(, unique=True)",
    "canonical_cmd": "VAR_STR = models.ForeignKey('VAR_STR', unique=True)"
  },
  {
    "nl": "write a list of strings `row` to csv object `csvwriter`",
    "cmd": "csvwriter.writerow(row)",
    "question_id": "6916542-23",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.csv#csv.csvwriter.writerow"
    ],
    "clean_cmd": "csvwriter.writerow(row)",
    "canonical_cmd": "VAR_STR.writerow(VAR_STR)"
  },
  {
    "nl": "pass dictionary items `data` as keyword arguments in function `my_function`",
    "cmd": "my_function(**data)",
    "question_id": "21986194-29",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "my_function(**data)",
    "canonical_cmd": "VAR_STR(**VAR_STR)"
  },
  {
    "nl": "merge the elements in a list `lst` sequentially",
    "cmd": "[''.join(seq) for seq in zip(lst, lst[1:])]",
    "question_id": "39646401-79",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "[.join(seq)  seq  zip(lst, lst[:])]",
    "canonical_cmd": "[''.join(seq) for seq in zip(VAR_STR, VAR_STR[1:])]"
  },
  {
    "nl": "make matplotlib plot legend put marker in legend only once",
    "cmd": "legend(numpoints=1)",
    "question_id": "6146778-62",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.legend_api#matplotlib.legend.Legend"
    ],
    "clean_cmd": "legend(numpoints=)",
    "canonical_cmd": "legend(numpoints=1)"
  },
  {
    "nl": "switch keys and values in a dictionary `my_dict`",
    "cmd": "dict((v, k) for k, v in my_dict.items())",
    "question_id": "8305518-73",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "dict((v, k)  k, v  my_dict.items())",
    "canonical_cmd": "dict((v, k) for k, v in VAR_STR.items())"
  },
  {
    "nl": "regular expression for validating string 'user' containing a sequence of characters ending with '-' followed by any number of digits.",
    "cmd": "re.compile('{}-\\\\d*'.format(user))",
    "question_id": "5900683-7",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.compile",
      "python.library.functions#format"
    ],
    "clean_cmd": "re.compile(.format(user))",
    "canonical_cmd": "re.compile('{}-\\\\d*'.format(VAR_STR))"
  },
  {
    "nl": "Get a list comprehension in list of lists `X`",
    "cmd": "[[X[i][j] for j in range(len(X[i]))] for i in range(len(X))]",
    "question_id": "21360028-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range"
    ],
    "clean_cmd": "[[X[i][j]  j  range(len(X[i]))]  i  range(len(X))]",
    "canonical_cmd": "[[VAR_STR[i][j] for j in range(len(VAR_STR[i]))] for i in range(len(VAR_STR))]"
  },
  {
    "nl": "convert `ms` milliseconds to a datetime object",
    "cmd": "datetime.datetime.fromtimestamp(ms / 1000.0)",
    "question_id": "748491-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.fromtimestamp"
    ],
    "clean_cmd": "datetime.datetime.fromtimestamp(ms / 1000.0)",
    "canonical_cmd": "datetime.datetime.fromtimestamp(VAR_STR / 1000.0)"
  },
  {
    "nl": "change a string of integers `x` separated by spaces to a list of int",
    "cmd": "x = map(int, x.split())",
    "question_id": "19555472-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "x = map(int, x.split())",
    "canonical_cmd": "VAR_STR = map(int, VAR_STR.split())"
  },
  {
    "nl": "convert a string of integers `x` separated by spaces to a list of integers",
    "cmd": "x = [int(i) for i in x.split()]",
    "question_id": "19555472-58",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "x = [int(i)  i  x.split()]",
    "canonical_cmd": "VAR_STR = [int(i) for i in VAR_STR.split()]"
  },
  {
    "nl": "subprocess run command 'start command -flags arguments' through the shell",
    "cmd": "subprocess.call('start command -flags arguments', shell=True)",
    "question_id": "9554544-22",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call(, shell=True)",
    "canonical_cmd": "subprocess.call('VAR_STR', shell=True)"
  },
  {
    "nl": "run command 'command -flags arguments &' on command line tools as separate processes",
    "cmd": "subprocess.call('command -flags arguments &', shell=True)",
    "question_id": "9554544-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call(, shell=True)",
    "canonical_cmd": "subprocess.call('VAR_STR', shell=True)"
  },
  {
    "nl": "Selenium get the entire `driver` page text",
    "cmd": "driver.page_source",
    "question_id": "16114244-20",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.page_source",
    "canonical_cmd": "VAR_STR.page_source"
  },
  {
    "nl": "regex matching 5-digit substrings not enclosed with digits in `s`",
    "cmd": "re.findall('(?<!\\\\d)\\\\d{5}(?!\\\\d)', s)",
    "question_id": "41807864-53",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, s)",
    "canonical_cmd": "re.findall('(?<!\\\\d)\\\\d{5}(?!\\\\d)', VAR_STR)"
  },
  {
    "nl": "convert a list of strings `['1', '-1', '1']` to a list of numbers",
    "cmd": "map(int, ['1', '-1', '1'])",
    "question_id": "5306079-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map"
    ],
    "clean_cmd": "map(int, [, , ])",
    "canonical_cmd": "map(int, [VAR_STR])"
  },
  {
    "nl": "concatenate items from list `parts` into a string starting from the second element",
    "cmd": "\"\"\"\"\"\".join(parts[1:])",
    "question_id": "13655392-1",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(parts[:])",
    "canonical_cmd": "\"\"\"\"\"\".join(VAR_STR[1:])"
  },
  {
    "nl": "insert a character ',' into a string in front of '+' character in second part of the string",
    "cmd": "\"\"\",+\"\"\".join(c.rsplit('+', 1))",
    "question_id": "13655392-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rsplit",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(c.rsplit(, ))",
    "canonical_cmd": "\"\"\",+\"\"\".join(c.rsplit('VAR_STR', 1))"
  },
  {
    "nl": "Use multiple groupby and agg operations `sum`, `count`, `std` for pandas data frame `df`",
    "cmd": "df.groupby(level=0).agg(['sum', 'count', 'std'])",
    "question_id": "39159475-83",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.dataframe.agg"
    ],
    "clean_cmd": "df.groupby(level=).agg([, , ])",
    "canonical_cmd": "VAR_STR.groupby(level=0).agg(['VAR_STR', 'VAR_STR', 'VAR_STR'])"
  },
  {
    "nl": "output data of the first 7 columns of Pandas dataframe",
    "cmd": "pandas.set_option('display.max_columns', 7)",
    "question_id": "11361985-41",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.set_option"
    ],
    "clean_cmd": "pandas.set_option(, )",
    "canonical_cmd": "pandas.set_option('display.max_columns', 7)"
  },
  {
    "nl": "Display maximum output data of columns in dataframe `pandas` that will fit into the screen",
    "cmd": "pandas.set_option('display.max_columns', None)",
    "question_id": "11361985-80",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.set_option"
    ],
    "clean_cmd": "pandas.set_option(, None)",
    "canonical_cmd": "VAR_STR.set_option('display.max_columns', None)"
  },
  {
    "nl": "sort list `the_list` by the length of string followed by alphabetical order",
    "cmd": "the_list.sort(key=lambda item: (-len(item), item))",
    "question_id": "4659524-76",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "the_list.sort(key= item: (-len(item), item))",
    "canonical_cmd": "VAR_STR.sort(key=lambda item: (-len(item), item))"
  },
  {
    "nl": "split a string `s` on last delimiter",
    "cmd": "s.rsplit(',', 1)",
    "question_id": "15012228-23",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rsplit"
    ],
    "clean_cmd": "s.rsplit(, )",
    "canonical_cmd": "VAR_STR.rsplit(',', 1)"
  },
  {
    "nl": "convert list `a` from being consecutive sequences of tuples into a single sequence of elements",
    "cmd": "list(itertools.chain(*a))",
    "question_id": "15269161-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.chain",
      "python.library.functions#list"
    ],
    "clean_cmd": "list(itertools.chain(*a))",
    "canonical_cmd": "list(itertools.chain(*VAR_STR))"
  },
  {
    "nl": "Sum numbers in a list 'your_list'",
    "cmd": "sum(your_list)",
    "question_id": "11344827-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(your_list)",
    "canonical_cmd": "sum(VAR_STR)"
  },
  {
    "nl": "convert a flat list into a list of tuples of every two items in the list, in order",
    "cmd": "print(zip(my_list[0::2], my_list[1::2]))",
    "question_id": "15852295-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "print(zip(my_list[::], my_list[::]))",
    "canonical_cmd": "print(zip(my_list[0::2], my_list[1::2]))"
  },
  {
    "nl": "group a list of ints into a list of tuples of each 2 elements",
    "cmd": "my_new_list = zip(my_list[0::2], my_list[1::2])",
    "question_id": "15852295-94",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "my_new_list = zip(my_list[::], my_list[::])",
    "canonical_cmd": "my_new_list = zip(my_list[0::2], my_list[1::2])"
  },
  {
    "nl": "Slice `url` with '&' as delimiter  to get  \"http://www.domainname.com/page?CONTENT_ITEM_ID=1234\" from url \"http://www.domainname.com/page?CONTENT_ITEM_ID=1234&param2&param3\r\n\"",
    "cmd": "url.split('&')",
    "question_id": "258746-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "url.split()",
    "canonical_cmd": "VAR_STR.split('VAR_STR')"
  },
  {
    "nl": "convert python dictionary `your_data` to json array",
    "cmd": "json.dumps(your_data, ensure_ascii=False)",
    "question_id": "14661051-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.json#json.dumps"
    ],
    "clean_cmd": "json.dumps(your_data, ensure_ascii=False)",
    "canonical_cmd": "json.dumps(VAR_STR, ensure_ascii=False)"
  },
  {
    "nl": "retrieve arabic texts from string `my_string`",
    "cmd": "print(re.findall('[\\\\u0600-\\\\u06FF]+', my_string))",
    "question_id": "36661837-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "print(re.findall(, my_string))",
    "canonical_cmd": "print(re.findall('[\\\\u0600-\\\\u06FF]+', VAR_STR))"
  },
  {
    "nl": "sort list `L` based on the value of variable 'resultType' for each object in list `L`",
    "cmd": "sorted(L, key=operator.itemgetter('resultType'))",
    "question_id": "2338531-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.operator#operator.itemgetter",
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(L, key=operator.itemgetter())",
    "canonical_cmd": "sorted(VAR_STR, key=operator.itemgetter('VAR_STR'))"
  },
  {
    "nl": "sort a list of objects `s` by a member variable 'resultType'",
    "cmd": "s.sort(key=operator.attrgetter('resultType'))",
    "question_id": "2338531-20",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.operator#operator.attrgetter",
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "s.sort(key=operator.attrgetter())",
    "canonical_cmd": "VAR_STR.sort(key=operator.attrgetter('VAR_STR'))"
  },
  {
    "nl": "sort a list of objects 'somelist' where the object has member number variable `resultType`",
    "cmd": "somelist.sort(key=lambda x: x.resultType)",
    "question_id": "2338531-55",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "somelist.sort(key= x: x.resultType)",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: x.VAR_STR)"
  },
  {
    "nl": "parse a YAML file \"example.yaml\"",
    "cmd": "with open('example.yaml', 'r') as stream:\n    try:\n        print((yaml.load(stream)))\n    except yaml.YAMLError as exc:\n        print(exc)",
    "question_id": "1773805-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.json#json.load"
    ],
    "clean_cmd": " open(, )  stream::print((yaml.load(stream))) yaml.YAMLError  exc:print(exc)",
    "canonical_cmd": "with open('VAR_STR', 'r') as stream:\n    try:\n        print(yaml.load(stream))\n    except yaml.YAMLError as exc:\n        print(exc)"
  },
  {
    "nl": "parse a YAML file \"example.yaml\"",
    "cmd": "with open('example.yaml') as stream:\n    try:\n        print((yaml.load(stream)))\n    except yaml.YAMLError as exc:\n        print(exc)",
    "question_id": "1773805-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.json#json.load"
    ],
    "clean_cmd": " open()  stream::print((yaml.load(stream))) yaml.YAMLError  exc:print(exc)",
    "canonical_cmd": "with open('VAR_STR') as stream:\n    try:\n        print(yaml.load(stream))\n    except yaml.YAMLError as exc:\n        print(exc)"
  },
  {
    "nl": "split string `s` into float values and write sum to `total`",
    "cmd": "total = sum(float(item) for item in s.split(','))",
    "question_id": "21212706-79",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#float",
      "python.library.functions#sum",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "total = sum(float(item)  item  s.split())",
    "canonical_cmd": "VAR_STR = sum(float(item) for item in VAR_STR.split(','))"
  },
  {
    "nl": "substitute occurrences of unicode regex pattern u'\\\\p{P}+' with empty string '' in string `text`",
    "cmd": "return re.sub('\\\\p{P}+', '', text)",
    "question_id": "11066400-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": " re.sub(, , text)",
    "canonical_cmd": "return re.sub('VAR_STR', 'VAR_STR', VAR_STR)"
  },
  {
    "nl": "get the absolute path of a running python script",
    "cmd": "os.path.abspath(__file__)",
    "question_id": "3283306-10",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.abspath"
    ],
    "clean_cmd": "os.path.abspath(__file__)",
    "canonical_cmd": "os.path.abspath(__file__)"
  },
  {
    "nl": "access value associated with key 'American' of key 'Apple' from dictionary `dict`",
    "cmd": "dict['Apple']['American']",
    "question_id": "5404665-73",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "dict[][]",
    "canonical_cmd": "VAR_STR['VAR_STR']['VAR_STR']"
  },
  {
    "nl": "Python date string formatting",
    "cmd": "\"\"\"{0.month}/{0.day}/{0.year}\"\"\".format(my_date)",
    "question_id": "18724607-93",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format(my_date)",
    "canonical_cmd": "\"\"\"{0.month}/{0.day}/{0.year}\"\"\".format(my_date)"
  },
  {
    "nl": "find rows with non zero values in a subset of columns where `df.dtypes` is not equal to `object` in pandas dataframe",
    "cmd": "df.loc[(df.loc[:, (df.dtypes != object)] != 0).any(1)]",
    "question_id": "39187788-53",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.loc",
      "python.library.functions#any"
    ],
    "clean_cmd": "df.loc[(df.loc[:, (df.dtypes != object)] != ).any()]",
    "canonical_cmd": "df.loc[(df.loc[:, (df.dtypes != VAR_STR)] != 0).any(1)]"
  },
  {
    "nl": "check if all elements in a tuple `(1, 6)` are in another `(1, 2, 3, 4, 5)`",
    "cmd": "all(i in (1, 2, 3, 4, 5) for i in (1, 6))",
    "question_id": "34468983-72",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#all"
    ],
    "clean_cmd": "all(i  (, , , , )  i  (, ))",
    "canonical_cmd": "all(i in (VAR_STR) for i in (VAR_STR))"
  },
  {
    "nl": "Get a list `C` by subtracting values in one list `B` from corresponding values in another list `A`",
    "cmd": "C = [(a - b) for a, b in zip(A, B)]",
    "question_id": "11677860-42",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "C = [(a - b)  a, b  zip(A, B)]",
    "canonical_cmd": "VAR_STR = [(a - b) for a, b in zip(VAR_STR, VAR_STR)]"
  },
  {
    "nl": "pull a value with key 'name' from a json object `item`",
    "cmd": "print(item['name'])",
    "question_id": "39605640-67",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print(item[])",
    "canonical_cmd": "print(VAR_STR['VAR_STR'])"
  },
  {
    "nl": "removing vowel characters 'aeiouAEIOU' from string `text`",
    "cmd": "\"\"\"\"\"\".join(c for c in text if c not in 'aeiouAEIOU')",
    "question_id": "41083229-90",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(c  c  text  c   )",
    "canonical_cmd": "\"\"\"\"\"\".join(c for c in VAR_STR if c not in 'VAR_STR')"
  },
  {
    "nl": "get last element of string splitted by '\\\\' from list of strings `list_dirs`",
    "cmd": "[l.split('\\\\')[-1] for l in list_dirs]",
    "question_id": "28657018-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "[l.split()[-]  l  list_dirs]",
    "canonical_cmd": "[l.split('VAR_STR')[-1] for l in VAR_STR]"
  },
  {
    "nl": "check if string `a` is an integer",
    "cmd": "a.isdigit()",
    "question_id": "354038-4",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.isdigit"
    ],
    "clean_cmd": "a.isdigit()",
    "canonical_cmd": "VAR_STR.isdigit()"
  },
  {
    "nl": "function to check if a string is a number",
    "cmd": "isdigit()",
    "question_id": "354038-84",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.isdigit"
    ],
    "clean_cmd": "isdigit()",
    "canonical_cmd": "isdigit()"
  },
  {
    "nl": "check if string `b` is a number",
    "cmd": "b.isdigit()",
    "question_id": "354038-52",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.isdigit"
    ],
    "clean_cmd": "b.isdigit()",
    "canonical_cmd": "VAR_STR.isdigit()"
  },
  {
    "nl": "reverse a UTF-8 string 'a'",
    "cmd": "b = a.decode('utf8')[::-1].encode('utf8')",
    "question_id": "34015615-56",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "b = a.decode()[::-].encode()",
    "canonical_cmd": "b = VAR_STR.decode('utf8')[::-1].encode('utf8')"
  },
  {
    "nl": "find all occurrences of regex pattern '(?:\\\\w+(?:\\\\s+\\\\w+)*,\\\\s)+(?:\\\\w+(?:\\\\s\\\\w+)*)' in string `x`",
    "cmd": "re.findall('(?:\\\\w+(?:\\\\s+\\\\w+)*,\\\\s)+(?:\\\\w+(?:\\\\s\\\\w+)*)', x)",
    "question_id": "20778951-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, x)",
    "canonical_cmd": "re.findall('VAR_STR', VAR_STR)"
  },
  {
    "nl": "Return rows of data associated with the maximum value of column 'Value' in dataframe `df`",
    "cmd": "df.loc[df['Value'].idxmax()]",
    "question_id": "15741759-23",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.loc",
      "pandas.reference.api.pandas.dataframe.idxmax"
    ],
    "clean_cmd": "df.loc[df[].idxmax()]",
    "canonical_cmd": "VAR_STR.loc[VAR_STR['VAR_STR'].idxmax()]"
  },
  {
    "nl": "Convert a datetime object `my_datetime` into readable format `%B %d, %Y`",
    "cmd": "my_datetime.strftime('%B %d, %Y')",
    "question_id": "2158347-51",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.strftime"
    ],
    "clean_cmd": "my_datetime.strftime()",
    "canonical_cmd": "VAR_STR.strftime('VAR_STR')"
  },
  {
    "nl": "Sort items in dictionary `d`  using the first part of the key after splitting the key",
    "cmd": "sorted(list(d.items()), key=lambda name_num: (name_num[0].rsplit(None, 1)[0], name_num[1]))",
    "question_id": "15795525-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#str.rsplit",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(list(d.items()), key= name_num: (name_num[].rsplit(None, )[], name_num[]))",
    "canonical_cmd": "sorted(list(VAR_STR.items()), key=lambda name_num: (name_num[0].rsplit(None, \n    1)[0], name_num[1]))"
  },
  {
    "nl": "Execute a put request to the url `url`",
    "cmd": "response = requests.put(url, data=json.dumps(data), headers=headers)",
    "question_id": "33127636-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.json#json.dumps",
      "numpy.reference.generated.numpy.put"
    ],
    "clean_cmd": "response = requests.put(url, data=json.dumps(data), headers=headers)",
    "canonical_cmd": "response = requests.put(VAR_STR, data=json.dumps(data), headers=headers)"
  },
  {
    "nl": "plot a data logarithmically in y axis",
    "cmd": "plt.yscale('log', nonposy='clip')",
    "question_id": "17952279-42",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib._as_gen.matplotlib.pyplot.yscale"
    ],
    "clean_cmd": "plt.yscale(, nonposy=)",
    "canonical_cmd": "plt.yscale('log', nonposy='clip')"
  },
  {
    "nl": "build a dictionary containing the conversion of each list in list `[['two', 2], ['one', 1]]` to a key/value pair as its items",
    "cmd": "dict([['two', 2], ['one', 1]])",
    "question_id": "6900955-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict([[, ], [, ]])",
    "canonical_cmd": "dict([VAR_STR])"
  },
  {
    "nl": "convert list `l` to dictionary having each two adjacent elements as key/value pair",
    "cmd": "dict(zip(l[::2], l[1::2]))",
    "question_id": "6900955-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict(zip(l[::], l[::]))",
    "canonical_cmd": "dict(zip(VAR_STR[::2], VAR_STR[1::2]))"
  },
  {
    "nl": "create list `c` containing items from list `b` whose index is in list `index`",
    "cmd": "c = [b[i] for i in index]",
    "question_id": "12768504-68",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "c = [b[i]  i  index]",
    "canonical_cmd": "VAR_STR = [VAR_STR[i] for i in VAR_STR]"
  },
  {
    "nl": "get geys of dictionary `my_dict` that contain any values from list `lst`",
    "cmd": "[key for key, value in list(my_dict.items()) if set(value).intersection(lst)]",
    "question_id": "32792874-24",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#set",
      "python.library.stdtypes#frozenset.intersection",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "[key  key, value  list(my_dict.items())  set(value).intersection(lst)]",
    "canonical_cmd": "[key for key, value in list(VAR_STR.items()) if set(value).intersection(VAR_STR)]"
  },
  {
    "nl": "get list of keys in dictionary `my_dict` whose values contain values from list `lst`",
    "cmd": "[key for item in lst for key, value in list(my_dict.items()) if item in value]",
    "question_id": "32792874-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "[key  item  lst  key, value  list(my_dict.items())  item  value]",
    "canonical_cmd": "[key for item in VAR_STR for key, value in list(VAR_STR.items()) if item in value]"
  },
  {
    "nl": "get first non-null value per each row from dataframe `df`",
    "cmd": "df.stack().groupby(level=0).first()",
    "question_id": "31828240-66",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.stack",
      "pandas.reference.api.pandas.dataframe.first",
      "pandas.reference.api.pandas.dataframe.groupby"
    ],
    "clean_cmd": "df.stack().groupby(level=).first()",
    "canonical_cmd": "VAR_STR.stack().groupby(level=0).first()"
  },
  {
    "nl": "Update row values for a column `Season` using vectorized string operation in pandas",
    "cmd": "df['Season'].str.split('-').str[0].astype(int)",
    "question_id": "25292838-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split",
      "pandas.reference.api.pandas.series.astype"
    ],
    "clean_cmd": "df[].str.split().str[].astype(int)",
    "canonical_cmd": "df['VAR_STR'].str.split('-').str[0].astype(int)"
  },
  {
    "nl": "find all digits in string '6,7)' and put them to a list",
    "cmd": "re.findall('\\\\d|\\\\d,\\\\d\\\\)', '6,7)')",
    "question_id": "31465002-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall('\\\\d|\\\\d,\\\\d\\\\)', 'VAR_STR')"
  },
  {
    "nl": "check if string `foo` is UTF-8 encoded",
    "cmd": "foo.decode('utf8').encode('utf8')",
    "question_id": "3487377-65",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "foo.decode().encode()",
    "canonical_cmd": "VAR_STR.decode('utf8').encode('utf8')"
  },
  {
    "nl": "check if  dictionary `d` contains all keys in list `['somekey', 'someotherkey', 'somekeyggg']`",
    "cmd": "all(word in d for word in ['somekey', 'someotherkey', 'somekeyggg'])",
    "question_id": "7128153-81",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#all"
    ],
    "clean_cmd": "all(word  d  word  [, , ])",
    "canonical_cmd": "all(word in VAR_STR for word in [VAR_STR])"
  },
  {
    "nl": "Get only digits from a string `strs`",
    "cmd": "\"\"\"\"\"\".join([c for c in strs if c.isdigit()])",
    "question_id": "18116235-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.isdigit",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join([c  c  strs  c.isdigit()])",
    "canonical_cmd": "\"\"\"\"\"\".join([c for c in VAR_STR if c.isdigit()])"
  },
  {
    "nl": "sort dictionary `tag_weight` in reverse order by values cast to integers",
    "cmd": "sorted(list(tag_weight.items()), key=lambda x: int(x[1]), reverse=True)",
    "question_id": "40208429-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#int",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(list(tag_weight.items()), key= x: int(x[]), reverse=True)",
    "canonical_cmd": "sorted(list(VAR_STR.items()), key=lambda x: int(x[1]), reverse=True)"
  },
  {
    "nl": "sort list `mylist` of tuples by arbitrary key from list `order`",
    "cmd": "sorted(mylist, key=lambda x: order.index(x[1]))",
    "question_id": "13408919-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.stdtypes#str.index"
    ],
    "clean_cmd": "sorted(mylist, key= x: order.index(x[]))",
    "canonical_cmd": "sorted(VAR_STR, key=lambda x: VAR_STR.index(x[1]))"
  },
  {
    "nl": "multiply values of dictionary `dict` with their respective values in dictionary `dict2`",
    "cmd": "dict((k, v * dict2[k]) for k, v in list(dict1.items()) if k in dict2)",
    "question_id": "15334783-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "dict((k, v * dict2[k])  k, v  list(dict1.items())  k  dict2)",
    "canonical_cmd": "VAR_STR((k, v * VAR_STR[k]) for k, v in list(dict1.items()) if k in VAR_STR)"
  },
  {
    "nl": "insert directory 'libs' at the 0th index of current directory",
    "cmd": "sys.path.insert(0, 'libs')",
    "question_id": "14850853-99",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.insert"
    ],
    "clean_cmd": "sys.path.insert(, )",
    "canonical_cmd": "sys.path.insert(0, 'VAR_STR')"
  },
  {
    "nl": "download to a directory '/path/to/dir/filename.ext' from source 'http://example.com/file.ext'",
    "cmd": "urllib.request.urlretrieve('http://example.com/file.ext', '/path/to/dir/filename.ext')",
    "question_id": "21018612-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#urllib.request.urlretrieve"
    ],
    "clean_cmd": "urllib.request.urlretrieve(, )",
    "canonical_cmd": "urllib.request.urlretrieve('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "scroll to the bottom of a web page using selenium webdriver",
    "cmd": "driver.execute_script('window.scrollTo(0, Y)')",
    "question_id": "20986631-1",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.execute_script()",
    "canonical_cmd": "driver.execute_script('window.scrollTo(0, Y)')"
  },
  {
    "nl": "scroll a to the bottom of a web page using selenium webdriver",
    "cmd": "driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')",
    "question_id": "20986631-45",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.execute_script()",
    "canonical_cmd": "driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')"
  },
  {
    "nl": "remove tags from a string `mystring`",
    "cmd": "re.sub('<[^>]*>', '', mystring)",
    "question_id": "3662142-4",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , mystring)",
    "canonical_cmd": "re.sub('<[^>]*>', '', VAR_STR)"
  },
  {
    "nl": "append array of strings `['x', 'x', 'x']` into one string",
    "cmd": "\"\"\"\"\"\".join(['x', 'x', 'x'])",
    "question_id": "25678689-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join([, , ])",
    "canonical_cmd": "\"\"\"\"\"\".join([VAR_STR])"
  },
  {
    "nl": "Find all the items from a dictionary `D` if the key contains the string `Light`",
    "cmd": "[(k, v) for k, v in D.items() if 'Light' in k]",
    "question_id": "3252590-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "[(k, v)  k, v  D.items()    k]",
    "canonical_cmd": "[(k, v) for k, v in VAR_STR.items() if 'VAR_STR' in k]"
  },
  {
    "nl": "create a list `result` containing elements form list `list_a` if first element of list `list_a` is in list `list_b`",
    "cmd": "result = [x for x in list_a if x[0] in list_b]",
    "question_id": "18448469-27",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "result = [x  x  list_a  x[]  list_b]",
    "canonical_cmd": "VAR_STR = [x for x in VAR_STR if x[0] in VAR_STR]"
  },
  {
    "nl": "transforming the string `s` into dictionary",
    "cmd": "dict(map(int, x.split(':')) for x in s.split(','))",
    "question_id": "4127344-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.stdtypes#dict",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "dict(map(int, x.split())  x  s.split())",
    "canonical_cmd": "dict(map(int, x.split(':')) for x in VAR_STR.split(','))"
  },
  {
    "nl": "get output of script `proc`",
    "cmd": "print(proc.communicate()[0])",
    "question_id": "6086047-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.Popen.communicate"
    ],
    "clean_cmd": "print(proc.communicate()[])",
    "canonical_cmd": "print(VAR_STR.communicate()[0])"
  },
  {
    "nl": "set UTC offset by 9 hrs ahead for date '2013/09/11 00:17'",
    "cmd": "dateutil.parser.parse('2013/09/11 00:17 +0900')",
    "question_id": "18722196-70",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.email.parser#email.parser.Parser.parse"
    ],
    "clean_cmd": "dateutil.parser.parse()",
    "canonical_cmd": "dateutil.parser.parse('2013/09/11 00:17 +0900')"
  },
  {
    "nl": "Return a subplot axes positioned by the grid definition `1,1,1` using matpotlib",
    "cmd": "fig.add_subplot(1, 1, 1)",
    "question_id": "1358977-73",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.figure_api#matplotlib.figure.FigureBase.add_subplot"
    ],
    "clean_cmd": "fig.add_subplot(, , )",
    "canonical_cmd": "fig.add_subplot(1, 1, 1)"
  },
  {
    "nl": "replace nan values in a pandas data frame with the average of columns",
    "cmd": "df.apply(lambda x: x.fillna(x.mean()), axis=0)",
    "question_id": "18689823-5",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.apply",
      "pandas.reference.api.pandas.dataframe.fillna",
      "pandas.reference.api.pandas.dataframe.mean"
    ],
    "clean_cmd": "df.apply( x: x.fillna(x.mean()), axis=)",
    "canonical_cmd": "df.apply(lambda x: x.fillna(x.mean()), axis=0)"
  },
  {
    "nl": "Creating an empty list `l`",
    "cmd": "l = []",
    "question_id": "2972212-34",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "l = []",
    "canonical_cmd": "VAR_STR = []"
  },
  {
    "nl": "Creating an empty list `l`",
    "cmd": "l = list()",
    "question_id": "2972212-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list"
    ],
    "clean_cmd": "l = list()",
    "canonical_cmd": "VAR_STR = list()"
  },
  {
    "nl": "Creating an empty list",
    "cmd": "list()",
    "question_id": "2972212-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list"
    ],
    "clean_cmd": "list()",
    "canonical_cmd": "list()"
  },
  {
    "nl": "Creating an empty list",
    "cmd": "[]",
    "question_id": "2972212-96",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[]",
    "canonical_cmd": "[]"
  },
  {
    "nl": "sort dictionary of dictionaries `dic` according to the key 'Fisher'",
    "cmd": "sorted(list(dic.items()), key=lambda x: x[1]['Fisher'], reverse=True)",
    "question_id": "16412563-51",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(list(dic.items()), key= x: x[][], reverse=True)",
    "canonical_cmd": "sorted(list(VAR_STR.items()), key=lambda x: x[1]['VAR_STR'], reverse=True)"
  },
  {
    "nl": "check if the string `myString` is empty",
    "cmd": "if (not myString):\n    pass",
    "question_id": "9573244-39",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " ( myString):",
    "canonical_cmd": "if not VAR_STR:\n    pass"
  },
  {
    "nl": "check if string `some_string` is empty",
    "cmd": "if (not some_string):\n    pass",
    "question_id": "9573244-74",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " ( some_string):",
    "canonical_cmd": "if not VAR_STR:\n    pass"
  },
  {
    "nl": "check if string `my_string` is empty",
    "cmd": "if (not my_string):\n    pass",
    "question_id": "9573244-79",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " ( my_string):",
    "canonical_cmd": "if not VAR_STR:\n    pass"
  },
  {
    "nl": "check if string `my_string` is empty",
    "cmd": "if some_string:\n    pass",
    "question_id": "9573244-4",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " some_string:",
    "canonical_cmd": "if some_string:\n    pass"
  },
  {
    "nl": "set font size of axis legend of plot `plt` to 'xx-small'",
    "cmd": "plt.setp(legend.get_title(), fontsize='xx-small')",
    "question_id": "12402561-84",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.legend_api#matplotlib.legend.Legend.get_title",
      "matplotlib._as_gen.matplotlib.artist.setp"
    ],
    "clean_cmd": "plt.setp(legend.get_title(), fontsize=)",
    "canonical_cmd": "VAR_STR.setp(legend.get_title(), fontsize='VAR_STR')"
  },
  {
    "nl": "Get a list of  words from a string `Hello world, my name is...James the 2nd!` removing punctuation",
    "cmd": "re.compile('\\\\w+').findall('Hello world, my name is...James the 2nd!')",
    "question_id": "7633274-73",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.compile",
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.compile().findall()",
    "canonical_cmd": "re.compile('\\\\w+').findall('VAR_STR')"
  },
  {
    "nl": "list all files in a current directory",
    "cmd": "glob.glob('*')",
    "question_id": "22625616-82",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "glob.glob()",
    "canonical_cmd": "glob.glob('*')"
  },
  {
    "nl": "List all the files that doesn't contain the name `hello`",
    "cmd": "glob.glob('[!hello]*.txt')",
    "question_id": "22625616-43",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "glob.glob()",
    "canonical_cmd": "glob.glob('[!hello]*.txt')"
  },
  {
    "nl": "List all the files that matches the pattern `hello*.txt`",
    "cmd": "glob.glob('hello*.txt')",
    "question_id": "22625616-69",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "glob.glob()",
    "canonical_cmd": "glob.glob('VAR_STR')"
  },
  {
    "nl": "upload binary file `myfile.txt` with ftplib",
    "cmd": "ftp.storbinary('STOR myfile.txt', open('myfile.txt', 'rb'))",
    "question_id": "2911754-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.ftplib#ftplib.FTP.storbinary"
    ],
    "clean_cmd": "ftp.storbinary(, open(, ))",
    "canonical_cmd": "ftp.storbinary('STOR myfile.txt', open('VAR_STR', 'rb'))"
  },
  {
    "nl": "convert and escape string \"\\\\xc3\\\\x85\u3042\" to UTF-8 code",
    "cmd": "\"\"\"\\\\xc3\\\\x85\u3042\"\"\".encode('utf-8').decode('unicode_escape')",
    "question_id": "41552839-94",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": ".encode().decode()",
    "canonical_cmd": "\"\"\"\\\\xc3\\\\x85\u3042\"\"\".encode('utf-8').decode('unicode_escape')"
  },
  {
    "nl": "encode string \"\\\\xc3\\\\x85\u3042\" to bytes",
    "cmd": "\"\"\"\\\\xc3\\\\x85\u3042\"\"\".encode('utf-8')",
    "question_id": "41552839-27",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": ".encode()",
    "canonical_cmd": "\"\"\"\\\\xc3\\\\x85\u3042\"\"\".encode('utf-8')"
  },
  {
    "nl": "combine  dataframe `df1` and dataframe `df2` by index number",
    "cmd": "pd.merge(df1, df2, left_index=True, right_index=True, how='outer')",
    "question_id": "28773683-11",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.merge"
    ],
    "clean_cmd": "pd.merge(df1, df2, left_index=True, right_index=True, how=)",
    "canonical_cmd": "pd.merge(VAR_STR, VAR_STR, left_index=True, right_index=True, how='outer')"
  },
  {
    "nl": "Combine two Pandas dataframes with the same index",
    "cmd": "pandas.concat([df1, df2], axis=1)",
    "question_id": "28773683-66",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.concat"
    ],
    "clean_cmd": "pandas.concat([df1, df2], axis=)",
    "canonical_cmd": "pandas.concat([df1, df2], axis=1)"
  },
  {
    "nl": "replace parentheses and all data within it with empty string '' in column 'name' of dataframe `df`",
    "cmd": "df['name'].str.replace('\\\\(.*\\\\)', '')",
    "question_id": "20894525-98",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "df[].str.replace(, )",
    "canonical_cmd": "VAR_STR['VAR_STR'].str.replace('\\\\(.*\\\\)', 'VAR_STR')"
  },
  {
    "nl": "delete items from list `my_list` if the item exist in list `to_dell`",
    "cmd": "my_list = [[x for x in sublist if x not in to_del] for sublist in my_list]",
    "question_id": "41313232-31",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "my_list = [[x  x  sublist  x   to_del]  sublist  my_list]",
    "canonical_cmd": "VAR_STR = [[x for x in sublist if x not in to_del] for sublist in VAR_STR]"
  },
  {
    "nl": "invoke perl script './uireplace.pl' using perl interpeter '/usr/bin/perl' and send argument `var` to it",
    "cmd": "subprocess.call(['/usr/bin/perl', './uireplace.pl', var])",
    "question_id": "4682088-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call([, , var])",
    "canonical_cmd": "subprocess.call(['VAR_STR', 'VAR_STR', VAR_STR])"
  },
  {
    "nl": "filter lines from a text file 'textfile' which contain a word 'apple'",
    "cmd": "[line for line in open('textfile') if 'apple' in line]",
    "question_id": "5245058-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "[line  line  open()    line]",
    "canonical_cmd": "[line for line in open('VAR_STR') if 'VAR_STR' in line]"
  },
  {
    "nl": "Check if the value of the key \"name\" is \"Test\" in a list of dictionaries `label`",
    "cmd": "any(d['name'] == 'Test' for d in label)",
    "question_id": "17149561-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#any"
    ],
    "clean_cmd": "any(d[] ==   d  label)",
    "canonical_cmd": "any(d['VAR_STR'] == 'VAR_STR' for d in VAR_STR)"
  },
  {
    "nl": "get value in string `line` matched by regex pattern '\\\\bLOG_ADDR\\\\s+(\\\\S+)'",
    "cmd": "print(re.search('\\\\bLOG_ADDR\\\\s+(\\\\S+)', line).group(1))",
    "question_id": "32950347-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.search",
      "python.library.re#re.Match.group"
    ],
    "clean_cmd": "print(re.search(, line).group())",
    "canonical_cmd": "print(re.search('VAR_STR', VAR_STR).group(1))"
  },
  {
    "nl": "remove all null values from columns 'three', 'four' and 'five' of dataframe `df2`",
    "cmd": "df2.dropna(subset=['three', 'four', 'five'], how='all')",
    "question_id": "14991195-10",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.dropna"
    ],
    "clean_cmd": "df2.dropna(subset=[, , ], how=)",
    "canonical_cmd": "VAR_STR.dropna(subset=['VAR_STR', 'VAR_STR', 'VAR_STR'], how='all')"
  },
  {
    "nl": "insert variables `(var1, var2, var3)` into sql statement 'INSERT INTO table VALUES (?, ?, ?)'",
    "cmd": "cursor.execute('INSERT INTO table VALUES (?, ?, ?)', (var1, var2, var3))",
    "question_id": "902408-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.sqlite3#sqlite3.Cursor.execute"
    ],
    "clean_cmd": "cursor.execute(, (var1, var2, var3))",
    "canonical_cmd": "cursor.execute('VAR_STR', (VAR_STR))"
  },
  {
    "nl": "Execute a sql statement using variables `var1`, `var2` and `var3`",
    "cmd": "cursor.execute('INSERT INTO table VALUES (%s, %s, %s)', (var1, var2, var3))",
    "question_id": "902408-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.sqlite3#sqlite3.Cursor.execute"
    ],
    "clean_cmd": "cursor.execute(, (var1, var2, var3))",
    "canonical_cmd": "cursor.execute('INSERT INTO table VALUES (%s, %s, %s)', (VAR_STR, VAR_STR, VAR_STR))"
  },
  {
    "nl": "How to use variables in SQL statement in Python?",
    "cmd": "cursor.execute('INSERT INTO table VALUES (%s, %s, %s)', (var1, var2, var3))",
    "question_id": "902408-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.sqlite3#sqlite3.Cursor.execute"
    ],
    "clean_cmd": "cursor.execute(, (var1, var2, var3))",
    "canonical_cmd": "cursor.execute('INSERT INTO table VALUES (%s, %s, %s)', (var1, var2, var3))"
  },
  {
    "nl": "Selenium `driver` click a hyperlink with the pattern \"a[href^='javascript']\"",
    "cmd": "driver.find_element_by_css_selector(\"a[href^='javascript']\").click()",
    "question_id": "23931444-78",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.find_element_by_css_selector().click()",
    "canonical_cmd": "VAR_STR.find_element_by_css_selector('VAR_STR').click()"
  },
  {
    "nl": "Print string `t` with proper unicode representations",
    "cmd": "print(t.decode('unicode_escape'))",
    "question_id": "13793973-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "print(t.decode())",
    "canonical_cmd": "print(VAR_STR.decode('unicode_escape'))"
  },
  {
    "nl": "convert list `lst` of key, value pairs into a dictionary",
    "cmd": "dict([(e[0], int(e[1])) for e in lst])",
    "question_id": "33824334-79",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict([(e[], int(e[]))  e  lst])",
    "canonical_cmd": "dict([(e[0], int(e[1])) for e in VAR_STR])"
  },
  {
    "nl": "Print a string `card` with string formatting",
    "cmd": "print('I have: {0.price}'.format(card))",
    "question_id": "14041791-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": "print(.format(card))",
    "canonical_cmd": "print('I have: {0.price}'.format(VAR_STR))"
  },
  {
    "nl": "None",
    "cmd": "datetime.datetime.strptime('2007-03-04T21:08:12', '%Y-%m-%dT%H:%M:%S')",
    "question_id": "969285-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime"
    ],
    "clean_cmd": "datetime.datetime.strptime(, )",
    "canonical_cmd": "datetime.datetime.strptime('2007-03-04T21:08:12', '%Y-%m-%dT%H:%M:%S')"
  },
  {
    "nl": "get current requested url",
    "cmd": "self.request.url",
    "question_id": "2764586-55",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "self.request.url",
    "canonical_cmd": "self.request.url"
  },
  {
    "nl": "convert list of strings `str_list` into list of integers",
    "cmd": "[int(i) for i in str_list]",
    "question_id": "2424412-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "[int(i)  i  str_list]",
    "canonical_cmd": "[int(i) for i in VAR_STR]"
  },
  {
    "nl": "convert a list with string `['1', '2', '3']` into list with integers",
    "cmd": "map(int, ['1', '2', '3'])",
    "question_id": "2424412-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map"
    ],
    "clean_cmd": "map(int, [, , ])",
    "canonical_cmd": "map(int, [VAR_STR])"
  },
  {
    "nl": "convert list with str into list with int",
    "cmd": "list(map(int, ['1', '2', '3']))",
    "question_id": "2424412-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.functions#list"
    ],
    "clean_cmd": "list(map(int, [, , ]))",
    "canonical_cmd": "list(map(int, ['1', '2', '3']))"
  },
  {
    "nl": "Return values for column `C` after group by on column `A` and `B` in dataframe `df`",
    "cmd": "df.groupby(['A', 'B'])['C'].unique()",
    "question_id": "33680914-50",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.core.groupby.seriesgroupby.unique"
    ],
    "clean_cmd": "df.groupby([, ])[].unique()",
    "canonical_cmd": "VAR_STR.groupby(['VAR_STR', 'VAR_STR'])['VAR_STR'].unique()"
  },
  {
    "nl": "plot point marker '.' on series `ts`",
    "cmd": "ts.plot(marker='.')",
    "question_id": "19939084-58",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.plot"
    ],
    "clean_cmd": "ts.plot(marker=)",
    "canonical_cmd": "VAR_STR.plot(marker='VAR_STR')"
  },
  {
    "nl": "Add header `('Cookie', 'cookiename=cookie value')` to mechanize browser `br`",
    "cmd": "br.addheaders = [('Cookie', 'cookiename=cookie value')]",
    "question_id": "15459217-11",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "br.addheaders = [(, )]",
    "canonical_cmd": "VAR_STR.addheaders = [(VAR_STR)]"
  },
  {
    "nl": "execute a command `command ` in the terminal from a python script",
    "cmd": "os.system(command)",
    "question_id": "33065588-27",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system(command)",
    "canonical_cmd": "os.system(VAR_STR)"
  },
  {
    "nl": "attach debugger pdb to class `ForkedPdb`",
    "cmd": "ForkedPdb().set_trace()",
    "question_id": "4716533-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.bdb#bdb.set_trace"
    ],
    "clean_cmd": "ForkedPdb().set_trace()",
    "canonical_cmd": "VAR_STR().set_trace()"
  },
  {
    "nl": "drop all columns in dataframe `df` that holds a maximum value bigger than 0",
    "cmd": "df.columns[df.max() > 0]",
    "question_id": "26897536-42",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.max"
    ],
    "clean_cmd": "df.columns[df.max() &gt; ]",
    "canonical_cmd": "VAR_STR.columns[VAR_STR.max() > 0]"
  },
  {
    "nl": "concatenate lists `listone` and `listtwo`",
    "cmd": "(listone + listtwo)",
    "question_id": "1720421-45",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "(listone + listtwo)",
    "canonical_cmd": "VAR_STR + VAR_STR"
  },
  {
    "nl": "iterate items in lists `listone` and `listtwo`",
    "cmd": "for item in itertools.chain(listone, listtwo):\n    pass",
    "question_id": "1720421-49",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.chain"
    ],
    "clean_cmd": " item  itertools.chain(listone, listtwo):",
    "canonical_cmd": "for item in itertools.chain(VAR_STR, VAR_STR):\n    pass"
  },
  {
    "nl": "convert date string `s` in format pattern '%d/%m/%Y' into a timestamp",
    "cmd": "time.mktime(datetime.datetime.strptime(s, '%d/%m/%Y').timetuple())",
    "question_id": "9637838-45",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime",
      "python.library.time#time.mktime",
      "python.library.datetime#datetime.datetime.timetuple"
    ],
    "clean_cmd": "time.mktime(datetime.datetime.strptime(s, ).timetuple())",
    "canonical_cmd": "time.mktime(datetime.datetime.strptime(VAR_STR, 'VAR_STR').timetuple())"
  },
  {
    "nl": "convert string '01/12/2011' to an integer timestamp",
    "cmd": "int(datetime.datetime.strptime('01/12/2011', '%d/%m/%Y').strftime('%s'))",
    "question_id": "9637838-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime",
      "python.library.functions#int",
      "python.library.datetime#datetime.datetime.strftime"
    ],
    "clean_cmd": "int(datetime.datetime.strptime(, ).strftime())",
    "canonical_cmd": "int(datetime.datetime.strptime('VAR_STR', '%d/%m/%Y').strftime('%s'))"
  },
  {
    "nl": "move the last item in list `a` to the beginning",
    "cmd": "a = a[-1:] + a[:-1]",
    "question_id": "6490560-12",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "a = a[-:] + a[:-]",
    "canonical_cmd": "VAR_STR = VAR_STR[-1:] + VAR_STR[:-1]"
  },
  {
    "nl": "Remove the string value `item` from a list of strings `my_sequence`",
    "cmd": "[item for item in my_sequence if item != 'item']",
    "question_id": "14961014-3",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[item  item  my_sequence  item != ]",
    "canonical_cmd": "[VAR_STR for VAR_STR in VAR_STR if VAR_STR != 'VAR_STR']"
  },
  {
    "nl": "print each first value from a list of tuples `mytuple` with string formatting",
    "cmd": "print(', ,'.join([str(i[0]) for i in mytuple]))",
    "question_id": "19112735-16",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "print(.join([str(i[])  i  mytuple]))",
    "canonical_cmd": "print(', ,'.join([str(i[0]) for i in VAR_STR]))"
  },
  {
    "nl": "Serialize dictionary `data` and its keys to a JSON formatted string",
    "cmd": "json.dumps({str(k): v for k, v in data.items()})",
    "question_id": "12337583-90",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.json#json.dumps",
      "python.library.stdtypes#str",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "json.dumps({str(k): v  k, v  data.items()})",
    "canonical_cmd": "json.dumps({str(k): v for k, v in VAR_STR.items()})"
  },
  {
    "nl": "remove dollar sign '$' from second to last column data in dataframe 'df' and convert the data into floats",
    "cmd": "df[df.columns[1:]].replace('[\\\\$,]', '', regex=True).astype(float)",
    "question_id": "32464280-95",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.astype",
      "pandas.reference.api.pandas.dataframe.replace"
    ],
    "clean_cmd": "df[df.columns[:]].replace(, , regex=True).astype(float)",
    "canonical_cmd": "VAR_STR[VAR_STR.columns[1:]].replace('[\\\\$,]', '', regex=True).astype(float)"
  },
  {
    "nl": "insert spaces before capital letters in string `text`",
    "cmd": "re.sub('([a-z](?=[A-Z])|[A-Z](?=[A-Z][a-z]))', '\\\\1 ', text)",
    "question_id": "199059-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , text)",
    "canonical_cmd": "re.sub('([a-z](?=[A-Z])|[A-Z](?=[A-Z][a-z]))', '\\\\1 ', VAR_STR)"
  },
  {
    "nl": "Get all the sentences from a string `text` using regex",
    "cmd": "re.split('\\\\.\\\\s', text)",
    "question_id": "27946742-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, text)",
    "canonical_cmd": "re.split('\\\\.\\\\s', VAR_STR)"
  },
  {
    "nl": "Regular expression in Python sentence extractor",
    "cmd": "re.split('\\\\.\\\\s', re.sub('\\\\.\\\\s*$', '', text))",
    "question_id": "27946742-72",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub",
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, re.sub(, , text))",
    "canonical_cmd": "re.split('\\\\.\\\\s', re.sub('\\\\.\\\\s*$', '', text))"
  },
  {
    "nl": "convert a list 'a' to a dictionary where each even element represents the key to the dictionary, and the following odd element is the value",
    "cmd": "b = {a[i]: a[i + 1] for i in range(0, len(a), 2)}",
    "question_id": "4576115-20",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range"
    ],
    "clean_cmd": "b = {a[i]: a[i + ]  i  range(, len(a), )}",
    "canonical_cmd": "b = {VAR_STR[i]: VAR_STR[i + 1] for i in range(0, len(VAR_STR), 2)}"
  },
  {
    "nl": "replace value 0 with 'Female' and value 1 with 'Male' in column 'sex' of dataframe `data`",
    "cmd": "data['sex'].replace([0, 1], ['Female', 'Male'], inplace=True)",
    "question_id": "31888871-4",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "data[].replace([, ], [, ], inplace=True)",
    "canonical_cmd": "VAR_STR['VAR_STR'].replace([0, 1], ['VAR_STR', 'VAR_STR'], inplace=True)"
  },
  {
    "nl": "convert date string 'January 11, 2010'  into day of week",
    "cmd": "datetime.datetime.strptime('January 11, 2010', '%B %d, %Y').strftime('%A')",
    "question_id": "16766643-79",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime",
      "python.library.datetime#datetime.datetime.strftime"
    ],
    "clean_cmd": "datetime.datetime.strptime(, ).strftime()",
    "canonical_cmd": "datetime.datetime.strptime('VAR_STR', '%B %d, %Y').strftime('%A')"
  },
  {
    "nl": "Convert Date String to Day of Week",
    "cmd": "datetime.datetime.strptime('January 11, 2010', '%B %d, %Y').strftime('%a')",
    "question_id": "16766643-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime",
      "python.library.datetime#datetime.datetime.strftime"
    ],
    "clean_cmd": "datetime.datetime.strptime(, ).strftime()",
    "canonical_cmd": "datetime.datetime.strptime('January 11, 2010', '%B %d, %Y').strftime('%a')"
  },
  {
    "nl": "check if key 'stackoverflow' and key 'google' are presented in dictionary `sites`",
    "cmd": "set(['stackoverflow', 'google']).issubset(sites)",
    "question_id": "2813806-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#set",
      "python.library.stdtypes#frozenset.issubset"
    ],
    "clean_cmd": "set([, ]).issubset(sites)",
    "canonical_cmd": "set(['VAR_STR', 'VAR_STR']).issubset(VAR_STR)"
  },
  {
    "nl": "convert unicode text from list `elems` with index 0 to normal text 'utf-8'",
    "cmd": "elems[0].getText().encode('utf-8')",
    "question_id": "36623789-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.gettext#gettext.gettext",
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "elems[].getText().encode()",
    "canonical_cmd": "VAR_STR[0].getText().encode('VAR_STR')"
  },
  {
    "nl": "Convert a datetime object `dt` to microtime",
    "cmd": "time.mktime(dt.timetuple()) + dt.microsecond / 1000000.0",
    "question_id": "7238226-76",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.mktime",
      "python.library.datetime#datetime.datetime.timetuple"
    ],
    "clean_cmd": "time.mktime(dt.timetuple()) + dt.microsecond / 1000000.0",
    "canonical_cmd": "time.mktime(VAR_STR.timetuple()) + VAR_STR.microsecond / 1000000.0"
  },
  {
    "nl": "check whether a file `fname` exists",
    "cmd": "os.path.isfile(fname)",
    "question_id": "82831-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.isfile"
    ],
    "clean_cmd": "os.path.isfile(fname)",
    "canonical_cmd": "os.path.isfile(VAR_STR)"
  },
  {
    "nl": "check whether file \"/path/to/file\" exists",
    "cmd": "my_file = Path('/path/to/file')\nif my_file.is_file():\n    pass",
    "question_id": "82831-96",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.zipfile#zipfile.Path.is_file",
      "matplotlib.path_api#matplotlib.path.Path"
    ],
    "clean_cmd": "my_file = Path() my_file.is_file():",
    "canonical_cmd": "my_file = Path('VAR_STR')\nif my_file.is_file():\n    pass"
  },
  {
    "nl": "check whether file `file_path` exists",
    "cmd": "os.path.exists(file_path)",
    "question_id": "82831-55",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.exists"
    ],
    "clean_cmd": "os.path.exists(file_path)",
    "canonical_cmd": "os.path.exists(VAR_STR)"
  },
  {
    "nl": "check whether a file \"/etc/password.txt\" exists",
    "cmd": "print(os.path.isfile('/etc/password.txt'))",
    "question_id": "82831-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.isfile"
    ],
    "clean_cmd": "print(os.path.isfile())",
    "canonical_cmd": "print(os.path.isfile('VAR_STR'))"
  },
  {
    "nl": "check whether a file \"/etc\" exists",
    "cmd": "print(os.path.isfile('/etc'))",
    "question_id": "82831-46",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.isfile"
    ],
    "clean_cmd": "print(os.path.isfile())",
    "canonical_cmd": "print(os.path.isfile('VAR_STR'))"
  },
  {
    "nl": "check whether a path \"/does/not/exist\" exists",
    "cmd": "print(os.path.exists('/does/not/exist'))",
    "question_id": "82831-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.exists"
    ],
    "clean_cmd": "print(os.path.exists())",
    "canonical_cmd": "print(os.path.exists('VAR_STR'))"
  },
  {
    "nl": "check whether a file \"/does/not/exist\" exists",
    "cmd": "print(os.path.isfile('/does/not/exist'))",
    "question_id": "82831-53",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.isfile"
    ],
    "clean_cmd": "print(os.path.isfile())",
    "canonical_cmd": "print(os.path.isfile('VAR_STR'))"
  },
  {
    "nl": "check whether a path \"/etc\" exists",
    "cmd": "print(os.path.exists('/etc'))",
    "question_id": "82831-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.exists"
    ],
    "clean_cmd": "print(os.path.exists())",
    "canonical_cmd": "print(os.path.exists('VAR_STR'))"
  },
  {
    "nl": "check whether a path \"/etc/password.txt\" exists",
    "cmd": "print(os.path.exists('/etc/password.txt'))",
    "question_id": "82831-24",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.exists"
    ],
    "clean_cmd": "print(os.path.exists())",
    "canonical_cmd": "print(os.path.exists('VAR_STR'))"
  },
  {
    "nl": "print line `line` from text file with 'utf-16-le' format",
    "cmd": "print(line.decode('utf-16-le').split())",
    "question_id": "19328874-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "print(line.decode().split())",
    "canonical_cmd": "print(VAR_STR.decode('VAR_STR').split())"
  },
  {
    "nl": "open a text file `data.txt` in io module with encoding `utf-16-le`",
    "cmd": "file = io.open('data.txt', 'r', encoding='utf-16-le')",
    "question_id": "19328874-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.io#io.open"
    ],
    "clean_cmd": "file = io.open(, , encoding=)",
    "canonical_cmd": "file = io.open('VAR_STR', 'r', encoding='VAR_STR')"
  },
  {
    "nl": "create a list with the sum of respective elements of the tuples of list `l`",
    "cmd": "[sum(x) for x in zip(*l)]",
    "question_id": "14180866-79",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#sum"
    ],
    "clean_cmd": "[sum(x)  x  zip(*l)]",
    "canonical_cmd": "[sum(x) for x in zip(*VAR_STR)]"
  },
  {
    "nl": "sum each value in a list `l` of tuples",
    "cmd": "map(sum, zip(*l))",
    "question_id": "14180866-73",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#map"
    ],
    "clean_cmd": "map(sum, zip(*l))",
    "canonical_cmd": "map(sum, zip(*VAR_STR))"
  },
  {
    "nl": "count the number of elements in array `myArray`",
    "cmd": "len(myArray)",
    "question_id": "187455-29",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len"
    ],
    "clean_cmd": "len(myArray)",
    "canonical_cmd": "len(VAR_STR)"
  },
  {
    "nl": "BeautifulSoup select 'div' elements with an id attribute value ending with sub-string '_answer' in HTML parsed string `soup`",
    "cmd": "soup.select('div[id$=_answer]')",
    "question_id": "42180455-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.select#select.select"
    ],
    "clean_cmd": "soup.select()",
    "canonical_cmd": "VAR_STR.select('div[id$=_answer]')"
  },
  {
    "nl": "kill a process with id `process.pid`",
    "cmd": "os.kill(process.pid, signal.SIGKILL)",
    "question_id": "1064335-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.kill"
    ],
    "clean_cmd": "os.kill(process.pid, signal.SIGKILL)",
    "canonical_cmd": "os.kill(process.pid, signal.SIGKILL)"
  },
  {
    "nl": "Get Last Day of the first month in 2002",
    "cmd": "calendar.monthrange(2002, 1)",
    "question_id": "42950-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.calendar#calendar.monthrange"
    ],
    "clean_cmd": "calendar.monthrange(, )",
    "canonical_cmd": "calendar.monthrange(2002, 1)"
  },
  {
    "nl": "Get Last Day of the second month in 2002",
    "cmd": "calendar.monthrange(2008, 2)",
    "question_id": "42950-48",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.calendar#calendar.monthrange"
    ],
    "clean_cmd": "calendar.monthrange(, )",
    "canonical_cmd": "calendar.monthrange(2008, 2)"
  },
  {
    "nl": "Get Last Day of the second month in 2100",
    "cmd": "calendar.monthrange(2100, 2)",
    "question_id": "42950-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.calendar#calendar.monthrange"
    ],
    "clean_cmd": "calendar.monthrange(, )",
    "canonical_cmd": "calendar.monthrange(2100, 2)"
  },
  {
    "nl": "Get Last Day of the month `month` in year `year`",
    "cmd": "calendar.monthrange(year, month)[1]",
    "question_id": "42950-85",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.calendar#calendar.monthrange"
    ],
    "clean_cmd": "calendar.monthrange(year, month)[]",
    "canonical_cmd": "calendar.monthrange(VAR_STR, VAR_STR)[1]"
  },
  {
    "nl": "Get Last Day of the second month in year 2012",
    "cmd": "monthrange(2012, 2)",
    "question_id": "42950-60",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.calendar#calendar.monthrange"
    ],
    "clean_cmd": "monthrange(, )",
    "canonical_cmd": "monthrange(2012, 2)"
  },
  {
    "nl": "Get Last Day of the first month in year 2000",
    "cmd": "(datetime.date(2000, 2, 1) - datetime.timedelta(days=1))",
    "question_id": "42950-91",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.timedelta",
      "python.library.datetime#datetime.date"
    ],
    "clean_cmd": "(datetime.date(, , ) - datetime.timedelta(days=))",
    "canonical_cmd": "datetime.date(2000, 2, 1) - datetime.timedelta(days=1)"
  },
  {
    "nl": "print the string `Total score for`, the value of the variable `name`, the string `is` and the value of the variable `score` in one print call.",
    "cmd": "print(('Total score for', name, 'is', score))",
    "question_id": "15286401-73",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print((, name, , score))",
    "canonical_cmd": "print(('VAR_STR', VAR_STR, 'VAR_STR', VAR_STR))"
  },
  {
    "nl": "print multiple arguments 'name' and 'score'.",
    "cmd": "print('Total score for {} is {}'.format(name, score))",
    "question_id": "15286401-49",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": "print(.format(name, score))",
    "canonical_cmd": "print('Total score for {} is {}'.format(VAR_STR, VAR_STR))"
  },
  {
    "nl": "print a string using multiple strings `name` and `score`",
    "cmd": "print('Total score for %s is %s  ' % (name, score))",
    "question_id": "15286401-39",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print( % (name, score))",
    "canonical_cmd": "print('Total score for %s is %s  ' % (VAR_STR, VAR_STR))"
  },
  {
    "nl": "print string including multiple variables `name` and `score`",
    "cmd": "print(('Total score for', name, 'is', score))",
    "question_id": "15286401-72",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print((, name, , score))",
    "canonical_cmd": "print(('Total score for', VAR_STR, 'is', VAR_STR))"
  },
  {
    "nl": "print float `a` with two decimal points",
    "cmd": "print(('%.2f' % a))",
    "question_id": "455612-84",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print(( % a))",
    "canonical_cmd": "print('%.2f' % VAR_STR)"
  },
  {
    "nl": "print float `a` with two decimal points",
    "cmd": "print(('{0:.2f}'.format(a)))",
    "question_id": "455612-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": "print((.format(a)))",
    "canonical_cmd": "print('{0:.2f}'.format(VAR_STR))"
  },
  {
    "nl": "print float `a` with two decimal points",
    "cmd": "print(('{0:.2f}'.format(round(a, 2))))",
    "question_id": "455612-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#round",
      "python.library.functions#format"
    ],
    "clean_cmd": "print((.format(round(a, ))))",
    "canonical_cmd": "print('{0:.2f}'.format(round(VAR_STR, 2)))"
  },
  {
    "nl": "print float `a` with two decimal points",
    "cmd": "print(('%.2f' % round(a, 2)))",
    "question_id": "455612-39",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#round"
    ],
    "clean_cmd": "print(( % round(a, )))",
    "canonical_cmd": "print('%.2f' % round(VAR_STR, 2))"
  },
  {
    "nl": "limit float 13.9499999 to two decimal points",
    "cmd": "('%.2f' % 13.9499999)",
    "question_id": "455612-31",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "( % 13.9499999)",
    "canonical_cmd": "'%.2f' % 13.9499999"
  },
  {
    "nl": "limit float 3.14159 to two decimal points",
    "cmd": "('%.2f' % 3.14159)",
    "question_id": "455612-37",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "( % 3.14159)",
    "canonical_cmd": "'%.2f' % 3.14159"
  },
  {
    "nl": "limit float 13.949999999999999 to two decimal points",
    "cmd": "float('{0:.2f}'.format(13.95))",
    "question_id": "455612-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#float",
      "python.library.functions#format"
    ],
    "clean_cmd": "float(.format(13.95))",
    "canonical_cmd": "float('{0:.2f}'.format(13.95))"
  },
  {
    "nl": "limit float 13.949999999999999 to two decimal points",
    "cmd": "'{0:.2f}'.format(13.95)",
    "question_id": "455612-19",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format(13.95)",
    "canonical_cmd": "\"\"\"{0:.2f}\"\"\".format(13.95)"
  },
  {
    "nl": "sort list of strings in list `the_list` by integer suffix",
    "cmd": "sorted(the_list, key=lambda k: int(k.split('_')[1]))",
    "question_id": "4287209-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#int",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "sorted(the_list, key= k: int(k.split()[]))",
    "canonical_cmd": "sorted(VAR_STR, key=lambda k: int(k.split('_')[1]))"
  },
  {
    "nl": "sort list of strings `the_list` by integer suffix after \"_\"",
    "cmd": "sorted(the_list, key=lambda x: int(x.split('_')[1]))",
    "question_id": "4287209-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#int",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "sorted(the_list, key= x: int(x.split()[]))",
    "canonical_cmd": "sorted(VAR_STR, key=lambda x: int(x.split('VAR_STR')[1]))"
  },
  {
    "nl": "pass a list of parameters `((1, 2, 3),)` to sql query 'SELECT * FROM table WHERE column IN %s;'",
    "cmd": "cur.mogrify('SELECT * FROM table WHERE column IN %s;', ((1, 2, 3),))",
    "question_id": "8671702-94",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "cur.mogrify(, ((, , ),))",
    "canonical_cmd": "cur.mogrify('VAR_STR', ((1, 2, 3),))"
  },
  {
    "nl": "flush output of python print",
    "cmd": "sys.stdout.flush()",
    "question_id": "3804727-58",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.logging#logging.Handler.flush"
    ],
    "clean_cmd": "sys.stdout.flush()",
    "canonical_cmd": "sys.stdout.flush()"
  },
  {
    "nl": "find indexes of all occurrences of a substring `tt` in a string `ttt`",
    "cmd": "[m.start() for m in re.finditer('(?=tt)', 'ttt')]",
    "question_id": "4664850-7",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.finditer",
      "python.library.re#re.Match.start"
    ],
    "clean_cmd": "[m.start()  m  re.finditer(, )]",
    "canonical_cmd": "[m.start() for m in re.finditer('(?=tt)', 'VAR_STR')]"
  },
  {
    "nl": "find all occurrences of a substring in a string",
    "cmd": "[m.start() for m in re.finditer('test', 'test test test test')]",
    "question_id": "4664850-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.finditer",
      "python.library.re#re.Match.start"
    ],
    "clean_cmd": "[m.start()  m  re.finditer(, )]",
    "canonical_cmd": "[m.start() for m in re.finditer('test', 'test test test test')]"
  },
  {
    "nl": "join Numpy array `b` with Numpy array 'a' along axis 0",
    "cmd": "b = np.concatenate((a, a), axis=0)",
    "question_id": "6740311-53",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.concatenate"
    ],
    "clean_cmd": "b = np.concatenate((a, a), axis=)",
    "canonical_cmd": "VAR_STR = np.concatenate((VAR_STR, VAR_STR), axis=0)"
  },
  {
    "nl": "select multiple ranges of columns 1-10, 15, 17, and 50-100 in pandas dataframe `df`",
    "cmd": "df.iloc[:, (np.r_[1:10, (15), (17), 50:100])]",
    "question_id": "41256648-47",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df.iloc[:, (np.r_[:, (), (), :])]",
    "canonical_cmd": "VAR_STR.iloc[:, (np.r_[1:10, (15), (17), 50:100])]"
  },
  {
    "nl": "fetch all elements in a dictionary `parent_dict`, falling between two keys 2 and 4",
    "cmd": "dict((k, v) for k, v in parent_dict.items() if 2 < k < 4)",
    "question_id": "8654637-6",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "dict((k, v)  k, v  parent_dict.items()   < k < )",
    "canonical_cmd": "dict((k, v) for k, v in VAR_STR.items() if 2 < k < 4)"
  },
  {
    "nl": "fetch all elements in a dictionary 'parent_dict' where the key is between the range of 2 to 4",
    "cmd": "dict((k, v) for k, v in parent_dict.items() if k > 2 and k < 4)",
    "question_id": "8654637-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "dict((k, v)  k, v  parent_dict.items()  k >   k < )",
    "canonical_cmd": "dict((k, v) for k, v in VAR_STR.items() if k > 2 and k < 4)"
  },
  {
    "nl": "concatenate strings in tuple `('a', 'b', 'c', 'd', 'g', 'x', 'r', 'e')` into a single string",
    "cmd": "\"\"\"\"\"\".join(('a', 'b', 'c', 'd', 'g', 'x', 'r', 'e'))",
    "question_id": "19641579-81",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join((, , , , , , , ))",
    "canonical_cmd": "\"\"\"\"\"\".join((VAR_STR))"
  },
  {
    "nl": "check if string `the_string` contains any upper or lower-case ASCII letters",
    "cmd": "re.search('[a-zA-Z]', the_string)",
    "question_id": "9072844-100",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.search"
    ],
    "clean_cmd": "re.search(, the_string)",
    "canonical_cmd": "re.search('[a-zA-Z]', VAR_STR)"
  },
  {
    "nl": "get alpha value `alpha` of a png image `img`",
    "cmd": "alpha = img.split()[-1]",
    "question_id": "1962795-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "alpha = img.split()[-]",
    "canonical_cmd": "VAR_STR = VAR_STR.split()[-1]"
  },
  {
    "nl": "save a numpy array `image_array` as an image 'outfile.jpg'",
    "cmd": "scipy.misc.imsave('outfile.jpg', image_array)",
    "question_id": "902761-90",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.image_api#matplotlib.image.imsave"
    ],
    "clean_cmd": "scipy.misc.imsave(, image_array)",
    "canonical_cmd": "scipy.misc.imsave('VAR_STR', VAR_STR)"
  },
  {
    "nl": "delete the last column of numpy array `a` and assign resulting array to `b`",
    "cmd": "b = np.delete(a, -1, 1)",
    "question_id": "6710684-100",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.delete"
    ],
    "clean_cmd": "b = np.delete(a, -, )",
    "canonical_cmd": "VAR_STR = np.delete(VAR_STR, -1, 1)"
  },
  {
    "nl": "Convert float 24322.34 to comma-separated string",
    "cmd": "\"\"\"{0:,.2f}\"\"\".format(24322.34)",
    "question_id": "10677350-1",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format(24322.34)",
    "canonical_cmd": "\"\"\"{0:,.2f}\"\"\".format(24322.34)"
  },
  {
    "nl": "Setting stacksize in a python script",
    "cmd": "os.system('ulimit -s unlimited; some_executable')",
    "question_id": "5061582-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system('ulimit -s unlimited; some_executable')"
  },
  {
    "nl": "assign values to two variables, `var1` and `var2` from user input response to `'Enter two numbers here: '` split on whitespace",
    "cmd": "var1, var2 = input('Enter two numbers here: ').split()",
    "question_id": "961263-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#input",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "var1, var2 = input().split()",
    "canonical_cmd": "VAR_STR, VAR_STR = input('Enter two numbers here: ').split()"
  },
  {
    "nl": "get canonical path of the filename `path`",
    "cmd": "os.path.realpath(path)",
    "question_id": "3220755-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.realpath"
    ],
    "clean_cmd": "os.path.realpath(path)",
    "canonical_cmd": "os.VAR_STR.realpath(VAR_STR)"
  },
  {
    "nl": "fill missing value in one column 'Cat1' with the value of another column 'Cat2'",
    "cmd": "df['Cat1'].fillna(df['Cat2'])",
    "question_id": "30357276-39",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.fillna"
    ],
    "clean_cmd": "df[].fillna(df[])",
    "canonical_cmd": "df['VAR_STR'].fillna(df['VAR_STR'])"
  },
  {
    "nl": "request URI '<MY_URI>' and pass authorization token 'TOK:<MY_TOKEN>' to the header",
    "cmd": "r = requests.get('<MY_URI>', headers={'Authorization': 'TOK:<MY_TOKEN>'})",
    "question_id": "19069701-24",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.webbrowser#webbrowser.get"
    ],
    "clean_cmd": "r = requests.get(, headers={: })",
    "canonical_cmd": "r = requests.get('VAR_STR', headers={'Authorization': 'VAR_STR'})"
  },
  {
    "nl": "check if list `a` is empty",
    "cmd": "if (not a):\n    pass",
    "question_id": "53513-86",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " ( a):",
    "canonical_cmd": "if not VAR_STR:\n    pass"
  },
  {
    "nl": "check if list `seq` is empty",
    "cmd": "if (not seq):\n    pass",
    "question_id": "53513-49",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " ( seq):",
    "canonical_cmd": "if not VAR_STR:\n    pass"
  },
  {
    "nl": "check if list `li` is empty",
    "cmd": "if (len(li) == 0):\n    pass",
    "question_id": "53513-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len"
    ],
    "clean_cmd": " (len(li) == ):",
    "canonical_cmd": "if len(VAR_STR) == 0:\n    pass"
  },
  {
    "nl": "Extract values not equal to 0 from numpy array `a`",
    "cmd": "a[a != 0]",
    "question_id": "5927180-86",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "a[a != ]",
    "canonical_cmd": "VAR_STR[VAR_STR != 0]"
  },
  {
    "nl": "print a string `s` by splitting with comma `,`",
    "cmd": "print(s.split(','))",
    "question_id": "5864485-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "print(s.split())",
    "canonical_cmd": "print(VAR_STR.split('VAR_STR'))"
  },
  {
    "nl": "Create list  by splitting string `mystring` using \",\" as delimiter",
    "cmd": "mystring.split(',')",
    "question_id": "5864485-36",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "mystring.split()",
    "canonical_cmd": "VAR_STR.split('VAR_STR')"
  },
  {
    "nl": "remove periods in between capital letters that aren't immediately preceded by word character(s) in a string `s` using regular expressions",
    "cmd": "re.sub('(?<!\\\\w)([A-Z])\\\\.', '\\\\1', s)",
    "question_id": "40196941-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , s)",
    "canonical_cmd": "re.sub('(?<!\\\\w)([A-Z])\\\\.', '\\\\1', VAR_STR)"
  },
  {
    "nl": "Construct an array with data type float32 `a` from data in binary file 'filename'",
    "cmd": "a = numpy.fromfile('filename', dtype=numpy.float32)",
    "question_id": "4365964-71",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.fromfile"
    ],
    "clean_cmd": "a = numpy.fromfile(, dtype=numpy.float32)",
    "canonical_cmd": "VAR_STR = numpy.fromfile('VAR_STR', dtype=numpy.float32)"
  },
  {
    "nl": "How to get the length of words in a sentence?",
    "cmd": "[len(x) for x in s.split()]",
    "question_id": "22749706-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "[len(x)  x  s.split()]",
    "canonical_cmd": "[len(x) for x in s.split()]"
  },
  {
    "nl": "get a string `randomkey123xyz987` between two substrings in a string `api('randomkey123xyz987', 'key', 'text')` using regex",
    "cmd": "re.findall(\"api\\\\('(.*?)'\", \"api('randomkey123xyz987', 'key', 'text')\")",
    "question_id": "29703793-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall(\"api\\\\('(.*?)'\", 'VAR_STR')"
  },
  {
    "nl": "create a list of aggregation of each element from list `l2` to all elements of list `l1`",
    "cmd": "[(x + y) for x in l2 for y in l1]",
    "question_id": "16568056-66",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[(x + y)  x  l2  y  l1]",
    "canonical_cmd": "[(x + y) for x in VAR_STR for y in VAR_STR]"
  },
  {
    "nl": "get date from dataframe `df` column 'dates' to column 'just_date'",
    "cmd": "df['just_date'] = df['dates'].dt.date",
    "question_id": "16176996-7",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df[] = df[].dt.date",
    "canonical_cmd": "VAR_STR['VAR_STR'] = VAR_STR['VAR_STR'].dt.date"
  },
  {
    "nl": "sort a list `your_list` of class objects by their values for the attribute `anniversary_score`",
    "cmd": "your_list.sort(key=operator.attrgetter('anniversary_score'))",
    "question_id": "17038639-32",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.operator#operator.attrgetter",
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "your_list.sort(key=operator.attrgetter())",
    "canonical_cmd": "VAR_STR.sort(key=operator.attrgetter('VAR_STR'))"
  },
  {
    "nl": "sort list `your_list` by the `anniversary_score` attribute of each object",
    "cmd": "your_list.sort(key=lambda x: x.anniversary_score)",
    "question_id": "17038639-24",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "your_list.sort(key= x: x.anniversary_score)",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: x.VAR_STR)"
  },
  {
    "nl": "split string 'a b.c' on space \" \" and dot character \".\"",
    "cmd": "re.split('[ .]', 'a b.c')",
    "question_id": "373459-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, )",
    "canonical_cmd": "re.split('[ .]', 'VAR_STR')"
  },
  {
    "nl": "count number of rows in a group `key_columns` in pandas groupby object `df`",
    "cmd": "df.groupby(key_columns).size()",
    "question_id": "19384532-12",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.dataframe.size"
    ],
    "clean_cmd": "df.groupby(key_columns).size()",
    "canonical_cmd": "VAR_STR.groupby(VAR_STR).size()"
  },
  {
    "nl": "Print +1 using format '{0:+d}'",
    "cmd": "print('{0:+d}'.format(score))",
    "question_id": "8337004-51",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": "print(.format(score))",
    "canonical_cmd": "print('VAR_STR'.format(score))"
  },
  {
    "nl": "delete every non `utf-8` characters from a string `line`",
    "cmd": "line = line.decode('utf-8', 'ignore').encode('utf-8')",
    "question_id": "26541968-79",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "line = line.decode(, ).encode()",
    "canonical_cmd": "VAR_STR = VAR_STR.decode('VAR_STR', 'ignore').encode('VAR_STR')"
  },
  {
    "nl": "lowercase a python dataframe string in column 'x' if it has missing values in dataframe `df`",
    "cmd": "df['x'].str.lower()",
    "question_id": "22245171-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.lower"
    ],
    "clean_cmd": "df[].str.lower()",
    "canonical_cmd": "VAR_STR['VAR_STR'].str.lower()"
  },
  {
    "nl": "swap values in a tuple/list inside a list `mylist`",
    "cmd": "map(lambda t: (t[1], t[0]), mylist)",
    "question_id": "13384841-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map"
    ],
    "clean_cmd": "map( t: (t[], t[]), mylist)",
    "canonical_cmd": "map(lambda t: (t[1], t[0]), VAR_STR)"
  },
  {
    "nl": "Swap values in a tuple/list in list `mylist`",
    "cmd": "[(t[1], t[0]) for t in mylist]",
    "question_id": "13384841-49",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[(t[], t[])  t  mylist]",
    "canonical_cmd": "[(t[1], t[0]) for t in VAR_STR]"
  },
  {
    "nl": "Set index equal to field 'TRX_DATE' in dataframe `df`",
    "cmd": "df = df.set_index(['TRX_DATE'])",
    "question_id": "33565643-86",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.set_index"
    ],
    "clean_cmd": "df = df.set_index([])",
    "canonical_cmd": "VAR_STR = VAR_STR.set_index(['VAR_STR'])"
  },
  {
    "nl": "change figure size to 3 by 4 in matplotlib",
    "cmd": "plt.figure(figsize=(3, 4))",
    "question_id": "17109608-36",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.figure_api#matplotlib.figure.Figure"
    ],
    "clean_cmd": "plt.figure(figsize=(, ))",
    "canonical_cmd": "plt.figure(figsize=(3, 4))"
  },
  {
    "nl": "add column `d` to index of dataframe `df`",
    "cmd": "df.set_index(['d'], append=True)",
    "question_id": "11040626-65",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.set_index"
    ],
    "clean_cmd": "df.set_index([], append=True)",
    "canonical_cmd": "VAR_STR.set_index(['VAR_STR'], append=True)"
  },
  {
    "nl": "create a regular expression that matches the pattern '^(.+)(?:\\\\n|\\\\r\\\\n?)((?:(?:\\\\n|\\\\r\\\\n?).+)+)' over multiple lines of text",
    "cmd": "re.compile('^(.+)(?:\\\\n|\\\\r\\\\n?)((?:(?:\\\\n|\\\\r\\\\n?).+)+)', re.MULTILINE)",
    "question_id": "587345-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.compile"
    ],
    "clean_cmd": "re.compile(, re.MULTILINE)",
    "canonical_cmd": "re.compile('VAR_STR', re.MULTILINE)"
  },
  {
    "nl": "regular expression \"^(.+)\\\\n((?:\\\\n.+)+)\" matching a multiline block of text",
    "cmd": "re.compile('^(.+)\\\\n((?:\\\\n.+)+)', re.MULTILINE)",
    "question_id": "587345-23",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.compile"
    ],
    "clean_cmd": "re.compile(, re.MULTILINE)",
    "canonical_cmd": "re.compile('VAR_STR', re.MULTILINE)"
  },
  {
    "nl": "get equivalent week number from a date `2010/6/16` using  isocalendar",
    "cmd": "datetime.date(2010, 6, 16).isocalendar()[1]",
    "question_id": "2600775-7",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.date",
      "python.library.datetime#datetime.date.isocalendar"
    ],
    "clean_cmd": "datetime.date(, , ).isocalendar()[]",
    "canonical_cmd": "datetime.date(2010, 6, 16).isocalendar()[1]"
  },
  {
    "nl": "format floating point number `TotalAmount` to be rounded off to two decimal places and have a comma thousands separator",
    "cmd": "print('Total cost is: ${:,.2f}'.format(TotalAmount))",
    "question_id": "5180365-48",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": "print(.format(TotalAmount))",
    "canonical_cmd": "print('Total cost is: ${:,.2f}'.format(VAR_STR))"
  },
  {
    "nl": "split string 'abcdefg' into a list of characters",
    "cmd": "re.findall('\\\\w', 'abcdefg')",
    "question_id": "464736-72",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall('\\\\w', 'VAR_STR')"
  },
  {
    "nl": "set columns `['race_date', 'track_code', 'race_number']` as indexes in dataframe `rdata`",
    "cmd": "rdata.set_index(['race_date', 'track_code', 'race_number'])",
    "question_id": "18071222-58",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.set_index"
    ],
    "clean_cmd": "rdata.set_index([, , ])",
    "canonical_cmd": "VAR_STR.set_index([VAR_STR])"
  },
  {
    "nl": "replace a string `Abc` in case sensitive way using maketrans",
    "cmd": "\"\"\"Abc\"\"\".translate(maketrans('abcABC', 'defDEF'))",
    "question_id": "3008992-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.maketrans",
      "python.library.stdtypes#str.translate"
    ],
    "clean_cmd": ".translate(maketrans(, ))",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".translate(maketrans('abcABC', 'defDEF'))"
  },
  {
    "nl": "get a list of values with key 'key' from a list of dictionaries `l`",
    "cmd": "[d['key'] for d in l if 'key' in d]",
    "question_id": "25040875-80",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[d[]  d  l    d]",
    "canonical_cmd": "[d['VAR_STR'] for d in VAR_STR if 'VAR_STR' in d]"
  },
  {
    "nl": "get a list of values for key 'key' from a list of dictionaries `l`",
    "cmd": "[d['key'] for d in l]",
    "question_id": "25040875-99",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[d[]  d  l]",
    "canonical_cmd": "[d['VAR_STR'] for d in VAR_STR]"
  },
  {
    "nl": "get a list of values for key \"key\" from a list of dictionaries in `l`",
    "cmd": "[d['key'] for d in l]",
    "question_id": "25040875-16",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[d[]  d  l]",
    "canonical_cmd": "[d['VAR_STR'] for d in VAR_STR]"
  },
  {
    "nl": "change the font size on plot `matplotlib` to 22",
    "cmd": "matplotlib.rcParams.update({'font.size': 22})",
    "question_id": "3899980-30",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.figure_api#matplotlib.figure.SubplotParams.update"
    ],
    "clean_cmd": "matplotlib.rcParams.update({: })",
    "canonical_cmd": "VAR_STR.rcParams.update({'font.size': 22})"
  },
  {
    "nl": "replace comma in string `s` with empty string ''",
    "cmd": "s = s.replace(',', '')",
    "question_id": "16233593-48",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "s = s.replace(, )",
    "canonical_cmd": "VAR_STR = VAR_STR.replace(',', 'VAR_STR')"
  },
  {
    "nl": "get the count of each unique value in column `Country` of dataframe `df` and store in column `Sum of Accidents`",
    "cmd": "df.Country.value_counts().reset_index(name='Sum of Accidents')",
    "question_id": "39607540-52",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.reset_index",
      "pandas.reference.api.pandas.dataframe.value_counts"
    ],
    "clean_cmd": "df.Country.value_counts().reset_index(name=)",
    "canonical_cmd": "VAR_STR.VAR_STR.value_counts().reset_index(name='VAR_STR')"
  },
  {
    "nl": "sort dictionary `d` by key",
    "cmd": "od = collections.OrderedDict(sorted(d.items()))",
    "question_id": "9001509-32",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.collections#collections.OrderedDict",
      "python.library.functions#sorted",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "od = collections.OrderedDict(sorted(d.items()))",
    "canonical_cmd": "od = collections.OrderedDict(sorted(VAR_STR.items()))"
  },
  {
    "nl": "sort a dictionary `d` by key",
    "cmd": "OrderedDict(sorted(list(d.items()), key=(lambda t: t[0])))",
    "question_id": "9001509-28",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.collections#collections.OrderedDict",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "OrderedDict(sorted(list(d.items()), key=( t: t[])))",
    "canonical_cmd": "OrderedDict(sorted(list(VAR_STR.items()), key=lambda t: t[0]))"
  },
  {
    "nl": "get dictionary with max value of key 'size' in list of dicts `ld`",
    "cmd": "max(ld, key=lambda d: d['size'])",
    "question_id": "30546889-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#max"
    ],
    "clean_cmd": "max(ld, key= d: d[])",
    "canonical_cmd": "max(VAR_STR, key=lambda d: d['VAR_STR'])"
  },
  {
    "nl": "create a dictionary containing each string in list `my_list` split by '=' as a key/value pairs",
    "cmd": "print(dict([s.split('=') for s in my_list]))",
    "question_id": "12739911-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "print(dict([s.split()  s  my_list]))",
    "canonical_cmd": "print(dict([s.split('VAR_STR') for s in VAR_STR]))"
  },
  {
    "nl": "remove all values within one list `[2, 3, 7]` from another list `a`",
    "cmd": "[x for x in a if x not in [2, 3, 7]]",
    "question_id": "2514961-92",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[x  x  a  x   [, , ]]",
    "canonical_cmd": "[x for x in VAR_STR if x not in [VAR_STR]]"
  },
  {
    "nl": "remove all duplicates from a list of sets `L`",
    "cmd": "list(set(frozenset(item) for item in L))",
    "question_id": "32296933-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#frozenset",
      "python.library.functions#list",
      "python.library.stdtypes#set"
    ],
    "clean_cmd": "list(set(frozenset(item)  item  L))",
    "canonical_cmd": "list(set(frozenset(item) for item in VAR_STR))"
  },
  {
    "nl": "remove duplicates from a list of sets 'L'",
    "cmd": "[set(item) for item in set(frozenset(item) for item in L)]",
    "question_id": "32296933-32",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#set",
      "python.library.stdtypes#frozenset"
    ],
    "clean_cmd": "[set(item)  item  set(frozenset(item)  item  L)]",
    "canonical_cmd": "[set(item) for item in set(frozenset(item) for item in VAR_STR)]"
  },
  {
    "nl": "Concatenate elements of a list 'x' of multiple integers to a single integer",
    "cmd": "sum(d * 10 ** i for i, d in enumerate(x[::-1]))",
    "question_id": "41067960-51",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate",
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(d *  ** i  i, d  enumerate(x[::-]))",
    "canonical_cmd": "sum(d * 10 ** i for i, d in enumerate(VAR_STR[::-1]))"
  },
  {
    "nl": "convert a list of integers into a single integer",
    "cmd": "r = int(''.join(map(str, x)))",
    "question_id": "41067960-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.functions#map",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "r = int(.join(map(str, x)))",
    "canonical_cmd": "r = int(''.join(map(str, x)))"
  },
  {
    "nl": "create set of squared int values in range of 100",
    "cmd": "{(x ** 2) for x in range(100)}",
    "question_id": "19454970-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "{(x ** )  x  range()}",
    "canonical_cmd": "{(x ** 2) for x in range(100)}"
  },
  {
    "nl": "get complete path of a module named `os`",
    "cmd": "imp.find_module('os')[1]",
    "question_id": "9534608-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.zipimport#zipimport.zipimporter.find_module"
    ],
    "clean_cmd": "imp.find_module()[]",
    "canonical_cmd": "imp.find_module('VAR_STR')[1]"
  },
  {
    "nl": "get current CPU and RAM usage",
    "cmd": "psutil.cpu_percent()\npsutil.virtual_memory()",
    "question_id": "276052-11",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "psutil.cpu_percent()psutil.virtual_memory()",
    "canonical_cmd": "psutil.cpu_percent()\npsutil.virtual_memory()"
  },
  {
    "nl": "get current RAM usage of current program",
    "cmd": "pid = os.getpid()\npy = psutil.Process(pid)\nmemoryUse = (py.memory_info()[0] / (2.0 ** 30))",
    "question_id": "276052-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.getpid",
      "python.library.multiprocessing#multiprocessing.Process"
    ],
    "clean_cmd": "pid = os.getpid()py = psutil.Process(pid)memoryUse = (py.memory_info()[] / (2.0 ** ))",
    "canonical_cmd": "pid = os.getpid()\npy = psutil.Process(pid)\nmemoryUse = py.memory_info()[0] / 2.0 ** 30"
  },
  {
    "nl": "print cpu and memory usage",
    "cmd": "print((psutil.cpu_percent()))\nprint((psutil.virtual_memory()))",
    "question_id": "276052-66",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print((psutil.cpu_percent()))print((psutil.virtual_memory()))",
    "canonical_cmd": "print(psutil.cpu_percent())\nprint(psutil.virtual_memory())"
  },
  {
    "nl": "given list `to_reverse`, reverse all the sublists and the list itself",
    "cmd": "[sublist[::-1] for sublist in to_reverse[::-1]]",
    "question_id": "39821166-6",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[sublist[::-]  sublist  to_reverse[::-]]",
    "canonical_cmd": "[sublist[::-1] for sublist in VAR_STR[::-1]]"
  },
  {
    "nl": "check if 7 is in `a`",
    "cmd": "(7 in a)",
    "question_id": "7571635-38",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "(  a)",
    "canonical_cmd": "7 in VAR_STR"
  },
  {
    "nl": "check if 'a' is in list `a`",
    "cmd": "('a' in a)",
    "question_id": "7571635-96",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "(  a)",
    "canonical_cmd": "'VAR_STR' in VAR_STR"
  },
  {
    "nl": "extract table data from table `rows` using beautifulsoup",
    "cmd": "[[td.findNext(text=True) for td in tr.findAll('td')] for tr in rows]",
    "question_id": "8139797-27",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "[[td.findNext(text=True)  td  tr.findAll()]  tr  rows]",
    "canonical_cmd": "[[td.findNext(text=True) for td in tr.findAll('td')] for tr in VAR_STR]"
  },
  {
    "nl": "find element `a` that contains string \"TEXT A\" in file `root`",
    "cmd": "e = root.xpath('.//a[contains(text(),\"TEXT A\")]')",
    "question_id": "14299978-48",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "e = root.xpath()",
    "canonical_cmd": "e = VAR_STR.xpath('.//a[contains(text(),\"TEXT A\")]')"
  },
  {
    "nl": "Find the `a` tag in html `root` which starts with the text `TEXT A` and assign it to `e`",
    "cmd": "e = root.xpath('.//a[starts-with(text(),\"TEXT A\")]')",
    "question_id": "14299978-16",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "e = root.xpath()",
    "canonical_cmd": "VAR_STR = VAR_STR.xpath('.//a[starts-with(text(),\"TEXT A\")]')"
  },
  {
    "nl": "find the element that holds string 'TEXT A' in file `root`",
    "cmd": "e = root.xpath('.//a[text()=\"TEXT A\"]')",
    "question_id": "14299978-52",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "e = root.xpath()",
    "canonical_cmd": "e = VAR_STR.xpath('.//a[text()=\"TEXT A\"]')"
  },
  {
    "nl": "round number `x` to nearest integer",
    "cmd": "int(round(x))",
    "question_id": "31818050-79",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.functions#round"
    ],
    "clean_cmd": "int(round(x))",
    "canonical_cmd": "int(round(VAR_STR))"
  },
  {
    "nl": "round number `h` to nearest integer",
    "cmd": "h = int(round(h))",
    "question_id": "31818050-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.functions#round"
    ],
    "clean_cmd": "h = int(round(h))",
    "canonical_cmd": "VAR_STR = int(round(VAR_STR))"
  },
  {
    "nl": "round number 32.268907563 up to 3 decimal points",
    "cmd": "round(32.268907563, 3)",
    "question_id": "31818050-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#round"
    ],
    "clean_cmd": "round(32.268907563, )",
    "canonical_cmd": "round(32.268907563, 3)"
  },
  {
    "nl": "round number `value` up to `significantDigit` decimal places",
    "cmd": "round(value, significantDigit)",
    "question_id": "31818050-24",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#round"
    ],
    "clean_cmd": "round(value, significantDigit)",
    "canonical_cmd": "round(VAR_STR, VAR_STR)"
  },
  {
    "nl": "round number 1.0005 up to 3 decimal places",
    "cmd": "round(1.0005, 3)",
    "question_id": "31818050-19",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#round"
    ],
    "clean_cmd": "round(1.0005, )",
    "canonical_cmd": "round(1.0005, 3)"
  },
  {
    "nl": "round number 2.0005 up to 3 decimal places",
    "cmd": "round(2.0005, 3)",
    "question_id": "31818050-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#round"
    ],
    "clean_cmd": "round(2.0005, )",
    "canonical_cmd": "round(2.0005, 3)"
  },
  {
    "nl": "round number 3.0005 up to 3 decimal places",
    "cmd": "round(3.0005, 3)",
    "question_id": "31818050-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#round"
    ],
    "clean_cmd": "round(3.0005, )",
    "canonical_cmd": "round(3.0005, 3)"
  },
  {
    "nl": "round number 4.0005 up to 3 decimal places",
    "cmd": "round(4.0005, 3)",
    "question_id": "31818050-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#round"
    ],
    "clean_cmd": "round(4.0005, )",
    "canonical_cmd": "round(4.0005, 3)"
  },
  {
    "nl": "round number 8.005 up to 2 decimal places",
    "cmd": "round(8.005, 2)",
    "question_id": "31818050-23",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#round"
    ],
    "clean_cmd": "round(8.005, )",
    "canonical_cmd": "round(8.005, 2)"
  },
  {
    "nl": "round number 7.005 up to 2 decimal places",
    "cmd": "round(7.005, 2)",
    "question_id": "31818050-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.functions#round"
    ],
    "clean_cmd": "round(7.005, )",
    "canonical_cmd": "round(7.005, 2)"
  },
  {
    "nl": "round number 6.005 up to 2 decimal places",
    "cmd": "round(6.005, 2)",
    "question_id": "31818050-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#round"
    ],
    "clean_cmd": "round(6.005, )",
    "canonical_cmd": "round(6.005, 2)"
  },
  {
    "nl": "round number 1.005 up to 2 decimal places",
    "cmd": "round(1.005, 2)",
    "question_id": "31818050-55",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#round"
    ],
    "clean_cmd": "round(1.005, )",
    "canonical_cmd": "round(1.005, 2)"
  },
  {
    "nl": "Divide elements in list `a` from elements at the same index in list `b`",
    "cmd": "[(x / y) for x, y in zip(a, b)]",
    "question_id": "16418415-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "[(x / y)  x, y  zip(a, b)]",
    "canonical_cmd": "[(x / y) for x, y in zip(VAR_STR, VAR_STR)]"
  },
  {
    "nl": "Unpack each value in list `x` to its placeholder '%' in string '%.2f'",
    "cmd": "\"\"\", \"\"\".join(['%.2f'] * len(x))",
    "question_id": "7568627-60",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join([] * len(x))",
    "canonical_cmd": "\"\"\", \"\"\".join(['VAR_STR'] * len(VAR_STR))"
  },
  {
    "nl": "delete all columns in  DataFrame `df` that do not hold a non-zero value in its records",
    "cmd": "df.loc[:, ((df != 0).any(axis=0))]",
    "question_id": "21164910-6",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.loc",
      "python.library.functions#any"
    ],
    "clean_cmd": "df.loc[:, ((df != ).any(axis=))]",
    "canonical_cmd": "VAR_STR.loc[:, ((VAR_STR != 0).any(axis=0))]"
  },
  {
    "nl": "Delete an element `key` from a dictionary `d`",
    "cmd": "del d[key]",
    "question_id": "5844672-30",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " d[key]",
    "canonical_cmd": "del VAR_STR[VAR_STR]"
  },
  {
    "nl": "Delete an element 0 from a dictionary `a`",
    "cmd": "{i: a[i] for i in a if (i != 0)}",
    "question_id": "5844672-73",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "{i: a[i]  i  a  (i != )}",
    "canonical_cmd": "{i: VAR_STR[i] for i in VAR_STR if i != 0}"
  },
  {
    "nl": "Delete an element \"hello\" from a dictionary `lol`",
    "cmd": "lol.pop('hello')",
    "question_id": "5844672-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.pop"
    ],
    "clean_cmd": "lol.pop()",
    "canonical_cmd": "VAR_STR.pop('VAR_STR')"
  },
  {
    "nl": "Delete an element with key `key` dictionary `r`",
    "cmd": "del r[key]",
    "question_id": "5844672-98",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " r[key]",
    "canonical_cmd": "del VAR_STR[VAR_STR]"
  },
  {
    "nl": "python pandas: apply a function with arguments to a series",
    "cmd": "my_series.apply(your_function, args=(2, 3, 4), extra_kw=1)",
    "question_id": "12182744-50",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.series.apply"
    ],
    "clean_cmd": "my_series.apply(your_function, args=(, , ), extra_kw=)",
    "canonical_cmd": "my_series.apply(your_function, args=(2, 3, 4), extra_kw=1)"
  },
  {
    "nl": "find element by css selector \"input[onclick*='1 Bedroom Deluxe']\"",
    "cmd": "driver.find_element_by_css_selector(\"input[onclick*='1 Bedroom Deluxe']\")",
    "question_id": "21691126-12",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.find_element_by_css_selector()",
    "canonical_cmd": "driver.find_element_by_css_selector('VAR_STR')"
  },
  {
    "nl": "get  value at index `[2, 0]` in dataframe `df`",
    "cmd": "df.iloc[2, 0]",
    "question_id": "29902714-77",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.loc"
    ],
    "clean_cmd": "df.iloc[, ]",
    "canonical_cmd": "VAR_STR.iloc[VAR_STR]"
  },
  {
    "nl": "check if the third element of all the lists in a list \"items\" is equal to zero.",
    "cmd": "any(item[2] == 0 for item in items)",
    "question_id": "10666163-6",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#any"
    ],
    "clean_cmd": "any(item[] ==   item  items)",
    "canonical_cmd": "any(item[2] == 0 for item in VAR_STR)"
  },
  {
    "nl": "Find all the lists from a lists of list 'items' if third element in all sub-lists is '0'",
    "cmd": "[x for x in items if x[2] == 0]",
    "question_id": "10666163-14",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[x  x  items  x[] == ]",
    "canonical_cmd": "[x for x in VAR_STR if x[2] == 0]"
  },
  {
    "nl": "find all substrings in `mystring` beginning and ending with square brackets",
    "cmd": "re.findall('\\\\[(.*?)\\\\]', mystring)",
    "question_id": "9889635-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, mystring)",
    "canonical_cmd": "re.findall('\\\\[(.*?)\\\\]', VAR_STR)"
  },
  {
    "nl": "Delete third row in a numpy array `x`",
    "cmd": "x = numpy.delete(x, 2, axis=1)",
    "question_id": "3877491-18",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.delete"
    ],
    "clean_cmd": "x = numpy.delete(x, , axis=)",
    "canonical_cmd": "VAR_STR = numpy.delete(VAR_STR, 2, axis=1)"
  },
  {
    "nl": "delete first row of array `x`",
    "cmd": "x = numpy.delete(x, 0, axis=0)",
    "question_id": "3877491-16",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.delete"
    ],
    "clean_cmd": "x = numpy.delete(x, , axis=)",
    "canonical_cmd": "VAR_STR = numpy.delete(VAR_STR, 0, axis=0)"
  },
  {
    "nl": "request URL `url` using http header `{'referer': my_referer}`",
    "cmd": "requests.get(url, headers={'referer': my_referer})",
    "question_id": "20837786-15",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.webbrowser#webbrowser.get"
    ],
    "clean_cmd": "requests.get(url, headers={: my_referer})",
    "canonical_cmd": "requests.get(VAR_STR, headers={VAR_STR})"
  },
  {
    "nl": "get the widget which has currently the focus in tkinter instance `window2`",
    "cmd": "print(('focus object class:', window2.focus_get().__class__))",
    "question_id": "27867754-13",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print((, window2.focus_get().__class__))",
    "canonical_cmd": "print(('focus object class:', VAR_STR.focus_get().__class__))"
  },
  {
    "nl": "What is the best way to sort list with custom sorting parameters in Python?",
    "cmd": "li1.sort(key=lambda x: not x.startswith('b.'))",
    "question_id": "7996940-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.startswith",
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "li1.sort(key= x:  x.startswith())",
    "canonical_cmd": "li1.sort(key=lambda x: not x.startswith('b.'))"
  },
  {
    "nl": "get user input using message 'Enter name here: ' and insert it to the first placeholder in string 'Hello, {0}, how do you do?'",
    "cmd": "print('Hello, {0}, how do you do?'.format(input('Enter name here: ')))",
    "question_id": "7173850-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#input",
      "python.library.functions#format"
    ],
    "clean_cmd": "print(.format(input()))",
    "canonical_cmd": "print('VAR_STR'.format(input('Enter name here: ')))"
  },
  {
    "nl": "Get all texts and tags from a tag `strong` from etree tag `some_tag` using lxml",
    "cmd": "print(etree.tostring(some_tag.find('strong')))",
    "question_id": "10258584-10",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.xml.etree.elementtree#xml.etree.ElementTree.tostring",
      "python.library.xml.etree.elementtree#xml.etree.ElementTree.Element.find"
    ],
    "clean_cmd": "print(etree.tostring(some_tag.find()))",
    "canonical_cmd": "print(etree.tostring(VAR_STR.find('VAR_STR')))"
  },
  {
    "nl": "get modified time of file `file`",
    "cmd": "time.ctime(os.path.getmtime(file))",
    "question_id": "237079-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.getmtime",
      "python.library.time#time.ctime"
    ],
    "clean_cmd": "time.ctime(os.path.getmtime(file))",
    "canonical_cmd": "time.ctime(os.path.getmtime(VAR_STR))"
  },
  {
    "nl": "get creation time of file `file`",
    "cmd": "time.ctime(os.path.getctime(file))",
    "question_id": "237079-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.getctime",
      "python.library.time#time.ctime"
    ],
    "clean_cmd": "time.ctime(os.path.getctime(file))",
    "canonical_cmd": "time.ctime(os.path.getctime(VAR_STR))"
  },
  {
    "nl": "get modification time of file `filename`",
    "cmd": "t = os.path.getmtime(filename)",
    "question_id": "237079-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.getmtime"
    ],
    "clean_cmd": "t = os.path.getmtime(filename)",
    "canonical_cmd": "t = os.path.getmtime(VAR_STR)"
  },
  {
    "nl": "get modification time of file `path`",
    "cmd": "os.path.getmtime(path)",
    "question_id": "237079-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.getmtime"
    ],
    "clean_cmd": "os.path.getmtime(path)",
    "canonical_cmd": "os.VAR_STR.getmtime(VAR_STR)"
  },
  {
    "nl": "get modified time of file `file`",
    "cmd": "print(('last modified: %s' % time.ctime(os.path.getmtime(file))))",
    "question_id": "237079-48",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.getmtime",
      "python.library.time#time.ctime"
    ],
    "clean_cmd": "print(( % time.ctime(os.path.getmtime(file))))",
    "canonical_cmd": "print('last modified: %s' % time.ctime(os.path.getmtime(VAR_STR)))"
  },
  {
    "nl": "get the creation time of file `file`",
    "cmd": "print(('created: %s' % time.ctime(os.path.getctime(file))))",
    "question_id": "237079-32",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.getctime",
      "python.library.time#time.ctime"
    ],
    "clean_cmd": "print(( % time.ctime(os.path.getctime(file))))",
    "canonical_cmd": "print('created: %s' % time.ctime(os.path.getctime(VAR_STR)))"
  },
  {
    "nl": "get the creation time of file `path_to_file`",
    "cmd": "return os.path.getctime(path_to_file)",
    "question_id": "237079-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.getctime"
    ],
    "clean_cmd": " os.path.getctime(path_to_file)",
    "canonical_cmd": "return os.path.getctime(VAR_STR)"
  },
  {
    "nl": "generate a string of numbers separated by comma which is divisible by `4` with remainder `1` or `2`.",
    "cmd": "\"\"\",\"\"\".join(str(i) for i in range(100) if i % 4 in (1, 2))",
    "question_id": "11064917-93",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range",
      "python.library.stdtypes#str",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(str(i)  i  range()  i %   (, ))",
    "canonical_cmd": "\"\"\",\"\"\".join(str(i) for i in range(100) if i % 4 in (1, 2))"
  },
  {
    "nl": "remove first directory from path '/First/Second/Third/Fourth/Fifth'",
    "cmd": "os.path.join(*x.split(os.path.sep)[2:])",
    "question_id": "26724275-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.join",
      "python.library.os.path#os.path.split"
    ],
    "clean_cmd": "os.path.join(*x.split(os.path.sep)[:])",
    "canonical_cmd": "os.path.join(*x.split(os.path.sep)[2:])"
  },
  {
    "nl": "check if `x` is an integer",
    "cmd": "isinstance(x, int)",
    "question_id": "3501382-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#isinstance"
    ],
    "clean_cmd": "isinstance(x, int)",
    "canonical_cmd": "isinstance(VAR_STR, int)"
  },
  {
    "nl": "check if `x` is an integer",
    "cmd": "(type(x) == int)",
    "question_id": "3501382-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#type"
    ],
    "clean_cmd": "(type(x) == int)",
    "canonical_cmd": "type(VAR_STR) == int"
  },
  {
    "nl": "convert nested list of lists `[['tom', 'cat'], ['jerry', 'mouse'], ['spark', 'dog']]` into a list of tuples",
    "cmd": "list(map(tuple, [['tom', 'cat'], ['jerry', 'mouse'], ['spark', 'dog']]))",
    "question_id": "18938276-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.functions#list"
    ],
    "clean_cmd": "list(map(tuple, [[, ], [, ], [, ]]))",
    "canonical_cmd": "list(map(tuple, [VAR_STR]))"
  },
  {
    "nl": "print a digit `your_number` with exactly 2 digits after decimal",
    "cmd": "print('{0:.2f}'.format(your_number))",
    "question_id": "5229425-29",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": "print(.format(your_number))",
    "canonical_cmd": "print('{0:.2f}'.format(VAR_STR))"
  },
  {
    "nl": "get biggest 3 values from each column of the pandas dataframe `data`",
    "cmd": "data.apply(lambda x: sorted(x, 3))",
    "question_id": "20477190-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "pandas.reference.api.pandas.series.apply"
    ],
    "clean_cmd": "data.apply( x: sorted(x, ))",
    "canonical_cmd": "VAR_STR.apply(lambda x: sorted(x, 3))"
  },
  {
    "nl": "replace periods `.` that are not followed by periods or spaces with a period and a space `. `",
    "cmd": "re.sub('\\\\.(?=[^ .])', '. ', para)",
    "question_id": "42731970-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , para)",
    "canonical_cmd": "re.sub('\\\\.(?=[^ .])', '. ', para)"
  },
  {
    "nl": "append values `[3, 4]` to a set `a`",
    "cmd": "a.update([3, 4])",
    "question_id": "3392354-94",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.turtle#turtle.update"
    ],
    "clean_cmd": "a.update([, ])",
    "canonical_cmd": "VAR_STR.update([VAR_STR])"
  },
  {
    "nl": "convert hex string `s` to decimal",
    "cmd": "i = int(s, 16)",
    "question_id": "9210525-51",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "i = int(s, )",
    "canonical_cmd": "i = int(VAR_STR, 16)"
  },
  {
    "nl": "convert hex string \"0xff\" to decimal",
    "cmd": "int('0xff', 16)",
    "question_id": "9210525-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "int(, )",
    "canonical_cmd": "int('VAR_STR', 16)"
  },
  {
    "nl": "convert hex string \"FFFF\" to decimal",
    "cmd": "int('FFFF', 16)",
    "question_id": "9210525-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "int(, )",
    "canonical_cmd": "int('VAR_STR', 16)"
  },
  {
    "nl": "convert hex string '0xdeadbeef' to decimal",
    "cmd": "ast.literal_eval('0xdeadbeef')",
    "question_id": "9210525-85",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.ast#ast.literal_eval"
    ],
    "clean_cmd": "ast.literal_eval()",
    "canonical_cmd": "ast.literal_eval('VAR_STR')"
  },
  {
    "nl": "convert hex string 'deadbeef' to decimal",
    "cmd": "int('deadbeef', 16)",
    "question_id": "9210525-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "int(, )",
    "canonical_cmd": "int('VAR_STR', 16)"
  },
  {
    "nl": "Strip all non-ASCII characters from a unicode string, `\\xa3\\u20ac\\xa3\\u20ac`",
    "cmd": "print(set(re.sub('[\\x00-\\x7f]', '', '\\xa3\\u20ac\\xa3\\u20ac')))",
    "question_id": "16866261-93",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub",
      "python.library.stdtypes#set"
    ],
    "clean_cmd": "print(set(re.sub(, , )))",
    "canonical_cmd": "print(set(re.sub('[\\x00-\\x7f]', '', 'VAR_STR')))"
  },
  {
    "nl": "Get all non-ascii characters in a unicode string `\\xa3100 is worth more than \\u20ac100`",
    "cmd": "print(re.sub('[\\x00-\\x7f]', '', '\\xa3100 is worth more than \\u20ac100'))",
    "question_id": "16866261-39",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "print(re.sub(, , ))",
    "canonical_cmd": "print(re.sub('[\\x00-\\x7f]', '', 'VAR_STR'))"
  },
  {
    "nl": "removing duplicate characters from a string variable \"foo\"",
    "cmd": "\"\"\"\"\"\".join(set(foo))",
    "question_id": "9841303-32",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#set",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(set(foo))",
    "canonical_cmd": "\"\"\"\"\"\".join(set(VAR_STR))"
  },
  {
    "nl": "convert a list of characters `['a', 'b', 'c', 'd']` into a string",
    "cmd": "\"\"\"\"\"\".join(['a', 'b', 'c', 'd'])",
    "question_id": "4481724-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join([, , , ])",
    "canonical_cmd": "\"\"\"\"\"\".join([VAR_STR])"
  },
  {
    "nl": "join elements of each tuple in list `a` into one string",
    "cmd": "[''.join(x) for x in a]",
    "question_id": "35015693-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "[.join(x)  x  a]",
    "canonical_cmd": "[''.join(x) for x in VAR_STR]"
  },
  {
    "nl": "join items of each tuple in list of tuples `a` into a list of strings",
    "cmd": "list(map(''.join, a))",
    "question_id": "35015693-23",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.functions#list"
    ],
    "clean_cmd": "list(map(.join, a))",
    "canonical_cmd": "list(map(''.join, VAR_STR))"
  },
  {
    "nl": "split string \"jvm.args= -Dappdynamics.com=true, -Dsomeotherparam=false,\" on the first occurrence of delimiter '='",
    "cmd": "\"\"\"jvm.args= -Dappdynamics.com=true, -Dsomeotherparam=false,\"\"\".split('=', 1)",
    "question_id": "11009155-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": ".split(, )",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".split('VAR_STR', 1)"
  },
  {
    "nl": "get the middle two characters of a string 'state' in a pandas dataframe `df`",
    "cmd": "df['state'].apply(lambda x: x[len(x) / 2 - 1:len(x) / 2 + 1])",
    "question_id": "20970279-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "pandas.reference.api.pandas.series.apply"
    ],
    "clean_cmd": "df[].apply( x: x[len(x) /  - :len(x) /  + ])",
    "canonical_cmd": "VAR_STR['VAR_STR'].apply(lambda x: x[len(x) / 2 - 1:len(x) / 2 + 1])"
  },
  {
    "nl": "join multiple dataframes `d1`, `d2`, and `d3` on column 'name'",
    "cmd": "df1.merge(df2, on='name').merge(df3, on='name')",
    "question_id": "23668427-2",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.merge"
    ],
    "clean_cmd": "df1.merge(df2, on=).merge(df3, on=)",
    "canonical_cmd": "df1.merge(df2, on='VAR_STR').merge(df3, on='VAR_STR')"
  },
  {
    "nl": "How to sort a dictionary in python by value when the value is a list and I want to sort it by the first index of that list",
    "cmd": "sorted(list(data.items()), key=lambda x: x[1][0])",
    "question_id": "42765620-65",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(list(data.items()), key= x: x[][])",
    "canonical_cmd": "sorted(list(data.items()), key=lambda x: x[1][0])"
  },
  {
    "nl": "sort query set by number of characters in a field `length` in django model `MyModel`",
    "cmd": "MyModel.objects.extra(select={'length': 'Length(name)'}).order_by('length')",
    "question_id": "12804801-42",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.zipfile#zipfile.ZipInfo.extra"
    ],
    "clean_cmd": "MyModel.objects.extra(select={: }).order_by()",
    "canonical_cmd": "VAR_STR.objects.extra(select={'VAR_STR': 'Length(name)'}).order_by('VAR_STR')"
  },
  {
    "nl": "sort column `m` in panda dataframe `df`",
    "cmd": "df.sort('m')",
    "question_id": "13838405-78",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.index.sort"
    ],
    "clean_cmd": "df.sort()",
    "canonical_cmd": "VAR_STR.sort('VAR_STR')"
  },
  {
    "nl": "read a ragged csv file `D:/Temp/tt.csv` using `names` parameter in pandas",
    "cmd": "pd.read_csv('D:/Temp/tt.csv', names=list('abcdef'))",
    "question_id": "20154303-58",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.read_csv",
      "python.library.functions#list"
    ],
    "clean_cmd": "pd.read_csv(, names=list())",
    "canonical_cmd": "pd.read_csv('VAR_STR', VAR_STR=list('abcdef'))"
  },
  {
    "nl": "Concatenating two one-dimensional NumPy arrays 'a' and 'b'.",
    "cmd": "numpy.concatenate([a, b])",
    "question_id": "9236926-97",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.concatenate"
    ],
    "clean_cmd": "numpy.concatenate([a, b])",
    "canonical_cmd": "numpy.concatenate([VAR_STR, VAR_STR])"
  },
  {
    "nl": "assign float 9.8 to variable `GRAVITY`",
    "cmd": "GRAVITY = 9.8",
    "question_id": "18224991-15",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "GRAVITY = 9.8",
    "canonical_cmd": "VAR_STR = 9.8"
  },
  {
    "nl": "create a regular expression object with the pattern '\\xe2\\x80\\x93'",
    "cmd": "re.compile('\\xe2\\x80\\x93')",
    "question_id": "372102-55",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.compile"
    ],
    "clean_cmd": "re.compile()",
    "canonical_cmd": "re.compile('VAR_STR')"
  },
  {
    "nl": "SQLAlchemy select records of columns of table `my_table` in addition to current date column",
    "cmd": "print(select([my_table, func.current_date()]).execute())",
    "question_id": "4552380-10",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.select#select.select",
      "python.library.msilib#msilib.View.Execute"
    ],
    "clean_cmd": "print(select([my_table, func.current_date()]).execute())",
    "canonical_cmd": "print(select([VAR_STR, func.current_date()]).execute())"
  },
  {
    "nl": "change NaN values in dataframe `df` using preceding values in the frame",
    "cmd": "df.fillna(method='ffill', inplace=True)",
    "question_id": "38457059-97",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.fillna"
    ],
    "clean_cmd": "df.fillna(method=, inplace=True)",
    "canonical_cmd": "VAR_STR.fillna(method='ffill', inplace=True)"
  },
  {
    "nl": "counting the number of true booleans in a python list `[True, True, False, False, False, True]`",
    "cmd": "sum([True, True, False, False, False, True])",
    "question_id": "12765833-84",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum([True, True, False, False, False, True])",
    "canonical_cmd": "sum([VAR_STR])"
  },
  {
    "nl": "replacing  '\\u200b' with '*' in a string  using regular expressions",
    "cmd": "'used\\u200b'.replace('\\u200b', '*')",
    "question_id": "31522361-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": ".replace(, )",
    "canonical_cmd": "\"\"\"used\u200b\"\"\".replace('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "display a grayscale image from array of pixels `imageArray`",
    "cmd": "imshow(imageArray, cmap='Greys_r')",
    "question_id": "14111705-53",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib._as_gen.matplotlib.pyplot.imshow"
    ],
    "clean_cmd": "imshow(imageArray, cmap=)",
    "canonical_cmd": "imshow(VAR_STR, cmap='Greys_r')"
  },
  {
    "nl": "unpivot first 2 columns into new columns 'year' and 'value' from a pandas dataframe `x`",
    "cmd": "pd.melt(x, id_vars=['farm', 'fruit'], var_name='year', value_name='value')",
    "question_id": "23354124-66",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.melt"
    ],
    "clean_cmd": "pd.melt(x, id_vars=[, ], var_name=, value_name=)",
    "canonical_cmd": "pd.melt(VAR_STR, id_vars=['farm', 'fruit'], var_name='VAR_STR', value_name='VAR_STR')"
  },
  {
    "nl": "add unicode string '1' to UTF-8 decoded string '\\xc2\\xa3'",
    "cmd": "print('\\xc2\\xa3'.decode('utf8') + '1')",
    "question_id": "31771758-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "print(.decode() + )",
    "canonical_cmd": "print('VAR_STR'.decode('utf8') + 'VAR_STR')"
  },
  {
    "nl": "convert unicode string `s` into string literals",
    "cmd": "print(s.encode('unicode_escape'))",
    "question_id": "20774910-90",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "print(s.encode())",
    "canonical_cmd": "print(VAR_STR.encode('unicode_escape'))"
  },
  {
    "nl": "Initialize a list of empty lists `x` of size 3",
    "cmd": "x = [[] for i in range(3)]",
    "question_id": "12791501-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "x = [[]  i  range()]",
    "canonical_cmd": "VAR_STR = [[] for i in range(3)]"
  },
  {
    "nl": "count the number of items in a generator/iterator `it`",
    "cmd": "sum(1 for i in it)",
    "question_id": "5384570-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(  i  it)",
    "canonical_cmd": "sum(1 for i in VAR_STR)"
  },
  {
    "nl": "convert nested list 'Cards' into a flat list",
    "cmd": "[a for c in Cards for b in c for a in b]",
    "question_id": "16734590-48",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[a  c  Cards  b  c  a  b]",
    "canonical_cmd": "[a for c in VAR_STR for b in c for a in b]"
  },
  {
    "nl": "remove specific elements in a numpy array `a`",
    "cmd": "numpy.delete(a, index)",
    "question_id": "10996140-0",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.delete"
    ],
    "clean_cmd": "numpy.delete(a, index)",
    "canonical_cmd": "numpy.delete(VAR_STR, index)"
  },
  {
    "nl": "get a list of all items in list `j` with values greater than `5`",
    "cmd": "[x for x in j if x >= 5]",
    "question_id": "4587915-52",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[x  x  j  x &gt;= ]",
    "canonical_cmd": "[x for x in VAR_STR if x >= 5]"
  },
  {
    "nl": "merge dictionaries form array `dicts` in a single expression",
    "cmd": "dict((k, v) for d in dicts for k, v in list(d.items()))",
    "question_id": "38987-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "dict((k, v)  d  dicts  k, v  list(d.items()))",
    "canonical_cmd": "dict((k, v) for d in VAR_STR for k, v in list(d.items()))"
  },
  {
    "nl": "decode url-encoded string `some_string` to its character equivalents",
    "cmd": "urllib.parse.unquote(urllib.parse.unquote(some_string))",
    "question_id": "28431359-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.parse#urllib.parse.unquote"
    ],
    "clean_cmd": "urllib.parse.unquote(urllib.parse.unquote(some_string))",
    "canonical_cmd": "urllib.parse.unquote(urllib.parse.unquote(VAR_STR))"
  },
  {
    "nl": "decode a double URL encoded string \r\n'FireShot3%2B%25282%2529.png' to\r\n'FireShot3+(2).png'",
    "cmd": "urllib.parse.unquote(urllib.parse.unquote('FireShot3%2B%25282%2529.png'))",
    "question_id": "28431359-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.parse#urllib.parse.unquote"
    ],
    "clean_cmd": "urllib.parse.unquote(urllib.parse.unquote())",
    "canonical_cmd": "urllib.parse.unquote(urllib.parse.unquote('VAR_STR'))"
  },
  {
    "nl": "join list of numbers `[1,2,3,4] ` to string of numbers.",
    "cmd": "\"\"\"\"\"\".join([1, 2, 3, 4])",
    "question_id": "2597932-72",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join([, , , ])",
    "canonical_cmd": "\"\"\"\"\"\".join([1, 2, 3, 4])"
  },
  {
    "nl": "start a new thread for `myfunction` with parameters 'MyStringHere' and 1",
    "cmd": "thread.start_new_thread(myfunction, ('MyStringHere', 1))",
    "question_id": "849674-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library._thread#_thread.start_new_thread"
    ],
    "clean_cmd": "thread.start_new_thread(myfunction, (, ))",
    "canonical_cmd": "thread.start_new_thread(VAR_STR, ('VAR_STR', 1))"
  },
  {
    "nl": "start a new thread for `myfunction` with parameters 'MyStringHere' and 1",
    "cmd": "thread.start_new_thread(myfunction, ('MyStringHere', 1))",
    "question_id": "849674-100",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library._thread#_thread.start_new_thread"
    ],
    "clean_cmd": "thread.start_new_thread(myfunction, (, ))",
    "canonical_cmd": "thread.start_new_thread(VAR_STR, ('VAR_STR', 1))"
  },
  {
    "nl": "python sum of ascii values of all characters in a string `string`",
    "cmd": "sum(map(ord, string))",
    "question_id": "12492137-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(map(ord, string))",
    "canonical_cmd": "sum(map(ord, VAR_STR))"
  },
  {
    "nl": "remove first word in string `s`",
    "cmd": "s.split(' ', 1)[1]",
    "question_id": "12883376-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "s.split(, )[]",
    "canonical_cmd": "VAR_STR.split(' ', 1)[1]"
  },
  {
    "nl": "open a file \"$file\" under Unix",
    "cmd": "os.system('start \"$file\"')",
    "question_id": "1679798-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system('start \"$file\"')"
  },
  {
    "nl": "create variable key/value pairs with argparse",
    "cmd": "parser.add_argument('--conf', nargs=2, action='append')",
    "question_id": "27146262-46",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.argparse#argparse.ArgumentParser.add_argument"
    ],
    "clean_cmd": "parser.add_argument(, nargs=, action=)",
    "canonical_cmd": "parser.add_argument('--conf', nargs=2, action='append')"
  },
  {
    "nl": "convert the zip of range `(1, 5)` and range `(7, 11)` into a dictionary",
    "cmd": "dict(zip(list(range(1, 5)), list(range(7, 11))))",
    "question_id": "18789262-62",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range",
      "python.library.functions#list",
      "python.library.functions#zip",
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict(zip(list(range(, )), list(range(, ))))",
    "canonical_cmd": "dict(zip(list(range(VAR_STR)), list(range(VAR_STR))))"
  },
  {
    "nl": "create a list of integers between 2 values `11` and `17`",
    "cmd": "list(range(11, 17))",
    "question_id": "18265935-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range",
      "python.library.functions#list"
    ],
    "clean_cmd": "list(range(, ))",
    "canonical_cmd": "list(range(11, 17))"
  },
  {
    "nl": "argparse associate zero or more arguments with flag 'file'",
    "cmd": "parser.add_argument('file', nargs='*')",
    "question_id": "26727314-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.argparse#argparse.ArgumentParser.add_argument"
    ],
    "clean_cmd": "parser.add_argument(, nargs=)",
    "canonical_cmd": "parser.add_argument('VAR_STR', nargs='*')"
  },
  {
    "nl": "print a character that has unicode value `\\u25b2`",
    "cmd": "print('\\u25b2'.encode('utf-8'))",
    "question_id": "16658068-0",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "print(.encode())",
    "canonical_cmd": "print('VAR_STR'.encode('utf-8'))"
  },
  {
    "nl": "set every two-stride far element to -1 starting from second element in array `a`",
    "cmd": "a[1::2] = -1",
    "question_id": "7154739-87",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "a[::] = -",
    "canonical_cmd": "VAR_STR[1::2] = -1"
  },
  {
    "nl": "Get an item from a list of dictionary `lst` which has maximum value in the key `score` using lambda function",
    "cmd": "max(lst, key=lambda x: x['score'])",
    "question_id": "6561653-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#max"
    ],
    "clean_cmd": "max(lst, key= x: x[])",
    "canonical_cmd": "max(VAR_STR, key=lambda x: x['VAR_STR'])"
  },
  {
    "nl": "SQLAlchemy count the number of rows with distinct values in column `name` of table `Tag`",
    "cmd": "session.query(Tag).distinct(Tag.name).group_by(Tag.name).count()",
    "question_id": "17223174-79",
    "cmd_name": "conala",
    "oracle_man": [
      "django.ref.models.querysets#django.db.models.Count.distinct",
      "python.library.stdtypes#str.count"
    ],
    "clean_cmd": "session.query(Tag).distinct(Tag.name).group_by(Tag.name).count()",
    "canonical_cmd": "session.query(VAR_STR).distinct(VAR_STR.VAR_STR).group_by(VAR_STR.VAR_STR).count()"
  },
  {
    "nl": "get a list of items form nested list `li` where third element of each item contains string 'ar'",
    "cmd": "[x for x in li if 'ar' in x[2]]",
    "question_id": "6889785-68",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[x  x  li    x[]]",
    "canonical_cmd": "[x for x in VAR_STR if 'VAR_STR' in x[2]]"
  },
  {
    "nl": "get index of character 'b' in list '['a', 'b']'",
    "cmd": "['a', 'b'].index('b')",
    "question_id": "3847472-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.index"
    ],
    "clean_cmd": "[, ].index()",
    "canonical_cmd": "['a', 'VAR_STR'].index('VAR_STR')"
  },
  {
    "nl": "remove null columns in a dataframe `df`",
    "cmd": "df = df.dropna(axis=1, how='all')",
    "question_id": "10857924-28",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.dropna"
    ],
    "clean_cmd": "df = df.dropna(axis=, how=)",
    "canonical_cmd": "VAR_STR = VAR_STR.dropna(axis=1, how='all')"
  },
  {
    "nl": "Log info message 'Log message' with attributes `{'app_name': 'myapp'}`",
    "cmd": "logging.info('Log message', extra={'app_name': 'myapp'})",
    "question_id": "17558552-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.logging#logging.info"
    ],
    "clean_cmd": "logging.info(, extra={: })",
    "canonical_cmd": "logging.info('VAR_STR', extra={VAR_STR})"
  },
  {
    "nl": "Merge column  'word' in dataframe `df2` with column 'word' on dataframe `df1`",
    "cmd": "df1.merge(df2, how='left', on='word')",
    "question_id": "42060144-97",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.merge"
    ],
    "clean_cmd": "df1.merge(df2, how=, on=)",
    "canonical_cmd": "VAR_STR.merge(VAR_STR, how='left', on='VAR_STR')"
  },
  {
    "nl": "get the maximum of 'salary' and 'bonus' values in a dictionary",
    "cmd": "print(max(d, key=lambda x: (d[x]['salary'], d[x]['bonus'])))",
    "question_id": "42211584-15",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#max"
    ],
    "clean_cmd": "print(max(d, key= x: (d[x][], d[x][])))",
    "canonical_cmd": "print(max(d, key=lambda x: (d[x]['VAR_STR'], d[x]['VAR_STR'])))"
  },
  {
    "nl": "match blank lines in `s` with regular expressions",
    "cmd": "re.split('\\n\\\\s*\\n', s)",
    "question_id": "1197600-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, s)",
    "canonical_cmd": "re.split('\\n\\\\s*\\n', VAR_STR)"
  },
  {
    "nl": "replace carriage return in string `somestring` with empty string ''",
    "cmd": "somestring.replace('\\\\r', '')",
    "question_id": "11755208-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "somestring.replace(, )",
    "canonical_cmd": "VAR_STR.replace('\\\\r', 'VAR_STR')"
  },
  {
    "nl": "print string \"ABC\" as hex literal",
    "cmd": "\"\"\"\u0001ABC\"\"\".encode('hex')",
    "question_id": "21947035-29",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": ".encode()",
    "canonical_cmd": "\"\"\"\u0001ABC\"\"\".encode('hex')"
  },
  {
    "nl": "sort a list `L` by number after second '.'",
    "cmd": "print(sorted(L, key=lambda x: int(x.split('.')[2])))",
    "question_id": "21361604-32",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#int",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "print(sorted(L, key= x: int(x.split()[])))",
    "canonical_cmd": "print(sorted(VAR_STR, key=lambda x: int(x.split('VAR_STR')[2])))"
  },
  {
    "nl": "Filter duplicate entries w.r.t. value in 'id' from a list of dictionaries 'L'",
    "cmd": "list(dict((x['id'], x) for x in L).values())",
    "question_id": "11114358-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.functions#list",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "list(dict((x[], x)  x  L).values())",
    "canonical_cmd": "list(dict((x['VAR_STR'], x) for x in VAR_STR).values())"
  },
  {
    "nl": "create list of 'size' empty strings",
    "cmd": "strs = ['' for x in range(size)]",
    "question_id": "6376886-84",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "strs = [  x  range(size)]",
    "canonical_cmd": "strs = ['' for x in range(VAR_STR)]"
  },
  {
    "nl": "Copy list `old_list` and name it `new_list`",
    "cmd": "new_list = [x[:] for x in old_list]",
    "question_id": "28684154-81",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "new_list = [x[:]  x  old_list]",
    "canonical_cmd": "VAR_STR = [x[:] for x in VAR_STR]"
  },
  {
    "nl": "append string 'str' at the beginning of each value in column 'col' of dataframe `df`",
    "cmd": "df['col'] = 'str' + df['col'].astype(str)",
    "question_id": "20025882-78",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.astype"
    ],
    "clean_cmd": "df[] =  + df[].astype(str)",
    "canonical_cmd": "VAR_STR['VAR_STR'] = 'VAR_STR' + VAR_STR['VAR_STR'].astype(VAR_STR)"
  },
  {
    "nl": "BeautifulSoup find a tag whose id ends with string 'para'",
    "cmd": "soup.findAll(id=re.compile('para$'))",
    "question_id": "11924135-42",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.compile",
      "python.library.re#re.findall"
    ],
    "clean_cmd": "soup.findAll(id=re.compile())",
    "canonical_cmd": "soup.findAll(id=re.compile('para$'))"
  },
  {
    "nl": "select `div` tags whose `id`s begin with `value_xxx_c_1_f_8_a_`",
    "cmd": "soup.select('div[id^=\"value_xxx_c_1_f_8_a_\"]')",
    "question_id": "11924135-85",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.select#select.select"
    ],
    "clean_cmd": "soup.select()",
    "canonical_cmd": "soup.select('div[id^=\"value_xxx_c_1_f_8_a_\"]')"
  },
  {
    "nl": "match string 'this is my string' with regex '\\\\b(this|string)\\\\b'\r\nthen replace it with regex '<markup>\\\\1</markup>'",
    "cmd": "re.sub('\\\\b(this|string)\\\\b', '<markup>\\\\1</markup>', 'this is my string')",
    "question_id": "4338032-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , )",
    "canonical_cmd": "re.sub('VAR_STR', 'VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "replace backslashes in string `result` with empty string ''",
    "cmd": "result = result.replace('\\\\', '')",
    "question_id": "3160752-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "result = result.replace(, )",
    "canonical_cmd": "VAR_STR = VAR_STR.replace('\\\\', 'VAR_STR')"
  },
  {
    "nl": "remove backslashes from string `result`",
    "cmd": "result.replace('\\\\', '')",
    "question_id": "3160752-60",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "result.replace(, )",
    "canonical_cmd": "VAR_STR.replace('\\\\', '')"
  },
  {
    "nl": "separate each character in string `s` by '-'",
    "cmd": "re.sub('(.)(?=.)', '\\\\1-', s)",
    "question_id": "27457970-81",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , s)",
    "canonical_cmd": "re.sub('(.)(?=.)', '\\\\1-', VAR_STR)"
  },
  {
    "nl": "concatenate '-' in between characters of string `str`",
    "cmd": "re.sub('(?<=.)(?=.)', '-', str)",
    "question_id": "27457970-4",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , str)",
    "canonical_cmd": "re.sub('(?<=.)(?=.)', 'VAR_STR', VAR_STR)"
  },
  {
    "nl": "cartesian product of `x` and `y` array points into single array of 2d points",
    "cmd": "numpy.dstack(numpy.meshgrid(x, y)).reshape(-1, 2)",
    "question_id": "11144513-58",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.meshgrid",
      "numpy.reference.generated.numpy.dstack",
      "numpy.reference.generated.numpy.reshape"
    ],
    "clean_cmd": "numpy.dstack(numpy.meshgrid(x, y)).reshape(-, )",
    "canonical_cmd": "numpy.dstack(numpy.meshgrid(VAR_STR, VAR_STR)).reshape(-1, 2)"
  },
  {
    "nl": "truncate string `s` up to character ':'",
    "cmd": "s.split(':', 1)[1]",
    "question_id": "436599-41",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "s.split(, )[]",
    "canonical_cmd": "VAR_STR.split('VAR_STR', 1)[1]"
  },
  {
    "nl": "remove index 2 element from a list `my_list`",
    "cmd": "my_list.pop(2)",
    "question_id": "9754729-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#frozenset.pop"
    ],
    "clean_cmd": "my_list.pop()",
    "canonical_cmd": "VAR_STR.pop(2)"
  },
  {
    "nl": "Encode each value to 'UTF8' in the list `EmployeeList`",
    "cmd": "[x.encode('UTF8') for x in EmployeeList]",
    "question_id": "18272066-92",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "[x.encode()  x  EmployeeList]",
    "canonical_cmd": "[x.encode('VAR_STR') for x in VAR_STR]"
  },
  {
    "nl": "get the sum of each second value from a list of tuple `structure`",
    "cmd": "sum(x[1] for x in structure)",
    "question_id": "12218112-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(x[]  x  structure)",
    "canonical_cmd": "sum(x[1] for x in VAR_STR)"
  },
  {
    "nl": "send data 'HTTP/1.0 200 OK\\r\\n\\r\\n' to socket `connection`",
    "cmd": "connection.send('HTTP/1.0 200 established\\r\\n\\r\\n')",
    "question_id": "40851413-100",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.multiprocessing#multiprocessing.connection.Connection.send"
    ],
    "clean_cmd": "connection.send()",
    "canonical_cmd": "VAR_STR.send('HTTP/1.0 200 established\\r\\n\\r\\n')"
  },
  {
    "nl": "send data 'HTTP/1.0 200 OK\\r\\n\\r\\n' to socket `connection`",
    "cmd": "connection.send('HTTP/1.0 200 OK\\r\\n\\r\\n')",
    "question_id": "40851413-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.multiprocessing#multiprocessing.connection.Connection.send"
    ],
    "clean_cmd": "connection.send()",
    "canonical_cmd": "VAR_STR.send('VAR_STR')"
  },
  {
    "nl": "create a slice object using string `string_slice`",
    "cmd": "slice(*[(int(i.strip()) if i else None) for i in string_slice.split(':')])",
    "question_id": "13324554-1",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#slice",
      "python.library.functions#int",
      "python.library.stdtypes#str.strip",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "slice(*[(int(i.strip())  i  None)  i  string_slice.split()])",
    "canonical_cmd": "slice(*[(int(i.strip()) if i else None) for i in VAR_STR.split(':')])"
  },
  {
    "nl": "append a pandas series `b` to the series `a` and get a continuous index",
    "cmd": "a.append(b).reset_index(drop=True)",
    "question_id": "20400135-83",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.series.reset_index",
      "numpy.reference.generated.numpy.append"
    ],
    "clean_cmd": "a.append(b).reset_index(drop=True)",
    "canonical_cmd": "VAR_STR.append(VAR_STR).reset_index(drop=True)"
  },
  {
    "nl": "simple way to append a pandas series `a` and `b` with same index",
    "cmd": "pd.concat([a, b], ignore_index=True)",
    "question_id": "20400135-52",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.concat"
    ],
    "clean_cmd": "pd.concat([a, b], ignore_index=True)",
    "canonical_cmd": "pd.concat([VAR_STR, VAR_STR], ignore_index=True)"
  },
  {
    "nl": "split unicode string \"\u0440\u0430\u0437 \u0434\u0432\u0430 \u0442\u0440\u0438\" into words",
    "cmd": "'\\u0440\\u0430\\u0437 \\u0434\\u0432\\u0430 \\u0442\\u0440\\u0438'.split()",
    "question_id": "7286879-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": ".split()",
    "canonical_cmd": "\"\"\"\u0440\u0430\u0437 \u0434\u0432\u0430 \u0442\u0440\u0438\"\"\".split()"
  },
  {
    "nl": "change flask security register url to `/create_account`",
    "cmd": "app.config['SECURITY_REGISTER_URL'] = '/create_account'",
    "question_id": "14793098-82",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "app.config[] = ",
    "canonical_cmd": "app.config['SECURITY_REGISTER_URL'] = 'VAR_STR'"
  },
  {
    "nl": "encode string `data` as `hex`",
    "cmd": "data.encode('hex')",
    "question_id": "200738-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "data.encode()",
    "canonical_cmd": "VAR_STR.encode('VAR_STR')"
  },
  {
    "nl": "open the login site 'http://somesite.com/adminpanel/index.php' in the browser",
    "cmd": "webbrowser.open('http://somesite.com/adminpanel/index.php')",
    "question_id": "21414159-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.webbrowser#webbrowser.open"
    ],
    "clean_cmd": "webbrowser.open()",
    "canonical_cmd": "webbrowser.open('VAR_STR')"
  },
  {
    "nl": "insert row into mysql database with column 'column1' set to the value `value`",
    "cmd": "cursor.execute('INSERT INTO table (`column1`) VALUES (%s)', (value,))",
    "question_id": "5507948-0",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.sqlite3#sqlite3.Cursor.execute"
    ],
    "clean_cmd": "cursor.execute(, (value,))",
    "canonical_cmd": "cursor.execute('INSERT INTO table (`column1`) VALUES (%s)', (VAR_STR,))"
  },
  {
    "nl": "check if any item from list `b` is in list `a`",
    "cmd": "print(any(x in a for x in b))",
    "question_id": "740287-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#any"
    ],
    "clean_cmd": "print(any(x  a  x  b))",
    "canonical_cmd": "print(any(x in VAR_STR for x in VAR_STR))"
  },
  {
    "nl": "scalar multiply matrix `a` by `b`",
    "cmd": "(a.T * b).T",
    "question_id": "3809265-55",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "(a.T * b).T",
    "canonical_cmd": "(VAR_STR.T * VAR_STR).T"
  },
  {
    "nl": "un-escape a backslash-escaped string in `Hello,\\\\nworld!`",
    "cmd": "print('\"Hello,\\\\nworld!\"'.decode('string_escape'))",
    "question_id": "1885181-93",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "print(.decode())",
    "canonical_cmd": "print('\"Hello,\\\\nworld!\"'.decode('string_escape'))"
  },
  {
    "nl": "round 1123.456789 to be an integer",
    "cmd": "print(round(1123.456789, -1))",
    "question_id": "3348825-1",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#round"
    ],
    "clean_cmd": "print(round(1123.456789, -))",
    "canonical_cmd": "print(round(1123.456789, -1))"
  },
  {
    "nl": "substitute two or more whitespace characters with character '|' in string `line`",
    "cmd": "re.sub('\\\\s{2,}', '|', line.strip())",
    "question_id": "36957908-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub",
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": "re.sub(, , line.strip())",
    "canonical_cmd": "re.sub('\\\\s{2,}', 'VAR_STR', VAR_STR.strip())"
  },
  {
    "nl": "create a list containing elements from list `list` that are predicate to function `f`",
    "cmd": "[f(x) for x in list]",
    "question_id": "1222677-77",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[f(x)  x  list]",
    "canonical_cmd": "[VAR_STR(x) for x in VAR_STR]"
  },
  {
    "nl": "Make a scatter plot using unpacked values of list `li`",
    "cmd": "plt.scatter(*zip(*li))",
    "question_id": "21519203-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "pandas.reference.api.pandas.dataframe.plot.scatter"
    ],
    "clean_cmd": "plt.scatter(*zip(*li))",
    "canonical_cmd": "plt.scatter(*zip(*VAR_STR))"
  },
  {
    "nl": "inherit from class `Executive`",
    "cmd": "super(Executive, self).__init__(*args)",
    "question_id": "16128833-30",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#super",
      "python.library.logging#logging.Handler.__init__"
    ],
    "clean_cmd": "super(Executive, self).__init__(*args)",
    "canonical_cmd": "super(VAR_STR, self).__init__(*args)"
  },
  {
    "nl": "terminate process `p`",
    "cmd": "p.terminate()",
    "question_id": "17856928-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.multiprocessing#multiprocessing.Process.terminate"
    ],
    "clean_cmd": "p.terminate()",
    "canonical_cmd": "VAR_STR.terminate()"
  },
  {
    "nl": "make a list of integers from 0 to `5` where each second element is a duplicate of the previous element",
    "cmd": "print([u for v in [[i, i] for i in range(5)] for u in v])",
    "question_id": "31743603-4",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "print([u  v  [[i, i]  i  range()]  u  v])",
    "canonical_cmd": "print([u for v in [[i, i] for i in range(5)] for u in v])"
  },
  {
    "nl": "create a list of integers with duplicate values `[0, 0, 1, 1, 2, 2, 3, 3, 4, 4]`",
    "cmd": "[0, 0, 1, 1, 2, 2, 3, 3, 4, 4]",
    "question_id": "31743603-29",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[, , , , , , , , , ]",
    "canonical_cmd": "[VAR_STR]"
  },
  {
    "nl": "create a list of integers from 1 to 5 with each value duplicated",
    "cmd": "[(i // 2) for i in range(10)]",
    "question_id": "31743603-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "[(i // )  i  range()]",
    "canonical_cmd": "[(i // 2) for i in range(10)]"
  },
  {
    "nl": "convert a beautiful soup html `soup` to text",
    "cmd": "print(soup.get_text())",
    "question_id": "14694482-12",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.table_api#matplotlib.table.Cell.get_text"
    ],
    "clean_cmd": "print(soup.get_text())",
    "canonical_cmd": "print(VAR_STR.get_text())"
  },
  {
    "nl": "calculate the mean of columns with same name in dataframe `df`",
    "cmd": "df.groupby(by=df.columns, axis=1).mean()",
    "question_id": "40311987-33",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.dataframe.mean"
    ],
    "clean_cmd": "df.groupby(by=df.columns, axis=).mean()",
    "canonical_cmd": "VAR_STR.groupby(by=VAR_STR.columns, axis=1).mean()"
  },
  {
    "nl": "create a list where each element is a dictionary with keys 'key1' and 'key2' and values corresponding to each value in the lists referenced by keys 'key1' and 'key2' in dictionary `d`",
    "cmd": "[{'key1': a, 'key2': b} for a, b in zip(d['key1'], d['key2'])]",
    "question_id": "1780174-1",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "[{: a, : b}  a, b  zip(d[], d[])]",
    "canonical_cmd": "[{'VAR_STR': a, 'VAR_STR': b} for a, b in zip(VAR_STR['VAR_STR'], VAR_STR['VAR_STR'])]"
  },
  {
    "nl": "Split dictionary of lists into list of dictionaries",
    "cmd": "map(dict, zip(*[[(k, v) for v in value] for k, value in list(d.items())]))",
    "question_id": "1780174-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#map",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "map(dict, zip(*[[(k, v)  v  value]  k, value  list(d.items())]))",
    "canonical_cmd": "map(dict, zip(*[[(k, v) for v in value] for k, value in list(d.items())]))"
  },
  {
    "nl": "build a dict of key:value pairs from a string representation of a dict, `{'muffin' : 'lolz', 'foo' : 'kitty'}`",
    "cmd": "ast.literal_eval(\"{'muffin' : 'lolz', 'foo' : 'kitty'}\")",
    "question_id": "988228-81",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.ast#ast.literal_eval"
    ],
    "clean_cmd": "ast.literal_eval()",
    "canonical_cmd": "ast.literal_eval('VAR_STR')"
  },
  {
    "nl": "combine two lists `[1, 2, 3, 4]` and `['a', 'b', 'c', 'd']` into a dictionary",
    "cmd": "dict(zip([1, 2, 3, 4], ['a', 'b', 'c', 'd']))",
    "question_id": "7271385-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict(zip([, , , ], [, , , ]))",
    "canonical_cmd": "dict(zip([VAR_STR], [VAR_STR]))"
  },
  {
    "nl": "combine two lists `[1, 2, 3, 4]` and `['a', 'b', 'c', 'd']` into a dictionary",
    "cmd": "dict(zip([1, 2, 3, 4], ['a', 'b', 'c', 'd']))",
    "question_id": "7271385-51",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict(zip([, , , ], [, , , ]))",
    "canonical_cmd": "dict(zip([VAR_STR], [VAR_STR]))"
  },
  {
    "nl": "pandas: change all the values of a column 'Date' into \"int(str(x)[-4:])\"",
    "cmd": "df['Date'] = df['Date'].apply(lambda x: int(str(x)[-4:]))",
    "question_id": "12604909-49",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#str"
    ],
    "clean_cmd": "df[] = df[].apply( x: int(str(x)[-:]))",
    "canonical_cmd": "df['VAR_STR'] = df['VAR_STR'].apply(lambda x: int(str(x)[-4:]))"
  },
  {
    "nl": "format number 1000000000.0 using latex notation",
    "cmd": "print('\\\\num{{{0:.2g}}}'.format(1000000000.0))",
    "question_id": "13490292-70",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": "print(.format(1000000000.0))",
    "canonical_cmd": "print('\\\\num{{{0:.2g}}}'.format(1000000000.0))"
  },
  {
    "nl": "randomly switch letters' cases in string `s`",
    "cmd": "\"\"\"\"\"\".join(x.upper() if random.randint(0, 1) else x for x in s)",
    "question_id": "8344905-24",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.random#random.randint",
      "python.library.stdtypes#str.upper",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(x.upper()  random.randint(, )  x  x  s)",
    "canonical_cmd": "\"\"\"\"\"\".join(x.upper() if random.randint(0, 1) else x for x in VAR_STR)"
  },
  {
    "nl": "convert the argument `date` with string formatting in logging",
    "cmd": "logging.info('date=%s', date)",
    "question_id": "12843099-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.logging#logging.info"
    ],
    "clean_cmd": "logging.info(, date)",
    "canonical_cmd": "logging.info('date=%s', VAR_STR)"
  },
  {
    "nl": "Log message of level 'info' with value of `date` in the message",
    "cmd": "logging.info('date={}'.format(date))",
    "question_id": "12843099-18",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.logging#logging.info",
      "python.library.functions#format"
    ],
    "clean_cmd": "logging.info(.format(date))",
    "canonical_cmd": "logging.VAR_STR('date={}'.format(VAR_STR))"
  },
  {
    "nl": "Do group by on `cluster` column in `df` and get its mean",
    "cmd": "df.groupby(['cluster']).mean()",
    "question_id": "30328646-93",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.dataframe.mean"
    ],
    "clean_cmd": "df.groupby([]).mean()",
    "canonical_cmd": "VAR_STR.groupby(['VAR_STR']).mean()"
  },
  {
    "nl": "sort list `strings` in alphabetical order based on the letter after percent character `%` in each element",
    "cmd": "strings.sort(key=lambda str: re.sub('.*%(.).*', '\\\\1', str))",
    "question_id": "1082413-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub",
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "strings.sort(key= str: re.sub(, , str))",
    "canonical_cmd": "VAR_STR.sort(key=lambda str: re.sub('.*%(.).*', '\\\\1', str))"
  },
  {
    "nl": "sort a list of strings `strings` based on regex match",
    "cmd": "strings.sort(key=lambda str: re.sub('.*%', '', str))",
    "question_id": "1082413-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub",
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "strings.sort(key= str: re.sub(, , str))",
    "canonical_cmd": "VAR_STR.sort(key=lambda str: re.sub('.*%', '', str))"
  },
  {
    "nl": "split string \"This is a string\" into words that do not contain whitespaces",
    "cmd": "\"\"\"This is a string\"\"\".split()",
    "question_id": "9206964-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": ".split()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".split()"
  },
  {
    "nl": "split string \"This     is a     string\" into words that does not contain whitespaces",
    "cmd": "\"\"\"This     is a     string\"\"\".split()",
    "question_id": "9206964-10",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": ".split()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".split()"
  },
  {
    "nl": "merge a pandas data frame `distancesDF` and column `dates` in pandas data frame `datesDF` into single",
    "cmd": "pd.concat([distancesDF, datesDF.dates], axis=1)",
    "question_id": "40076861-4",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.concat"
    ],
    "clean_cmd": "pd.concat([distancesDF, datesDF.dates], axis=)",
    "canonical_cmd": "pd.concat([VAR_STR, VAR_STR.VAR_STR], axis=1)"
  },
  {
    "nl": "convert a list `my_list` into string with values separated by spaces",
    "cmd": "\"\"\" \"\"\".join(my_list)",
    "question_id": "12309976-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(my_list)",
    "canonical_cmd": "\"\"\" \"\"\".join(VAR_STR)"
  },
  {
    "nl": "generate a list of all unique pairs of integers in `range(9)`",
    "cmd": "list(permutations(list(range(9)), 2))",
    "question_id": "14169122-51",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.functions#range"
    ],
    "clean_cmd": "list(permutations(list(range()), ))",
    "canonical_cmd": "list(permutations(list(range(9)), 2))"
  },
  {
    "nl": "Split a string `text` with comma, question mark or exclamation by non-consuming regex using look-behind",
    "cmd": "re.split('(?<=[\\\\.\\\\?!]) ', text)",
    "question_id": "5801945-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, text)",
    "canonical_cmd": "re.split('(?<=[\\\\.\\\\?!]) ', VAR_STR)"
  },
  {
    "nl": "Subtract the mean of each row in dataframe `df` from the corresponding row's elements",
    "cmd": "df.sub(df.mean(axis=1), axis=0)",
    "question_id": "26081300-96",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.sub",
      "pandas.reference.api.pandas.dataframe.mean"
    ],
    "clean_cmd": "df.sub(df.mean(axis=), axis=)",
    "canonical_cmd": "VAR_STR.sub(VAR_STR.mean(axis=1), axis=0)"
  },
  {
    "nl": "replace extension '.txt' in basename '/home/user/somefile.txt' with extension '.jpg'",
    "cmd": "print(os.path.splitext('/home/user/somefile.txt')[0] + '.jpg')",
    "question_id": "3548673-96",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.splitext"
    ],
    "clean_cmd": "print(os.path.splitext()[] + )",
    "canonical_cmd": "print(os.path.splitext('VAR_STR')[0] + 'VAR_STR')"
  },
  {
    "nl": "find and replace 2nd occurrence of word 'cat' by 'Bull' in a sentence 's'",
    "cmd": "re.sub('^((?:(?!cat).)*cat(?:(?!cat).)*)cat', '\\\\1Bull', s)",
    "question_id": "27589325-84",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , s)",
    "canonical_cmd": "re.sub('^((?:(?!cat).)*cat(?:(?!cat).)*)cat', '\\\\1Bull', VAR_STR)"
  },
  {
    "nl": "find and replace 2nd occurrence of word 'cat' by 'Bull' in a sentence 's'",
    "cmd": "re.sub('^((.*?cat.*?){1})cat', '\\\\1Bull', s)",
    "question_id": "27589325-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , s)",
    "canonical_cmd": "re.sub('^((.*?cat.*?){1})cat', '\\\\1Bull', VAR_STR)"
  },
  {
    "nl": "parse string `a` to float",
    "cmd": "float(a)",
    "question_id": "379906-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#float"
    ],
    "clean_cmd": "float(a)",
    "canonical_cmd": "float(VAR_STR)"
  },
  {
    "nl": "Parse String `s` to Float or Int",
    "cmd": "try:\n    return int(s)\nexcept ValueError:\n    return float(s)",
    "question_id": "379906-7",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#float",
      "python.library.functions#int"
    ],
    "clean_cmd": ": int(s) ValueError: float(s)",
    "canonical_cmd": "try:\n    return int(VAR_STR)\nexcept ValueError:\n    return float(VAR_STR)"
  },
  {
    "nl": "convert a list `L` of ascii values to a string",
    "cmd": "\"\"\"\"\"\".join(chr(i) for i in L)",
    "question_id": "180606-7",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#chr",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(chr(i)  i  L)",
    "canonical_cmd": "\"\"\"\"\"\".join(chr(i) for i in VAR_STR)"
  },
  {
    "nl": "sort list `['10', '3', '2']` in ascending order based on the integer value of its elements",
    "cmd": "sorted(['10', '3', '2'], key=int)",
    "question_id": "9758959-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted([, , ], key=int)",
    "canonical_cmd": "sorted([VAR_STR], key=int)"
  },
  {
    "nl": "custom sort an alphanumeric list `l`",
    "cmd": "sorted(l, key=lambda x: x.replace('0', 'Z'))",
    "question_id": "41894454-52",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "sorted(l, key= x: x.replace(, ))",
    "canonical_cmd": "sorted(VAR_STR, key=lambda x: x.replace('0', 'Z'))"
  },
  {
    "nl": "strip a string `line` of all carriage returns and newlines",
    "cmd": "line.strip()",
    "question_id": "13656519-84",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": "line.strip()",
    "canonical_cmd": "VAR_STR.strip()"
  },
  {
    "nl": "remove white space padding around a saved image `test.png` in matplotlib",
    "cmd": "plt.savefig('test.png', bbox_inches='tight')",
    "question_id": "11837979-28",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.figure_api#matplotlib.figure.Figure.savefig"
    ],
    "clean_cmd": "plt.savefig(, bbox_inches=)",
    "canonical_cmd": "plt.savefig('VAR_STR', bbox_inches='tight')"
  },
  {
    "nl": "execute a jar file 'Blender.jar' using subprocess",
    "cmd": "subprocess.call(['java', '-jar', 'Blender.jar'])",
    "question_id": "7372592-56",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call([, , ])",
    "canonical_cmd": "subprocess.call(['java', '-jar', 'VAR_STR'])"
  },
  {
    "nl": "Find next sibling element in Python Selenium?",
    "cmd": "driver.find_element_by_xpath(\"//p[@id, 'one']/following-sibling::p\")",
    "question_id": "23887592-74",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.find_element_by_xpath()",
    "canonical_cmd": "driver.find_element_by_xpath(\"//p[@id, 'one']/following-sibling::p\")"
  },
  {
    "nl": "convert Unicode codepoint to utf8 hex",
    "cmd": "chr(int('fd9b', 16)).encode('utf-8')",
    "question_id": "867866-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#chr",
      "python.library.functions#int",
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "chr(int(, )).encode()",
    "canonical_cmd": "chr(int('fd9b', 16)).encode('utf-8')"
  },
  {
    "nl": "zip keys with individual values in lists `k` and `v`",
    "cmd": "[dict(zip(k, x)) for x in v]",
    "question_id": "13480031-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "[dict(zip(k, x))  x  v]",
    "canonical_cmd": "[dict(zip(VAR_STR, x)) for x in VAR_STR]"
  },
  {
    "nl": "read pandas data frame csv `comma.csv` with extra commas in column specifying string delimiter `'`",
    "cmd": "df = pd.read_csv('comma.csv', quotechar=\"'\")",
    "question_id": "32743479-65",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.read_csv"
    ],
    "clean_cmd": "df = pd.read_csv(, quotechar=)",
    "canonical_cmd": "df = pd.read_csv('VAR_STR', quotechar='VAR_STR')"
  },
  {
    "nl": "Log message 'test' on the root logger.",
    "cmd": "logging.info('test')",
    "question_id": "18292500-92",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.logging#logging.info"
    ],
    "clean_cmd": "logging.info()",
    "canonical_cmd": "logging.info('VAR_STR')"
  },
  {
    "nl": "sort list of lists `L` by the second item in each list",
    "cmd": "L.sort(key=operator.itemgetter(1))",
    "question_id": "5201191-46",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.operator#operator.itemgetter",
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "L.sort(key=operator.itemgetter())",
    "canonical_cmd": "VAR_STR.sort(key=operator.itemgetter(1))"
  },
  {
    "nl": "extract dictionary values by key 'Feature3' from data frame `df`",
    "cmd": "feature3 = [d.get('Feature3') for d in df.dic]",
    "question_id": "35711059-64",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.get"
    ],
    "clean_cmd": "feature3 = [d.get()  d  df.dic]",
    "canonical_cmd": "feature3 = [d.get('VAR_STR') for d in VAR_STR.dic]"
  },
  {
    "nl": "find the real user home directory using python",
    "cmd": "os.path.expanduser('~user')",
    "question_id": "2668909-73",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.expanduser"
    ],
    "clean_cmd": "os.path.expanduser()",
    "canonical_cmd": "os.path.expanduser('~user')"
  },
  {
    "nl": "lambda function that adds two operands",
    "cmd": "lambda x, y: x + y",
    "question_id": "6243460-84",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " x, y: x + y",
    "canonical_cmd": "lambda x, y: x + y"
  },
  {
    "nl": "drop rows whose index value in list `[1, 3]` in dataframe `df`",
    "cmd": "df.drop(df.index[[1, 3]], inplace=True)",
    "question_id": "14661701-41",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.drop"
    ],
    "clean_cmd": "df.drop(df.index[[, ]], inplace=True)",
    "canonical_cmd": "VAR_STR.drop(VAR_STR.index[[VAR_STR]], inplace=True)"
  },
  {
    "nl": "extract the 2nd elements from a list of tuples",
    "cmd": "[x[1] for x in elements]",
    "question_id": "3308102-72",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[x[]  x  elements]",
    "canonical_cmd": "[x[1] for x in elements]"
  },
  {
    "nl": "find the first letter of each element in string `input`",
    "cmd": "output = ''.join(item[0].upper() for item in input.split())",
    "question_id": "5775719-36",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.upper",
      "python.library.stdtypes#str.join",
      "python.library.re#re.split"
    ],
    "clean_cmd": "output = .join(item[].upper()  item  input.split())",
    "canonical_cmd": "output = ''.join(item[0].upper() for item in VAR_STR.split())"
  },
  {
    "nl": "replace percent-encoded code in request `f` to their single-character equivalent",
    "cmd": "f = urllib.request.urlopen(url, urllib.parse.unquote(urllib.parse.urlencode(params)))",
    "question_id": "12527959-62",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.parse#urllib.parse.unquote",
      "python.library.urllib.parse#urllib.parse.urlencode",
      "python.library.urllib.request#urllib.request.urlopen"
    ],
    "clean_cmd": "f = urllib.request.urlopen(url, urllib.parse.unquote(urllib.parse.urlencode(params)))",
    "canonical_cmd": "VAR_STR = urllib.request.urlopen(url, urllib.parse.unquote(urllib.parse.\n    urlencode(params)))"
  },
  {
    "nl": "call base class's __init__ method from the child class `ChildClass`",
    "cmd": "super(ChildClass, self).__init__(*args, **kwargs)",
    "question_id": "19205916-14",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#super",
      "python.library.logging#logging.Handler.__init__"
    ],
    "clean_cmd": "super(ChildClass, self).__init__(*args, **kwargs)",
    "canonical_cmd": "super(VAR_STR, self).__init__(*args, **kwargs)"
  },
  {
    "nl": "extract only alphabetic characters from a string `your string`",
    "cmd": "\"\"\" \"\"\".join(re.split('[^a-zA-Z]*', 'your string'))",
    "question_id": "8199398-65",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(re.split(, ))",
    "canonical_cmd": "\"\"\" \"\"\".join(re.split('[^a-zA-Z]*', 'VAR_STR'))"
  },
  {
    "nl": "Extract only characters from a string as a list",
    "cmd": "re.split('[^a-zA-Z]*', 'your string')",
    "question_id": "8199398-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, )",
    "canonical_cmd": "re.split('[^a-zA-Z]*', 'your string')"
  },
  {
    "nl": "Convert binary string to list of integers using Python",
    "cmd": "[int(s[i:i + 3], 2) for i in range(0, len(s), 3)]",
    "question_id": "1386811-62",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range",
      "python.library.functions#int"
    ],
    "clean_cmd": "[int(s[i:i + ], )  i  range(, len(s), )]",
    "canonical_cmd": "[int(s[i:i + 3], 2) for i in range(0, len(s), 3)]"
  },
  {
    "nl": "find maximum with lookahead = 4 in a list `arr`",
    "cmd": "[max(abs(x) for x in arr[i:i + 4]) for i in range(0, len(arr), 4)]",
    "question_id": "34543513-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range",
      "python.library.functions#abs",
      "python.library.functions#max"
    ],
    "clean_cmd": "[max(abs(x)  x  arr[i:i + ])  i  range(, len(arr), )]",
    "canonical_cmd": "[max(abs(x) for x in VAR_STR[i:i + 4]) for i in range(0, len(VAR_STR), 4)]"
  },
  {
    "nl": "replace single quote character in string \"didn't\" with empty string ''",
    "cmd": "\"\"\"didn't\"\"\".replace(\"'\", '')",
    "question_id": "3151146-41",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": ".replace(, )",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".replace(\"'\", 'VAR_STR')"
  },
  {
    "nl": "Find a max value of the key `count` in a nested dictionary `d`",
    "cmd": "max(d, key=lambda x: d[x]['count'])",
    "question_id": "12829889-23",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#max"
    ],
    "clean_cmd": "max(d, key= x: d[x][])",
    "canonical_cmd": "max(VAR_STR, key=lambda x: VAR_STR[x]['VAR_STR'])"
  },
  {
    "nl": "Get a list of tuples with multiple iterators using list comprehension",
    "cmd": "[(i, j) for i in range(1, 3) for j in range(1, 5)]",
    "question_id": "329886-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "[(i, j)  i  range(, )  j  range(, )]",
    "canonical_cmd": "[(i, j) for i in range(1, 3) for j in range(1, 5)]"
  },
  {
    "nl": "read a binary file 'test/test.pdf'",
    "cmd": "f = open('test/test.pdf', 'rb')",
    "question_id": "2497027-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "f = open(, )",
    "canonical_cmd": "f = open('VAR_STR', 'rb')"
  },
  {
    "nl": "remove all characters from string `stri` upto character 'I'",
    "cmd": "re.sub('.*I', 'I', stri)",
    "question_id": "30945784-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , stri)",
    "canonical_cmd": "re.sub('.*I', 'VAR_STR', VAR_STR)"
  },
  {
    "nl": "append string `foo` to list `list`",
    "cmd": "list.append('foo')",
    "question_id": "8243188-64",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.append"
    ],
    "clean_cmd": "list.append()",
    "canonical_cmd": "VAR_STR.append('VAR_STR')"
  },
  {
    "nl": "insert string `foo` at position `0` of list `list`",
    "cmd": "list.insert(0, 'foo')",
    "question_id": "8243188-74",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.insert"
    ],
    "clean_cmd": "list.insert(, )",
    "canonical_cmd": "VAR_STR.insert(0, 'VAR_STR')"
  },
  {
    "nl": "get the text of multiple elements found by xpath \"//*[@type='submit']/@value\"",
    "cmd": "browser.find_elements_by_xpath(\"//*[@type='submit']/@value\").text",
    "question_id": "12579061-75",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "browser.find_elements_by_xpath().text",
    "canonical_cmd": "browser.find_elements_by_xpath('VAR_STR').text"
  },
  {
    "nl": "find all the values in attribute `value` for the tags whose `type` attribute is `submit` in selenium",
    "cmd": "browser.find_elements_by_xpath(\"//*[@type='submit']\").get_attribute('value')",
    "question_id": "12579061-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.test#test.support.get_attribute"
    ],
    "clean_cmd": "browser.find_elements_by_xpath().get_attribute()",
    "canonical_cmd": "browser.find_elements_by_xpath(\"//*[@type='submit']\").get_attribute('VAR_STR')"
  },
  {
    "nl": "converting string '(1,2,3,4)' to a tuple",
    "cmd": "ast.literal_eval('(1,2,3,4)')",
    "question_id": "3945856-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.ast#ast.literal_eval"
    ],
    "clean_cmd": "ast.literal_eval()",
    "canonical_cmd": "ast.literal_eval('VAR_STR')"
  },
  {
    "nl": "store data frame `df` to file `file_name` using pandas, python",
    "cmd": "df.to_pickle(file_name)",
    "question_id": "17098654-11",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.to_pickle"
    ],
    "clean_cmd": "df.to_pickle(file_name)",
    "canonical_cmd": "VAR_STR.to_pickle(VAR_STR)"
  },
  {
    "nl": "extract all the values of a specific key named 'values' from a list of dictionaries",
    "cmd": "results = [item['value'] for item in test_data]",
    "question_id": "25148611-71",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "results = [item[]  item  test_data]",
    "canonical_cmd": "results = [item['value'] for item in test_data]"
  },
  {
    "nl": "convert a date string `s` to a datetime object",
    "cmd": "datetime.datetime.strptime(s, '%Y-%m-%dT%H:%M:%SZ')",
    "question_id": "2721782-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime"
    ],
    "clean_cmd": "datetime.datetime.strptime(s, )",
    "canonical_cmd": "datetime.datetime.strptime(VAR_STR, '%Y-%m-%dT%H:%M:%SZ')"
  },
  {
    "nl": "Get only first element in each of the innermost of the multidimensional list `listD`",
    "cmd": "[[[x[0]] for x in listD[i]] for i in range(len(listD))]",
    "question_id": "40852575-70",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range"
    ],
    "clean_cmd": "[[[x[]]  x  listD[i]]  i  range(len(listD))]",
    "canonical_cmd": "[[[x[0]] for x in VAR_STR[i]] for i in range(len(VAR_STR))]"
  },
  {
    "nl": "print a floating point number 2.345e-67 without any truncation",
    "cmd": "print('{:.100f}'.format(2.345e-67))",
    "question_id": "20048987-61",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": "print(.format(2.345e-67))",
    "canonical_cmd": "print('{:.100f}'.format(2.345e-67))"
  },
  {
    "nl": "remove duplicate characters from string 'ffffffbbbbbbbqqq'",
    "cmd": "re.sub('([a-z])\\\\1+', '\\\\1', 'ffffffbbbbbbbqqq')",
    "question_id": "4574509-95",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , )",
    "canonical_cmd": "re.sub('([a-z])\\\\1+', '\\\\1', 'VAR_STR')"
  },
  {
    "nl": "split string \"a;bcd,ef g\" on delimiters ';' and ','",
    "cmd": "\"\"\"a;bcd,ef g\"\"\".replace(';', ' ').replace(',', ' ').split()",
    "question_id": "1059559-41",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": ".replace(, ).replace(, ).split()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".replace('VAR_STR', ' ').replace('VAR_STR', ' ').split()"
  },
  {
    "nl": "extract all rows from dataframe `data` where the value of column 'Value' is True",
    "cmd": "data[data['Value'] == True]",
    "question_id": "17424182-56",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "data[data[] == True]",
    "canonical_cmd": "VAR_STR[VAR_STR['VAR_STR'] == True]"
  },
  {
    "nl": "split string `s` into strings of repeating elements",
    "cmd": "print([a for a, b in re.findall('((\\\\w)\\\\2*)', s)])",
    "question_id": "9505526-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "print([a  a, b  re.findall(, s)])",
    "canonical_cmd": "print([a for a, b in re.findall('((\\\\w)\\\\2*)', VAR_STR)])"
  },
  {
    "nl": "trim string \" Hello \"",
    "cmd": "' Hello '.strip()",
    "question_id": "761804-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": ".strip()",
    "canonical_cmd": "\"\"\" Hello \"\"\".strip()"
  },
  {
    "nl": "trim string `myString `",
    "cmd": "myString.strip()",
    "question_id": "761804-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": "myString.strip()",
    "canonical_cmd": "VAR_STR.strip()"
  },
  {
    "nl": "Trimming a string \" Hello \"",
    "cmd": "' Hello '.strip()",
    "question_id": "761804-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": ".strip()",
    "canonical_cmd": "\"\"\" Hello \"\"\".strip()"
  },
  {
    "nl": "Trimming a string \" Hello\"",
    "cmd": "' Hello'.strip()",
    "question_id": "761804-7",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": ".strip()",
    "canonical_cmd": "\"\"\" Hello\"\"\".strip()"
  },
  {
    "nl": "Trimming a string \"Bob has a cat\"",
    "cmd": "'Bob has a cat'.strip()",
    "question_id": "761804-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": ".strip()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".strip()"
  },
  {
    "nl": "Trimming a string \"          Hello        \"",
    "cmd": "'          Hello        '.strip()",
    "question_id": "761804-4",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": ".strip()",
    "canonical_cmd": "\"\"\"          Hello        \"\"\".strip()"
  },
  {
    "nl": "Trimming a string `str`",
    "cmd": "str.strip()",
    "question_id": "761804-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": "str.strip()",
    "canonical_cmd": "VAR_STR.strip()"
  },
  {
    "nl": "Trimming \"\\n\" from string `myString`",
    "cmd": "myString.strip('\\n')",
    "question_id": "761804-11",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": "myString.strip()",
    "canonical_cmd": "VAR_STR.strip('VAR_STR')"
  },
  {
    "nl": "left trimming \"\\n\\r\" from string `myString`",
    "cmd": "myString.lstrip('\\n\\r')",
    "question_id": "761804-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": "myString.lstrip()",
    "canonical_cmd": "VAR_STR.lstrip('VAR_STR')"
  },
  {
    "nl": "right trimming \"\\n\\t\" from string `myString`",
    "cmd": "myString.rstrip('\\n\\t')",
    "question_id": "761804-51",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rstrip"
    ],
    "clean_cmd": "myString.rstrip()",
    "canonical_cmd": "VAR_STR.rstrip('VAR_STR')"
  },
  {
    "nl": "Trimming a string \"  Hello\\n\" by space",
    "cmd": "'  Hello\\n'.strip(' ')",
    "question_id": "761804-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": ".strip()",
    "canonical_cmd": "\"\"\"  Hello\n\"\"\".strip(' ')"
  },
  {
    "nl": "read csv file 'my_file.csv' into numpy array",
    "cmd": "my_data = genfromtxt('my_file.csv', delimiter=',')",
    "question_id": "3518778-98",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.genfromtxt"
    ],
    "clean_cmd": "my_data = genfromtxt(, delimiter=)",
    "canonical_cmd": "my_data = genfromtxt('VAR_STR', delimiter=',')"
  },
  {
    "nl": "read csv file 'myfile.csv' into array",
    "cmd": "df = pd.read_csv('myfile.csv', sep=',', header=None)",
    "question_id": "3518778-46",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.read_csv"
    ],
    "clean_cmd": "df = pd.read_csv(, sep=, header=None)",
    "canonical_cmd": "df = pd.read_csv('VAR_STR', sep=',', header=None)"
  },
  {
    "nl": "read csv file 'myfile.csv' into array",
    "cmd": "np.genfromtxt('myfile.csv', delimiter=',')",
    "question_id": "3518778-91",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.genfromtxt"
    ],
    "clean_cmd": "np.genfromtxt(, delimiter=)",
    "canonical_cmd": "np.genfromtxt('VAR_STR', delimiter=',')"
  },
  {
    "nl": "read csv file 'myfile.csv' into array",
    "cmd": "np.genfromtxt('myfile.csv', delimiter=',', dtype=None)",
    "question_id": "3518778-42",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.genfromtxt"
    ],
    "clean_cmd": "np.genfromtxt(, delimiter=, dtype=None)",
    "canonical_cmd": "np.genfromtxt('VAR_STR', delimiter=',', dtype=None)"
  },
  {
    "nl": "multiply column 'A' and column 'B' by column 'C' in datafram `df`",
    "cmd": "df[['A', 'B']].multiply(df['C'], axis='index')",
    "question_id": "22702760-34",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.multiply"
    ],
    "clean_cmd": "df[[, ]].multiply(df[], axis=)",
    "canonical_cmd": "VAR_STR[['VAR_STR', 'VAR_STR']].multiply(VAR_STR['VAR_STR'], axis='index')"
  },
  {
    "nl": "Normalize string `str` from 'cp1252' code to 'utf-8' code",
    "cmd": "print(str.encode('cp1252').decode('utf-8').encode('cp1252').decode('utf-8'))",
    "question_id": "10525301-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode",
      "pandas.reference.api.pandas.series.str.decode"
    ],
    "clean_cmd": "print(str.encode().decode().encode().decode())",
    "canonical_cmd": "print(VAR_STR.encode('VAR_STR').decode('VAR_STR').encode('VAR_STR').decode('VAR_STR'))"
  },
  {
    "nl": "Iterating over a dictionary `d` using for loops",
    "cmd": "for (key, value) in d.items():\n    pass",
    "question_id": "3294889-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": " (key, value)  d.items():",
    "canonical_cmd": "for key, value in VAR_STR.items():\n    pass"
  },
  {
    "nl": "Iterating over a dictionary `d` using for loops",
    "cmd": "for (key, value) in list(d.items()):\n    pass",
    "question_id": "3294889-61",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": " (key, value)  list(d.items()):",
    "canonical_cmd": "for key, value in list(VAR_STR.items()):\n    pass"
  },
  {
    "nl": "Iterating key and items over dictionary `d`",
    "cmd": "for (letter, number) in list(d.items()):\n    pass",
    "question_id": "3294889-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": " (letter, number)  list(d.items()):",
    "canonical_cmd": "for letter, number in list(VAR_STR.items()):\n    pass"
  },
  {
    "nl": "Iterating key and items over dictionary `d`",
    "cmd": "for (k, v) in list(d.items()):\n    pass",
    "question_id": "3294889-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": " (k, v)  list(d.items()):",
    "canonical_cmd": "for k, v in list(VAR_STR.items()):\n    pass"
  },
  {
    "nl": "get keys and items of dictionary `d`",
    "cmd": "list(d.items())",
    "question_id": "3294889-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "list(d.items())",
    "canonical_cmd": "list(VAR_STR.items())"
  },
  {
    "nl": "get keys and items of dictionary `d` as a list",
    "cmd": "list(d.items())",
    "question_id": "3294889-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "list(d.items())",
    "canonical_cmd": "list(VAR_STR.items())"
  },
  {
    "nl": "Iterating key and items over dictionary `d`",
    "cmd": "for (k, v) in list(d.items()):\n    pass",
    "question_id": "3294889-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": " (k, v)  list(d.items()):",
    "canonical_cmd": "for k, v in list(VAR_STR.items()):\n    pass"
  },
  {
    "nl": "Iterating key and items over dictionary `d`",
    "cmd": "for (letter, number) in list(d.items()):\n    pass",
    "question_id": "3294889-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": " (letter, number)  list(d.items()):",
    "canonical_cmd": "for letter, number in list(VAR_STR.items()):\n    pass"
  },
  {
    "nl": "Iterating key and items over dictionary `d`",
    "cmd": "for (letter, number) in list(d.items()):\n    pass",
    "question_id": "3294889-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": " (letter, number)  list(d.items()):",
    "canonical_cmd": "for letter, number in list(VAR_STR.items()):\n    pass"
  },
  {
    "nl": "Create 2D numpy array from the data provided in 'somefile.csv' with each row in the file having same number of values",
    "cmd": "X = numpy.loadtxt('somefile.csv', delimiter=',')",
    "question_id": "7356042-14",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.loadtxt"
    ],
    "clean_cmd": "X = numpy.loadtxt(, delimiter=)",
    "canonical_cmd": "X = numpy.loadtxt('VAR_STR', delimiter=',')"
  },
  {
    "nl": "control the keyboard and mouse with dogtail in linux",
    "cmd": "dogtail.rawinput.click(100, 100)",
    "question_id": "1946181-83",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "dogtail.rawinput.click(, )",
    "canonical_cmd": "dogtail.rawinput.click(100, 100)"
  },
  {
    "nl": "sort datetime objects `birthdays` by `month` and `day`",
    "cmd": "birthdays.sort(key=lambda d: (d.month, d.day))",
    "question_id": "2040038-20",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "birthdays.sort(key= d: (d.month, d.day))",
    "canonical_cmd": "VAR_STR.sort(key=lambda d: (d.VAR_STR, d.VAR_STR))"
  },
  {
    "nl": "remove trailing newline in string \"test string\\n\"",
    "cmd": "'test string\\n'.rstrip()",
    "question_id": "275018-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rstrip"
    ],
    "clean_cmd": ".rstrip()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".rstrip()"
  },
  {
    "nl": "remove trailing newline in string 'test string \\n\\n'",
    "cmd": "'test string \\n\\n'.rstrip('\\n')",
    "question_id": "275018-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rstrip"
    ],
    "clean_cmd": ".rstrip()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".rstrip('\\n')"
  },
  {
    "nl": "remove newline in string `s`",
    "cmd": "s.strip()",
    "question_id": "275018-98",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": "s.strip()",
    "canonical_cmd": "VAR_STR.strip()"
  },
  {
    "nl": "remove newline in string `s` on the right side",
    "cmd": "s.rstrip()",
    "question_id": "275018-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rstrip"
    ],
    "clean_cmd": "s.rstrip()",
    "canonical_cmd": "VAR_STR.rstrip()"
  },
  {
    "nl": "remove newline in string `s` on the left side",
    "cmd": "s.lstrip()",
    "question_id": "275018-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.lstrip"
    ],
    "clean_cmd": "s.lstrip()",
    "canonical_cmd": "VAR_STR.lstrip()"
  },
  {
    "nl": "remove newline in string 'Mac EOL\\r'",
    "cmd": "'Mac EOL\\r'.rstrip('\\r\\n')",
    "question_id": "275018-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rstrip"
    ],
    "clean_cmd": ".rstrip()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".rstrip('\\r\\n')"
  },
  {
    "nl": "remove newline in string 'Windows EOL\\r\\n' on the right side",
    "cmd": "'Windows EOL\\r\\n'.rstrip('\\r\\n')",
    "question_id": "275018-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rstrip"
    ],
    "clean_cmd": ".rstrip()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".rstrip('\\r\\n')"
  },
  {
    "nl": "remove newline in string 'Unix EOL\\n' on the right side",
    "cmd": "'Unix EOL\\n'.rstrip('\\r\\n')",
    "question_id": "275018-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rstrip"
    ],
    "clean_cmd": ".rstrip()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".rstrip('\\r\\n')"
  },
  {
    "nl": "remove newline in string \"Hello\\n\\n\\n\" on the right side",
    "cmd": "'Hello\\n\\n\\n'.rstrip('\\n')",
    "question_id": "275018-58",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rstrip"
    ],
    "clean_cmd": ".rstrip()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".rstrip('\\n')"
  },
  {
    "nl": "split string `text` by the occurrences of regex pattern '(?<=\\\\?|!|\\\\.)\\\\s{0,2}(?=[A-Z]|$)'",
    "cmd": "re.split('(?<=\\\\?|!|\\\\.)\\\\s{0,2}(?=[A-Z]|$)', text)",
    "question_id": "15530399-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, text)",
    "canonical_cmd": "re.split('VAR_STR', VAR_STR)"
  },
  {
    "nl": "split a string `s` by ';' and convert to a dictionary",
    "cmd": "dict(item.split('=') for item in s.split(';'))",
    "question_id": "186857-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "dict(item.split()  item  s.split())",
    "canonical_cmd": "dict(item.split('=') for item in VAR_STR.split('VAR_STR'))"
  },
  {
    "nl": "create a list where each element is a value of the key 'Name' for each dictionary `d` in the list `thisismylist`",
    "cmd": "[d['Name'] for d in thisismylist]",
    "question_id": "17117912-75",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[d[]  d  thisismylist]",
    "canonical_cmd": "[VAR_STR['VAR_STR'] for VAR_STR in VAR_STR]"
  },
  {
    "nl": "create a list of tuples with the values of keys 'Name' and 'Age' from each dictionary `d` in the list `thisismylist`",
    "cmd": "[(d['Name'], d['Age']) for d in thisismylist]",
    "question_id": "17117912-14",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[(d[], d[])  d  thisismylist]",
    "canonical_cmd": "[(VAR_STR['VAR_STR'], VAR_STR['VAR_STR']) for VAR_STR in VAR_STR]"
  },
  {
    "nl": "Reverse key-value pairs in a dictionary `map`",
    "cmd": "dict((v, k) for k, v in map.items())",
    "question_id": "8650415-85",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "dict((v, k)  k, v  map.items())",
    "canonical_cmd": "dict((v, k) for k, v in VAR_STR.items())"
  },
  {
    "nl": "assign value in `group` dynamically to class property `attr`",
    "cmd": "setattr(self, attr, group)",
    "question_id": "19153328-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#setattr"
    ],
    "clean_cmd": "setattr(self, attr, group)",
    "canonical_cmd": "setattr(self, VAR_STR, VAR_STR)"
  },
  {
    "nl": "sort list of date strings 'd'",
    "cmd": "sorted(d, key=lambda x: datetime.datetime.strptime(x, '%m-%Y'))",
    "question_id": "17627531-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime",
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(d, key= x: datetime.datetime.strptime(x, ))",
    "canonical_cmd": "sorted(VAR_STR, key=lambda x: datetime.datetime.strptime(x, '%m-%Y'))"
  },
  {
    "nl": "test if either of strings `a` or `b` are members of the set of strings, `['b', 'a', 'foo', 'bar']`",
    "cmd": "set(['a', 'b']).issubset(['b', 'a', 'foo', 'bar'])",
    "question_id": "6159313-36",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#set",
      "python.library.stdtypes#frozenset.issubset"
    ],
    "clean_cmd": "set([, ]).issubset([, , , ])",
    "canonical_cmd": "set(['VAR_STR', 'VAR_STR']).issubset(['VAR_STR', 'VAR_STR', 'foo', 'bar'])"
  },
  {
    "nl": "Check if all the values in a list `['a', 'b']` are present in another list `['b', 'a', 'foo', 'bar']`",
    "cmd": "all(x in ['b', 'a', 'foo', 'bar'] for x in ['a', 'b'])",
    "question_id": "6159313-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#all"
    ],
    "clean_cmd": "all(x  [, , , ]  x  [, ])",
    "canonical_cmd": "all(x in [VAR_STR] for x in [VAR_STR])"
  },
  {
    "nl": "format float `3.5e+20`  to `$3.5 \\\\times 10^{20}$` and set as title of matplotlib plot `ax`",
    "cmd": "ax.set_title('$%s \\\\times 10^{%s}$' % ('3.5', '+20'))",
    "question_id": "17306755-9",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.legend_api#matplotlib.legend.Legend.set_title"
    ],
    "clean_cmd": "ax.set_title( % (, ))",
    "canonical_cmd": "VAR_STR.set_title('$%s \\\\times 10^{%s}$' % ('3.5', '+20'))"
  },
  {
    "nl": "set text color as `red` and background color as `#A3C1DA` in qpushbutton",
    "cmd": "setStyleSheet('QPushButton {background-color: #A3C1DA; color: red;}')",
    "question_id": "24659239-4",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "setStyleSheet()",
    "canonical_cmd": "setStyleSheet('QPushButton {background-color: #A3C1DA; color: red;}')"
  },
  {
    "nl": "convert an rgb image 'messi5.jpg' into grayscale `img`",
    "cmd": "img = cv2.imread('messi5.jpg', 0)",
    "question_id": "12201577-67",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.image_api#matplotlib.image.imread"
    ],
    "clean_cmd": "img = cv2.imread(, )",
    "canonical_cmd": "VAR_STR = cv2.imread('VAR_STR', 0)"
  },
  {
    "nl": "create list `levels` containing 3 empty dictionaries",
    "cmd": "levels = [{}, {}, {}]",
    "question_id": "4411811-39",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "levels = [{}, {}, {}]",
    "canonical_cmd": "VAR_STR = [{}, {}, {}]"
  },
  {
    "nl": "parse string '01-Jan-1995' into a datetime object using format '%d-%b-%Y'",
    "cmd": "datetime.datetime.strptime('01-Jan-1995', '%d-%b-%Y')",
    "question_id": "1713594-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime"
    ],
    "clean_cmd": "datetime.datetime.strptime(, )",
    "canonical_cmd": "datetime.datetime.strptime('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "Convert integer elements in list `wordids` to strings",
    "cmd": "[str(wi) for wi in wordids]",
    "question_id": "7768859-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str"
    ],
    "clean_cmd": "[str(wi)  wi  wordids]",
    "canonical_cmd": "[str(wi) for wi in VAR_STR]"
  },
  {
    "nl": "get a list `cleaned` that contains all non-empty elements in list `your_list`",
    "cmd": "cleaned = [x for x in your_list if x]",
    "question_id": "16099694-63",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "cleaned = [x  x  your_list  x]",
    "canonical_cmd": "VAR_STR = [x for x in VAR_STR if x]"
  },
  {
    "nl": "split a string `yas` based on tab '\\t'",
    "cmd": "re.split('\\\\t+', yas.rstrip('\\t'))",
    "question_id": "17038426-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split",
      "python.library.stdtypes#str.rstrip"
    ],
    "clean_cmd": "re.split(, yas.rstrip())",
    "canonical_cmd": "re.split('\\\\t+', VAR_STR.rstrip('VAR_STR'))"
  },
  {
    "nl": "sorting the lists in list of lists `data`",
    "cmd": "[sorted(item) for item in data]",
    "question_id": "34197047-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "[sorted(item)  item  data]",
    "canonical_cmd": "[sorted(item) for item in VAR_STR]"
  },
  {
    "nl": "remove identical items from list `my_list` and sort it alphabetically",
    "cmd": "sorted(set(my_list))",
    "question_id": "22741068-41",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.stdtypes#set"
    ],
    "clean_cmd": "sorted(set(my_list))",
    "canonical_cmd": "sorted(set(VAR_STR))"
  },
  {
    "nl": "convert a list of lists `a` into list of tuples of appropriate elements form nested lists",
    "cmd": "zip(*a)",
    "question_id": "35017035-30",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "zip(*a)",
    "canonical_cmd": "zip(*VAR_STR)"
  },
  {
    "nl": "converting hex string `s` to its integer representations",
    "cmd": "[ord(c) for c in s.decode('hex')]",
    "question_id": "7595148-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#ord",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "[ord(c)  c  s.decode()]",
    "canonical_cmd": "[ord(c) for c in VAR_STR.decode('hex')]"
  },
  {
    "nl": "create pandas data frame `df` from txt file `filename.txt` with column `Region Name` and separator `;`",
    "cmd": "df = pd.read_csv('filename.txt', sep=';', names=['Region Name'])",
    "question_id": "41386443-67",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.read_csv"
    ],
    "clean_cmd": "df = pd.read_csv(, sep=, names=[])",
    "canonical_cmd": "VAR_STR = pd.read_csv('VAR_STR', sep='VAR_STR', names=['VAR_STR'])"
  },
  {
    "nl": "get a list of all the duplicate items in dataframe `df` using pandas",
    "cmd": "pd.concat(g for _, g in df.groupby('ID') if len(g) > 1)",
    "question_id": "14657241-69",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.concat",
      "python.library.functions#len"
    ],
    "clean_cmd": "pd.concat(g  _, g  df.groupby()  len(g) &gt; )",
    "canonical_cmd": "pd.concat(g for _, g in VAR_STR.groupby('ID') if len(g) > 1)"
  },
  {
    "nl": "sort objects in model `Profile` based on Theirs `reputation` attribute",
    "cmd": "sorted(Profile.objects.all(), key=lambda p: p.reputation)",
    "question_id": "930865-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#all"
    ],
    "clean_cmd": "sorted(Profile.objects.all(), key= p: p.reputation)",
    "canonical_cmd": "sorted(VAR_STR.objects.all(), key=lambda p: p.VAR_STR)"
  },
  {
    "nl": "for a dictionary `a`, set default value for key `somekey` as list and append value `bob`  in that key",
    "cmd": "a.setdefault('somekey', []).append('bob')",
    "question_id": "20585920-51",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.setdefault",
      "numpy.reference.generated.numpy.append"
    ],
    "clean_cmd": "a.setdefault(, []).append()",
    "canonical_cmd": "VAR_STR.setdefault('VAR_STR', []).append('VAR_STR')"
  },
  {
    "nl": "replace white spaces in string '  a\\n b\\n c\\nd  e' with empty string ''",
    "cmd": "re.sub('(?m)^[^\\\\S\\\\n]+', '', '  a\\n b\\n c\\nd  e')",
    "question_id": "3984539-10",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , )",
    "canonical_cmd": "re.sub('(?m)^[^\\\\S\\\\n]+', 'VAR_STR', '  a\\n b\\n c\\nd  e')"
  },
  {
    "nl": "remove white spaces from all the lines using a regular expression in string 'a\\n b\\n c'",
    "cmd": "re.sub('(?m)^\\\\s+', '', 'a\\n b\\n c')",
    "question_id": "3984539-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , )",
    "canonical_cmd": "re.sub('(?m)^\\\\s+', '', 'VAR_STR')"
  },
  {
    "nl": "sort dataframe `df` based on column 'b' in ascending and column 'c' in descending",
    "cmd": "df.sort_values(['b', 'c'], ascending=[True, False], inplace=True)",
    "question_id": "17141558-7",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.sort_values"
    ],
    "clean_cmd": "df.sort_values([, ], ascending=[True, False], inplace=True)",
    "canonical_cmd": "VAR_STR.sort_values(['VAR_STR', 'VAR_STR'], ascending=[True, False], inplace=True)"
  },
  {
    "nl": "sort dataframe `df` based on column 'a' in ascending and column 'b' in descending",
    "cmd": "df.sort_values(['a', 'b'], ascending=[True, False])",
    "question_id": "17141558-84",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.sort_values"
    ],
    "clean_cmd": "df.sort_values([, ], ascending=[True, False])",
    "canonical_cmd": "VAR_STR.sort_values(['VAR_STR', 'VAR_STR'], ascending=[True, False])"
  },
  {
    "nl": "sort a pandas data frame with column `a` in ascending and `b` in descending order",
    "cmd": "df1.sort(['a', 'b'], ascending=[True, False], inplace=True)",
    "question_id": "17141558-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "df1.sort([, ], ascending=[True, False], inplace=True)",
    "canonical_cmd": "df1.sort(['VAR_STR', 'VAR_STR'], ascending=[True, False], inplace=True)"
  },
  {
    "nl": "sort a pandas data frame by column `a` in ascending, and by column `b` in descending order",
    "cmd": "df.sort(['a', 'b'], ascending=[True, False])",
    "question_id": "17141558-89",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.index.sort"
    ],
    "clean_cmd": "df.sort([, ], ascending=[True, False])",
    "canonical_cmd": "df.sort(['VAR_STR', 'VAR_STR'], ascending=[True, False])"
  },
  {
    "nl": "create a symlink directory `D:\\\\testdirLink` for directory `D:\\\\testdir` with unicode support using ctypes library",
    "cmd": "kdll.CreateSymbolicLinkW('D:\\\\testdirLink', 'D:\\\\testdir', 1)",
    "question_id": "1447575-49",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "kdll.CreateSymbolicLinkW(, , )",
    "canonical_cmd": "kdll.CreateSymbolicLinkW('VAR_STR', 'VAR_STR', 1)"
  },
  {
    "nl": "Sort a list of dictionaries `mylist` by keys \"weight\" and \"factor\"",
    "cmd": "mylist.sort(key=operator.itemgetter('weight', 'factor'))",
    "question_id": "861190-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.operator#operator.itemgetter",
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "mylist.sort(key=operator.itemgetter(, ))",
    "canonical_cmd": "VAR_STR.sort(key=operator.itemgetter('VAR_STR', 'VAR_STR'))"
  },
  {
    "nl": "ordering a list of dictionaries `mylist` by elements 'weight' and 'factor'",
    "cmd": "mylist.sort(key=lambda d: (d['weight'], d['factor']))",
    "question_id": "861190-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "mylist.sort(key= d: (d[], d[]))",
    "canonical_cmd": "VAR_STR.sort(key=lambda d: (d['VAR_STR'], d['VAR_STR']))"
  },
  {
    "nl": "convert tuple elements in list `[(1,2),(3,4),(5,6),]` into lists",
    "cmd": "map(list, zip(*[(1, 2), (3, 4), (5, 6)]))",
    "question_id": "8081545-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#map"
    ],
    "clean_cmd": "map(list, zip(*[(, ), (, ), (, )]))",
    "canonical_cmd": "map(list, zip(*[(1, 2), (3, 4), (5, 6)]))"
  },
  {
    "nl": "convert list of tuples to multiple lists in Python",
    "cmd": "map(list, zip(*[(1, 2), (3, 4), (5, 6)]))",
    "question_id": "8081545-98",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#map"
    ],
    "clean_cmd": "map(list, zip(*[(, ), (, ), (, )]))",
    "canonical_cmd": "map(list, zip(*[(1, 2), (3, 4), (5, 6)]))"
  },
  {
    "nl": "convert list of tuples to multiple lists in Python",
    "cmd": "zip(*[(1, 2), (3, 4), (5, 6)])",
    "question_id": "8081545-96",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "zip(*[(, ), (, ), (, )])",
    "canonical_cmd": "zip(*[(1, 2), (3, 4), (5, 6)])"
  },
  {
    "nl": "execute os command `my_cmd`",
    "cmd": "os.system(my_cmd)",
    "question_id": "4965159-23",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system(my_cmd)",
    "canonical_cmd": "os.system(VAR_STR)"
  },
  {
    "nl": "derive the week start for the given week number and year \u20182011, 4, 0\u2019",
    "cmd": "datetime.datetime.strptime('2011, 4, 0', '%Y, %U, %w')",
    "question_id": "4793617-92",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime"
    ],
    "clean_cmd": "datetime.datetime.strptime(, )",
    "canonical_cmd": "datetime.datetime.strptime('2011, 4, 0', '%Y, %U, %w')"
  },
  {
    "nl": "python selenium click on button '.button.c_button.s_button'",
    "cmd": "driver.find_element_by_css_selector('.button.c_button.s_button').click()",
    "question_id": "21350605-24",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.find_element_by_css_selector().click()",
    "canonical_cmd": "driver.find_element_by_css_selector('VAR_STR').click()"
  },
  {
    "nl": "python selenium click on button",
    "cmd": "driver.find_element_by_css_selector('.button .c_button .s_button').click()",
    "question_id": "21350605-24",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.find_element_by_css_selector().click()",
    "canonical_cmd": "driver.find_element_by_css_selector('.button .c_button .s_button').click()"
  },
  {
    "nl": "read CSV file 'my.csv' into a dataframe `df` with datatype of float for column 'my_column' considering character 'n/a' as NaN value",
    "cmd": "df = pd.read_csv('my.csv', dtype={'my_column': np.float64}, na_values=['n/a'])",
    "question_id": "30190459-31",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.read_csv"
    ],
    "clean_cmd": "df = pd.read_csv(, dtype={: np.float64}, na_values=[])",
    "canonical_cmd": "VAR_STR = pd.read_csv('VAR_STR', dtype={'VAR_STR': np.float64}, na_values=['VAR_STR'])"
  },
  {
    "nl": "convert nan values to \u2018n/a\u2019 while reading rows from a csv `read_csv` with pandas",
    "cmd": "df = pd.read_csv('my.csv', na_values=['n/a'])",
    "question_id": "30190459-15",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.read_csv"
    ],
    "clean_cmd": "df = pd.read_csv(, na_values=[])",
    "canonical_cmd": "df = pd.VAR_STR('my.csv', na_values=['n/a'])"
  },
  {
    "nl": "get indexes of all true boolean values from a list `bool_list`",
    "cmd": "[i for i, elem in enumerate(bool_list, 1) if elem]",
    "question_id": "13076560-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate"
    ],
    "clean_cmd": "[i  i, elem  enumerate(bool_list, )  elem]",
    "canonical_cmd": "[i for i, elem in enumerate(VAR_STR, 1) if elem]"
  },
  {
    "nl": "get a list `no_integers` of all the items in list `mylist` that are not of type `int`",
    "cmd": "no_integers = [x for x in mylist if not isinstance(x, int)]",
    "question_id": "3159155-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#isinstance"
    ],
    "clean_cmd": "no_integers = [x  x  mylist   isinstance(x, int)]",
    "canonical_cmd": "VAR_STR = [x for x in VAR_STR if not isinstance(x, VAR_STR)]"
  },
  {
    "nl": "concatenating values in `list1` to a string",
    "cmd": "str1 = ''.join(list1)",
    "question_id": "5618878-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "str1 = .join(list1)",
    "canonical_cmd": "str1 = ''.join(VAR_STR)"
  },
  {
    "nl": "concatenating values in list `L` to a string, separate by space",
    "cmd": "' '.join((str(x) for x in L))",
    "question_id": "5618878-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join((str(x)  x  L))",
    "canonical_cmd": "\"\"\" \"\"\".join(str(x) for x in VAR_STR)"
  },
  {
    "nl": "concatenating values in `list1` to a string",
    "cmd": "str1 = ''.join((str(e) for e in list1))",
    "question_id": "5618878-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "str1 = .join((str(e)  e  list1))",
    "canonical_cmd": "str1 = ''.join(str(e) for e in VAR_STR)"
  },
  {
    "nl": "concatenating values in list `L` to a string",
    "cmd": "makeitastring = ''.join(map(str, L))",
    "question_id": "5618878-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "makeitastring = .join(map(str, L))",
    "canonical_cmd": "makeitastring = ''.join(map(str, VAR_STR))"
  },
  {
    "nl": "find consecutive segments from a column 'A' in a pandas data frame 'df'",
    "cmd": "df.reset_index().groupby('A')['index'].apply(np.array)",
    "question_id": "14358567-16",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.reset_index",
      "pandas.reference.api.pandas.dataframe.apply",
      "pandas.reference.api.pandas.dataframe.groupby"
    ],
    "clean_cmd": "df.reset_index().groupby()[].apply(np.array)",
    "canonical_cmd": "VAR_STR.reset_index().groupby('VAR_STR')['index'].apply(np.array)"
  },
  {
    "nl": "place '\\' infront of each non-letter char in string `line`",
    "cmd": "print(re.sub('[_%^$]', '\\\\\\\\\\\\g<0>', line))",
    "question_id": "26155985-7",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "print(re.sub(, , line))",
    "canonical_cmd": "print(re.sub('[_%^$]', '\\\\\\\\\\\\g<0>', VAR_STR))"
  },
  {
    "nl": "sort a list of tuples `my_list` by second parameter in the tuple",
    "cmd": "my_list.sort(key=lambda x: x[1])",
    "question_id": "8459231-72",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "my_list.sort(key= x: x[])",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: x[1])"
  },
  {
    "nl": "execute a file './abc.py' with arguments `arg1` and `arg2` in python shell",
    "cmd": "subprocess.call(['./abc.py', arg1, arg2])",
    "question_id": "5788891-79",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call([, arg1, arg2])",
    "canonical_cmd": "subprocess.call(['VAR_STR', VAR_STR, VAR_STR])"
  },
  {
    "nl": "find the string matches within parenthesis from a string `s` using regex",
    "cmd": "m = re.search('\\\\[(\\\\w+)\\\\]', s)",
    "question_id": "8569201-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.search"
    ],
    "clean_cmd": "m = re.search(, s)",
    "canonical_cmd": "m = re.search('\\\\[(\\\\w+)\\\\]', VAR_STR)"
  },
  {
    "nl": "Add row `['8/19/2014', 'Jun', 'Fly', '98765']` to dataframe `df`",
    "cmd": "df.loc[len(df)] = ['8/19/2014', 'Jun', 'Fly', '98765']",
    "question_id": "19365513-49",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.loc",
      "python.library.functions#len"
    ],
    "clean_cmd": "df.loc[len(df)] = [, , , ]",
    "canonical_cmd": "VAR_STR.loc[len(VAR_STR)] = [VAR_STR]"
  },
  {
    "nl": "decode the string 'stringnamehere'  to UTF-8",
    "cmd": "stringnamehere.decode('utf-8', 'ignore')",
    "question_id": "4182603-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "stringnamehere.decode(, )",
    "canonical_cmd": "VAR_STR.decode('utf-8', 'ignore')"
  },
  {
    "nl": "convert string `apple` from iso-8859-1/latin1 to utf-8",
    "cmd": "apple.decode('iso-8859-1').encode('utf8')",
    "question_id": "6539881-93",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "apple.decode().encode()",
    "canonical_cmd": "VAR_STR.decode('iso-8859-1').encode('utf8')"
  },
  {
    "nl": "create datetime object from \"16sep2012\"",
    "cmd": "datetime.datetime.strptime('16Sep2012', '%d%b%Y')",
    "question_id": "18684397-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime"
    ],
    "clean_cmd": "datetime.datetime.strptime(, )",
    "canonical_cmd": "datetime.datetime.strptime('16Sep2012', '%d%b%Y')"
  },
  {
    "nl": "add key \"item3\" and value \"3\" to dictionary `default_data `",
    "cmd": "default_data['item3'] = 3",
    "question_id": "6416131-30",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "default_data[] = ",
    "canonical_cmd": "VAR_STR['VAR_STR'] = 3"
  },
  {
    "nl": "add key \"item3\" and value \"3\" to dictionary `default_data `",
    "cmd": "default_data.update({'item3': 3, })",
    "question_id": "6416131-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.update"
    ],
    "clean_cmd": "default_data.update({: , })",
    "canonical_cmd": "VAR_STR.update({'VAR_STR': 3})"
  },
  {
    "nl": "add key value pairs 'item4' , 4 and 'item5' , 5 to dictionary `default_data`",
    "cmd": "default_data.update({'item4': 4, 'item5': 5, })",
    "question_id": "6416131-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.update"
    ],
    "clean_cmd": "default_data.update({: , : , })",
    "canonical_cmd": "VAR_STR.update({'VAR_STR': 4, 'VAR_STR': 5})"
  },
  {
    "nl": "split strings in list `l` on the first occurring tab `\\t` and enter only the first resulting substring in a new list",
    "cmd": "[i.split('\\t', 1)[0] for i in l]",
    "question_id": "6696027-18",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "[i.split(, )[]  i  l]",
    "canonical_cmd": "[i.split('VAR_STR', 1)[0] for i in VAR_STR]"
  },
  {
    "nl": "Split each string in list `myList` on the tab character",
    "cmd": "myList = [i.split('\\t')[0] for i in myList]",
    "question_id": "6696027-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "myList = [i.split()[]  i  myList]",
    "canonical_cmd": "VAR_STR = [i.split('\\t')[0] for i in VAR_STR]"
  },
  {
    "nl": "search for occurrences of regex pattern `pattern` in string `url`",
    "cmd": "print(pattern.search(url).group(1))",
    "question_id": "32792602-55",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.Pattern.search",
      "pygame.ref.sprite#pygame.sprite.Group"
    ],
    "clean_cmd": "print(pattern.search(url).group())",
    "canonical_cmd": "print(VAR_STR.search(VAR_STR).group(1))"
  },
  {
    "nl": "convert all of the items in a list `lst` to float",
    "cmd": "[float(i) for i in lst]",
    "question_id": "1614236-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#float"
    ],
    "clean_cmd": "[float(i)  i  lst]",
    "canonical_cmd": "[float(i) for i in VAR_STR]"
  },
  {
    "nl": "Change background color in Tkinter",
    "cmd": "root.configure(background='black')",
    "question_id": "2744795-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.tkinter.ttk#tkinter.ttk.Style.configure"
    ],
    "clean_cmd": "root.configure(background=)",
    "canonical_cmd": "root.configure(background='black')"
  },
  {
    "nl": "encode string `data` using hex 'hex' encoding",
    "cmd": "print(data.encode('hex'))",
    "question_id": "3059301-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "print(data.encode())",
    "canonical_cmd": "print(VAR_STR.encode('VAR_STR'))"
  },
  {
    "nl": "Return the decimal value for each hex character in data `data`",
    "cmd": "print(' '.join([str(ord(a)) for a in data]))",
    "question_id": "3059301-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#ord",
      "python.library.stdtypes#str",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "print(.join([str(ord(a))  a  data]))",
    "canonical_cmd": "print(' '.join([str(ord(a)) for a in VAR_STR]))"
  },
  {
    "nl": "encode value of key `City` in dictionary `data` as `ascii`, ignoring non-ascii characters",
    "cmd": "data['City'].encode('ascii', 'ignore')",
    "question_id": "10264618-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "data[].encode(, )",
    "canonical_cmd": "VAR_STR['VAR_STR'].encode('VAR_STR', 'ignore')"
  },
  {
    "nl": "Get all matching patterns 'a.*?a' from a string 'a 1 a 2 a 3 a 4 a'.",
    "cmd": "re.findall('(?=(a.*?a))', 'a 1 a 2 a 3 a 4 a')",
    "question_id": "17467504-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall('(?=(a.*?a))', 'VAR_STR')"
  },
  {
    "nl": "select all rows in dataframe `df` where the values of column 'columnX' is bigger than or equal to `x` and smaller than or equal to `y`",
    "cmd": "df[(x <= df['columnX']) & (df['columnX'] <= y)]",
    "question_id": "40156469-42",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df[(x &lt;= df[]) &amp; (df[] &lt;= y)]",
    "canonical_cmd": "VAR_STR[(VAR_STR <= VAR_STR['VAR_STR']) & (VAR_STR['VAR_STR'] <= VAR_STR)]"
  },
  {
    "nl": "remove parentheses only around single words in a string `s` using regex",
    "cmd": "re.sub('\\\\((\\\\w+)\\\\)', '\\\\1', s)",
    "question_id": "31405409-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , s)",
    "canonical_cmd": "re.sub('\\\\((\\\\w+)\\\\)', '\\\\1', VAR_STR)"
  },
  {
    "nl": "add variable `var` to key 'f' of first element in JSON data `data`",
    "cmd": "data[0]['f'] = var",
    "question_id": "22296496-67",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "data[][] = var",
    "canonical_cmd": "VAR_STR[0]['VAR_STR'] = VAR_STR"
  },
  {
    "nl": "merge pandas dataframe `x` with columns 'a' and 'b' and dataframe `y` with column 'y'",
    "cmd": "pd.merge(y, x, on='k')[['a', 'b', 'y']]",
    "question_id": "20504881-46",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.merge"
    ],
    "clean_cmd": "pd.merge(y, x, on=)[[, , ]]",
    "canonical_cmd": "pd.merge(VAR_STR, VAR_STR, on='k')[['VAR_STR', 'VAR_STR', 'VAR_STR']]"
  },
  {
    "nl": "concatenate key/value pairs in dictionary `a` with string ', ' into a single string",
    "cmd": "\"\"\", \"\"\".join([(str(k) + ' ' + str(v)) for k, v in list(a.items())])",
    "question_id": "40512124-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str",
      "python.library.functions#list",
      "python.library.stdtypes#str.join",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": ".join([(str(k) +  + str(v))  k, v  list(a.items())])",
    "canonical_cmd": "\"\"\", \"\"\".join([(str(k) + ' ' + str(v)) for k, v in list(VAR_STR.items())])"
  },
  {
    "nl": "match regex pattern 'a*?bc*?' on string 'aabcc' with DOTALL enabled",
    "cmd": "re.findall('a*?bc*?', 'aabcc', re.DOTALL)",
    "question_id": "9507819-61",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, , re.DOTALL)",
    "canonical_cmd": "re.findall('VAR_STR', 'VAR_STR', re.DOTALL)"
  },
  {
    "nl": "list all files in directory \".\"",
    "cmd": "for (dirname, dirnames, filenames) in os.walk('.'):\n    for subdirname in dirnames:\n        print(os.path.join(dirname, subdirname))\n    for filename in filenames:\n        pass",
    "question_id": "120656-81",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.join",
      "python.library.os#os.walk"
    ],
    "clean_cmd": " (dirname, dirnames, filenames)  os.walk(): subdirname  dirnames:print(os.path.join(dirname, subdirname)) filename  filenames:",
    "canonical_cmd": "for dirname, dirnames, filenames in os.walk('VAR_STR'):\n    for subdirname in dirnames:\n        print(os.path.join(dirname, subdirname))\n    for filename in filenames:\n        pass"
  },
  {
    "nl": "list all files in directory `path`",
    "cmd": "os.listdir(path)",
    "question_id": "120656-36",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.listdir"
    ],
    "clean_cmd": "os.listdir(path)",
    "canonical_cmd": "os.listdir(VAR_STR)"
  },
  {
    "nl": "split a `utf-8` encoded string `stru` into a list of characters",
    "cmd": "list(stru.decode('utf-8'))",
    "question_id": "18711384-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "list(stru.decode())",
    "canonical_cmd": "list(VAR_STR.decode('VAR_STR'))"
  },
  {
    "nl": "convert a string `s` to its base-10 representation",
    "cmd": "int(s.encode('hex'), 16)",
    "question_id": "10716796-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "int(s.encode(), )",
    "canonical_cmd": "int(VAR_STR.encode('hex'), 16)"
  },
  {
    "nl": "sort a zipped list `zipped` using lambda function",
    "cmd": "sorted(zipped, key=lambda x: x[1])",
    "question_id": "7142227-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(zipped, key= x: x[])",
    "canonical_cmd": "sorted(VAR_STR, key=lambda x: x[1])"
  },
  {
    "nl": "How do I sort a zipped list in Python?",
    "cmd": "zipped.sort(key=lambda t: t[1])",
    "question_id": "7142227-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "zipped.sort(key= t: t[])",
    "canonical_cmd": "zipped.sort(key=lambda t: t[1])"
  },
  {
    "nl": "print script's directory",
    "cmd": "print(os.path.dirname(os.path.realpath(__file__)))",
    "question_id": "4934806-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.dirname",
      "python.library.os.path#os.path.realpath"
    ],
    "clean_cmd": "print(os.path.dirname(os.path.realpath(__file__)))",
    "canonical_cmd": "print(os.path.dirname(os.path.realpath(__file__)))"
  },
  {
    "nl": "find all the rows in Dataframe 'df2' that are also present in Dataframe 'df1', for the columns 'A', 'B', 'C' and 'D'.",
    "cmd": "pd.merge(df1, df2, on=['A', 'B', 'C', 'D'], how='inner')",
    "question_id": "29464234-50",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.merge"
    ],
    "clean_cmd": "pd.merge(df1, df2, on=[, , , ], how=)",
    "canonical_cmd": "pd.merge(VAR_STR, VAR_STR, on=['VAR_STR', 'VAR_STR', 'VAR_STR', 'VAR_STR'], how='inner')"
  },
  {
    "nl": "check if all of the following items in list `['a', 'b']` are in a list `['a', 'b', 'c']`",
    "cmd": "set(['a', 'b']).issubset(['a', 'b', 'c'])",
    "question_id": "3931541-49",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#set",
      "python.library.stdtypes#frozenset.issubset"
    ],
    "clean_cmd": "set([, ]).issubset([, , ])",
    "canonical_cmd": "set([VAR_STR]).issubset([VAR_STR])"
  },
  {
    "nl": "Check if all the items in a list `['a', 'b']` exists in another list `l`",
    "cmd": "set(['a', 'b']).issubset(set(l))",
    "question_id": "3931541-28",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#set"
    ],
    "clean_cmd": "set([, ]).issubset(set(l))",
    "canonical_cmd": "set([VAR_STR]).issubset(set(VAR_STR))"
  },
  {
    "nl": "split string `string` on whitespaces using a generator",
    "cmd": "return (x.group(0) for x in re.finditer(\"[A-Za-z']+\", string))",
    "question_id": "3862010-24",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.finditer",
      "python.library.re#re.Match.group"
    ],
    "clean_cmd": " (x.group()  x  re.finditer(, string))",
    "canonical_cmd": "return (x.group(0) for x in re.finditer(\"[A-Za-z']+\", VAR_STR))"
  },
  {
    "nl": "generate a list containing values associated with the key 'value' of each dictionary inside list `list_of_dicts`",
    "cmd": "[x['value'] for x in list_of_dicts]",
    "question_id": "7271482-45",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[x[]  x  list_of_dicts]",
    "canonical_cmd": "[x['VAR_STR'] for x in VAR_STR]"
  },
  {
    "nl": "python getting a list of value from list of dict",
    "cmd": "[d['value'] for d in l]",
    "question_id": "7271482-32",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[d[]  d  l]",
    "canonical_cmd": "[d['value'] for d in l]"
  },
  {
    "nl": "python getting a list of value from list of dict",
    "cmd": "[d['value'] for d in l if 'value' in d]",
    "question_id": "7271482-21",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[d[]  d  l    d]",
    "canonical_cmd": "[d['value'] for d in l if 'value' in d]"
  },
  {
    "nl": "BeautifulSoup find all 'tr' elements in HTML string `soup` at the five stride starting from the fourth element",
    "cmd": "rows = soup.findAll('tr')[4::5]",
    "question_id": "8724352-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "rows = soup.findAll()[::]",
    "canonical_cmd": "rows = VAR_STR.findAll('VAR_STR')[4::5]"
  },
  {
    "nl": "get a utf-8 string literal representation of byte string `x`",
    "cmd": "\"\"\"x = {}\"\"\".format(x.decode('utf8')).encode('utf8')",
    "question_id": "15390374-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format",
      "python.library.stdtypes#str.encode",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": ".format(x.decode()).encode()",
    "canonical_cmd": "\"\"\"x = {}\"\"\".format(VAR_STR.decode('utf8')).encode('utf8')"
  },
  {
    "nl": "extract elements at indices (1, 2, 5) from a list `a`",
    "cmd": "[a[i] for i in (1, 2, 5)]",
    "question_id": "2621674-32",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[a[i]  i  (, , )]",
    "canonical_cmd": "[VAR_STR[i] for i in (1, 2, 5)]"
  },
  {
    "nl": "sort list `a` using the first dimension of the element as the key to list `b`",
    "cmd": "a.sort(key=lambda x: b.index(x[0]))",
    "question_id": "12814667-69",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.index.sort"
    ],
    "clean_cmd": "a.sort(key= x: b.index(x[]))",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: VAR_STR.index(x[0]))"
  },
  {
    "nl": "How to sort a list according to another list?",
    "cmd": "a.sort(key=lambda x_y: b.index(x_y[0]))",
    "question_id": "12814667-58",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.index.sort"
    ],
    "clean_cmd": "a.sort(key= x_y: b.index(x_y[]))",
    "canonical_cmd": "a.sort(key=lambda x_y: b.index(x_y[0]))"
  },
  {
    "nl": "Convert a Unicode string `title` to a 'ascii' string",
    "cmd": "unicodedata.normalize('NFKD', title).encode('ascii', 'ignore')",
    "question_id": "1207457-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.unicodedata#unicodedata.normalize",
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "unicodedata.normalize(, title).encode(, )",
    "canonical_cmd": "unicodedata.normalize('NFKD', VAR_STR).encode('VAR_STR', 'ignore')"
  },
  {
    "nl": "Convert a Unicode string `a` to a 'ascii' string",
    "cmd": "a.encode('ascii', 'ignore')",
    "question_id": "1207457-55",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "a.encode(, )",
    "canonical_cmd": "VAR_STR.encode('VAR_STR', 'ignore')"
  },
  {
    "nl": "delete all occureces of `8` in each string `s` in list `lst`",
    "cmd": "print([s.replace('8', '') for s in lst])",
    "question_id": "8282553-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "print([s.replace(, )  s  lst])",
    "canonical_cmd": "print([VAR_STR.replace('VAR_STR', '') for VAR_STR in VAR_STR])"
  },
  {
    "nl": "replace values of dataframe `df` with True if numeric",
    "cmd": "df.applymap(lambda x: isinstance(x, (int, float)))",
    "question_id": "21771133-50",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.applymap",
      "python.library.functions#isinstance"
    ],
    "clean_cmd": "df.applymap( x: isinstance(x, (int, float)))",
    "canonical_cmd": "VAR_STR.applymap(lambda x: isinstance(x, (int, float)))"
  },
  {
    "nl": "convert values in dictionary `d` into integers",
    "cmd": "{k: int(v) for k, v in d.items()}",
    "question_id": "9224385-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "{k: int(v)  k, v  d.items()}",
    "canonical_cmd": "{k: int(v) for k, v in VAR_STR.items()}"
  },
  {
    "nl": "generate the combinations of 3 from a set `{1, 2, 3, 4}`",
    "cmd": "print(list(itertools.combinations({1, 2, 3, 4}, 3)))",
    "question_id": "10115967-39",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.combinations",
      "python.library.functions#list"
    ],
    "clean_cmd": "print(list(itertools.combinations({, , , }, )))",
    "canonical_cmd": "print(list(itertools.combinations({VAR_STR}, 3)))"
  },
  {
    "nl": "double each character in string `text.read()`",
    "cmd": "re.sub('(.)', '\\\\1\\\\1', text.read(), 0, re.S)",
    "question_id": "15175142-92",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub",
      "python.library.os#os.read"
    ],
    "clean_cmd": "re.sub(, , text.read(), , re.S)",
    "canonical_cmd": "re.sub('(.)', '\\\\1\\\\1', text.read(), 0, re.S)"
  },
  {
    "nl": "split a string `a , b; cdf`  using both commas and semicolons as delimeters",
    "cmd": "re.split('\\\\s*,\\\\s*|\\\\s*;\\\\s*', 'a , b; cdf')",
    "question_id": "4697006-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, )",
    "canonical_cmd": "re.split('\\\\s*,\\\\s*|\\\\s*;\\\\s*', 'VAR_STR')"
  },
  {
    "nl": "Split a string `string` by multiple separators `,` and `;`",
    "cmd": "[t.strip() for s in string.split(',') for t in s.split(';')]",
    "question_id": "4697006-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "[t.strip()  s  string.split()  t  s.split()]",
    "canonical_cmd": "[t.strip() for s in VAR_STR.split('VAR_STR') for t in s.split('VAR_STR')]"
  },
  {
    "nl": "sort list `trial_list` based on values of dictionary `trail_dict`",
    "cmd": "sorted(trial_list, key=lambda x: trial_dict[x])",
    "question_id": "12987178-6",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(trial_list, key= x: trial_dict[x])",
    "canonical_cmd": "sorted(VAR_STR, key=lambda x: trial_dict[x])"
  },
  {
    "nl": "merge a list of integers `[1, 2, 3, 4, 5]` into a single integer",
    "cmd": "from functools import reduce\nreduce(lambda x, y: 10 * x + y, [1, 2, 3, 4, 5])",
    "question_id": "4299741-5",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "from functools import reducereduce( x, y:  * x + y, [, , , , ])",
    "canonical_cmd": "from functools import reduce\nreduce(lambda x, y: 10 * x + y, [VAR_STR])"
  },
  {
    "nl": "Get all object attributes of object `obj`",
    "cmd": "print((obj.__dict__))",
    "question_id": "6886493-12",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print((obj.__dict__))",
    "canonical_cmd": "print(VAR_STR.__dict__)"
  },
  {
    "nl": "Get all object attributes of an object",
    "cmd": "dir()",
    "question_id": "6886493-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#dir"
    ],
    "clean_cmd": "dir()",
    "canonical_cmd": "dir()"
  },
  {
    "nl": "Get all object attributes of an object",
    "cmd": "dir()",
    "question_id": "6886493-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#dir"
    ],
    "clean_cmd": "dir()",
    "canonical_cmd": "dir()"
  },
  {
    "nl": "create a list containing the multiplication of each elements at the same index of list `lista` and list `listb`",
    "cmd": "[(a * b) for a, b in zip(lista, listb)]",
    "question_id": "10271484-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "[(a * b)  a, b  zip(lista, listb)]",
    "canonical_cmd": "[(a * b) for a, b in zip(VAR_STR, VAR_STR)]"
  },
  {
    "nl": "append a path `/path/to/main_folder` in system path",
    "cmd": "sys.path.append('/path/to/main_folder')",
    "question_id": "9153527-61",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.append"
    ],
    "clean_cmd": "sys.path.append()",
    "canonical_cmd": "sys.path.append('VAR_STR')"
  },
  {
    "nl": "check if any of the items in  `search` appear in `string`",
    "cmd": "any(x in string for x in search)",
    "question_id": "5858916-91",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#any"
    ],
    "clean_cmd": "any(x  string  x  search)",
    "canonical_cmd": "any(x in VAR_STR for x in VAR_STR)"
  },
  {
    "nl": "select the first row grouped per level 0 of dataframe `df`",
    "cmd": "df.groupby(level=0, as_index=False).nth(0)",
    "question_id": "42747987-29",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.core.groupby.groupby.nth"
    ],
    "clean_cmd": "df.groupby(level=, as_index=False).nth()",
    "canonical_cmd": "VAR_STR.groupby(level=0, as_index=False).nth(0)"
  },
  {
    "nl": "Escape character '}' in string '{0}:<15}}{1}:<15}}{2}:<8}}' while using function `format` with arguments `('1', '2', '3')`",
    "cmd": "print('{0}:<15}}{1}:<15}}{2}:<8}}'.format('1', '2', '3'))",
    "question_id": "9079540-32",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": "print(.format(, , ))",
    "canonical_cmd": "print('VAR_STR'.VAR_STR(VAR_STR))"
  },
  {
    "nl": "Join data of dataframe `df1` with data in dataframe `df2` based on similar values of column 'user_id' in both dataframes",
    "cmd": "s1 = pd.merge(df1, df2, how='inner', on=['user_id'])",
    "question_id": "19618912-23",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.merge"
    ],
    "clean_cmd": "s1 = pd.merge(df1, df2, how=, on=[])",
    "canonical_cmd": "s1 = pd.merge(VAR_STR, VAR_STR, how='inner', on=['VAR_STR'])"
  },
  {
    "nl": "parse string '2015/01/01 12:12am' to DateTime object using format '%Y/%m/%d %I:%M%p'",
    "cmd": "datetime.strptime('2015/01/01 12:12am', '%Y/%m/%d %I:%M%p')",
    "question_id": "25279993-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime"
    ],
    "clean_cmd": "datetime.strptime(, )",
    "canonical_cmd": "datetime.strptime('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "group dataframe `data` entries by year value of the date in column 'date'",
    "cmd": "data.groupby(data['date'].map(lambda x: x.year))",
    "question_id": "11391969-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "pandas.reference.groupby"
    ],
    "clean_cmd": "data.groupby(data[].map( x: x.year))",
    "canonical_cmd": "VAR_STR.groupby(VAR_STR['VAR_STR'].map(lambda x: x.year))"
  },
  {
    "nl": "change the size of the sci notation to '30' above the y axis in matplotlib `plt`",
    "cmd": "plt.rc('font', **{'size': '30'})",
    "question_id": "14159753-79",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib._as_gen.matplotlib.pyplot.rc"
    ],
    "clean_cmd": "plt.rc(, **{: })",
    "canonical_cmd": "VAR_STR.rc('font', **{'size': 'VAR_STR'})"
  },
  {
    "nl": "get all column name of dataframe `df` except for column 'T1_V6'",
    "cmd": "df[df.columns - ['T1_V6']]",
    "question_id": "32032836-96",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df[df.columns - []]",
    "canonical_cmd": "VAR_STR[VAR_STR.columns - ['VAR_STR']]"
  },
  {
    "nl": "removing key values pairs with key 'mykey1' from a list of dictionaries `mylist`",
    "cmd": "[{k: v for k, v in d.items() if k != 'mykey1'} for d in mylist]",
    "question_id": "13254241-96",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "[{k: v  k, v  d.items()  k != }  d  mylist]",
    "canonical_cmd": "[{k: v for k, v in d.items() if k != 'VAR_STR'} for d in VAR_STR]"
  },
  {
    "nl": "Removing key values pairs from a list of dictionaries",
    "cmd": "[dict((k, v) for k, v in d.items() if k != 'mykey1') for d in mylist]",
    "question_id": "13254241-96",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "[dict((k, v)  k, v  d.items()  k != )  d  mylist]",
    "canonical_cmd": "[dict((k, v) for k, v in d.items() if k != 'mykey1') for d in mylist]"
  },
  {
    "nl": "strip the string `.txt` from anywhere in the string `Boat.txt.txt`",
    "cmd": "\"\"\"Boat.txt.txt\"\"\".replace('.txt', '')",
    "question_id": "18723580-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": ".replace(, )",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".replace('VAR_STR', '')"
  },
  {
    "nl": "split string \"0,1,2\" based on delimiter ','",
    "cmd": "\"\"\"0,1,2\"\"\".split(',')",
    "question_id": "2168123-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": ".split()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".split('VAR_STR')"
  },
  {
    "nl": "convert the string '0,1,2' to a list of integers",
    "cmd": "[int(x) for x in '0,1,2'.split(',')]",
    "question_id": "2168123-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "[int(x)  x  .split()]",
    "canonical_cmd": "[int(x) for x in 'VAR_STR'.split(',')]"
  },
  {
    "nl": "run function 'SudsMove' simultaneously",
    "cmd": "threading.Thread(target=SudsMove).start()",
    "question_id": "2108126-46",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.threading#threading.Thread",
      "python.library.threading#threading.Thread.start"
    ],
    "clean_cmd": "threading.Thread(target=SudsMove).start()",
    "canonical_cmd": "threading.Thread(target=VAR_STR).start()"
  },
  {
    "nl": "execute raw sql queue '<sql here>' in database `db` in sqlalchemy-flask app",
    "cmd": "result = db.engine.execute('<sql here>')",
    "question_id": "17972020-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.msilib#msilib.View.Execute"
    ],
    "clean_cmd": "result = db.engine.execute()",
    "canonical_cmd": "result = VAR_STR.engine.execute('VAR_STR')"
  },
  {
    "nl": "What is the most pythonic way to exclude elements of a list that start with a specific character?",
    "cmd": "[x for x in my_list if not x.startswith('#')]",
    "question_id": "11791568-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.startswith"
    ],
    "clean_cmd": "[x  x  my_list   x.startswith()]",
    "canonical_cmd": "[x for x in my_list if not x.startswith('#')]"
  },
  {
    "nl": "Replace `;` with `:` in a string `line`",
    "cmd": "line = line.replace(';', ':')",
    "question_id": "12723751-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "line = line.replace(, )",
    "canonical_cmd": "VAR_STR = VAR_STR.replace('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "get a list of last trailing words from another list  of strings`Original_List`",
    "cmd": "new_list = [x.split()[-1] for x in Original_List]",
    "question_id": "40535203-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "new_list = [x.split()[-]  x  Original_List]",
    "canonical_cmd": "new_list = [x.split()[-1] for x in VAR_STR]"
  },
  {
    "nl": "set the value in column 'B' to NaN if the corresponding value in column 'A' is equal to 0 in pandas dataframe `df`",
    "cmd": "df.ix[df.A == 0, 'B'] = np.nan",
    "question_id": "12307099-72",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df.ix[df.A == , ] = np.nan",
    "canonical_cmd": "VAR_STR.ix[VAR_STR.VAR_STR == 0, 'VAR_STR'] = np.nan"
  },
  {
    "nl": "reset index to default in dataframe `df`",
    "cmd": "df = df.reset_index(drop=True)",
    "question_id": "20490274-12",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.reset_index"
    ],
    "clean_cmd": "df = df.reset_index(drop=True)",
    "canonical_cmd": "VAR_STR = VAR_STR.reset_index(drop=True)"
  },
  {
    "nl": "create a list containing digits of number 123 as its elements",
    "cmd": "list(str(123))",
    "question_id": "13905936-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#str"
    ],
    "clean_cmd": "list(str())",
    "canonical_cmd": "list(str(123))"
  },
  {
    "nl": "converting integer `num` to list",
    "cmd": "[int(x) for x in str(num)]",
    "question_id": "13905936-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#str"
    ],
    "clean_cmd": "[int(x)  x  str(num)]",
    "canonical_cmd": "[int(x) for x in str(VAR_STR)]"
  },
  {
    "nl": "convert generator object to a dictionary",
    "cmd": "{i: (i * 2) for i in range(10)}",
    "question_id": "17815945-94",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "{i: (i * )  i  range()}",
    "canonical_cmd": "{i: (i * 2) for i in range(10)}"
  },
  {
    "nl": "convert generator object to a dictionary",
    "cmd": "dict((i, i * 2) for i in range(10))",
    "question_id": "17815945-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range",
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict((i, i * )  i  range())",
    "canonical_cmd": "dict((i, i * 2) for i in range(10))"
  },
  {
    "nl": "skip the newline while printing `line`",
    "cmd": "print(line.rstrip('\\n'))",
    "question_id": "17027690-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rstrip"
    ],
    "clean_cmd": "print(line.rstrip())",
    "canonical_cmd": "print(VAR_STR.rstrip('\\n'))"
  },
  {
    "nl": "move dictionaries in list `lst` to the end of the list if value of key 'language' in each dictionary is not equal to 'en'",
    "cmd": "sorted(lst, key=lambda x: x['language'] != 'en')",
    "question_id": "42364593-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(lst, key= x: x[] != )",
    "canonical_cmd": "sorted(VAR_STR, key=lambda x: x['VAR_STR'] != 'VAR_STR')"
  },
  {
    "nl": "writing string 'text to write\\n' to file `f`",
    "cmd": "f.write('text to write\\n')",
    "question_id": "2918362-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.write"
    ],
    "clean_cmd": "f.write()",
    "canonical_cmd": "VAR_STR.write('VAR_STR')"
  },
  {
    "nl": "Write a string `My String` to a file `file` including new line character",
    "cmd": "file.write('My String\\n')",
    "question_id": "2918362-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.write"
    ],
    "clean_cmd": "file.write()",
    "canonical_cmd": "VAR_STR.write('My String\\n')"
  },
  {
    "nl": "use regex pattern '((.+?)\\\\2+)' to split string '44442(2)2(2)44'",
    "cmd": "[m[0] for m in re.compile('((.+?)\\\\2+)').findall('44442(2)2(2)44')]",
    "question_id": "40582103-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.compile",
      "python.library.re#re.findall"
    ],
    "clean_cmd": "[m[]  m  re.compile().findall()]",
    "canonical_cmd": "[m[0] for m in re.compile('VAR_STR').findall('VAR_STR')]"
  },
  {
    "nl": "use regular expression '((\\\\d)(?:[()]*\\\\2*[()]*)*)' to split string `s`",
    "cmd": "[i[0] for i in re.findall('((\\\\d)(?:[()]*\\\\2*[()]*)*)', s)]",
    "question_id": "40582103-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "[i[]  i  re.findall(, s)]",
    "canonical_cmd": "[i[0] for i in re.findall('VAR_STR', VAR_STR)]"
  },
  {
    "nl": "add character '@' after word 'get' in string `text`",
    "cmd": "text = re.sub('(\\\\bget\\\\b)', '\\\\1@', text)",
    "question_id": "20735384-98",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "text = re.sub(, , text)",
    "canonical_cmd": "VAR_STR = re.sub('(\\\\bget\\\\b)', '\\\\1@', VAR_STR)"
  },
  {
    "nl": "serialize dictionary `d` as a JSON formatted string with each key formatted to pattern '%d,%d'",
    "cmd": "simplejson.dumps(dict([('%d,%d' % k, v) for k, v in list(d.items())]))",
    "question_id": "715550-30",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items",
      "python.library.json#json.dumps"
    ],
    "clean_cmd": "simplejson.dumps(dict([( % k, v)  k, v  list(d.items())]))",
    "canonical_cmd": "simplejson.dumps(dict([('VAR_STR' % k, v) for k, v in list(VAR_STR.items())]))"
  },
  {
    "nl": "decode escape sequences in string `myString`",
    "cmd": "myString.decode('string_escape')",
    "question_id": "4020539-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "myString.decode()",
    "canonical_cmd": "VAR_STR.decode('string_escape')"
  },
  {
    "nl": "remove colon character surrounded by vowels letters in string `word`",
    "cmd": "word = re.sub('([aeiou]):(([aeiou][^aeiou]*){3})$', '\\\\1\\\\2', word)",
    "question_id": "33724111-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "word = re.sub(, , word)",
    "canonical_cmd": "VAR_STR = re.sub('([aeiou]):(([aeiou][^aeiou]*){3})$', '\\\\1\\\\2', VAR_STR)"
  },
  {
    "nl": "group dataframe `df` based on minute interval",
    "cmd": "df.groupby(df.index.map(lambda t: t.minute))",
    "question_id": "11073609-16",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.index.map"
    ],
    "clean_cmd": "df.groupby(df.index.map( t: t.minute))",
    "canonical_cmd": "VAR_STR.groupby(VAR_STR.index.map(lambda t: t.minute))"
  },
  {
    "nl": "Execute a post request to url `http://httpbin.org/post` with json data `{'test': 'cheers'}`",
    "cmd": "requests.post('http://httpbin.org/post', json={'test': 'cheers'})",
    "question_id": "9733638-44",
    "cmd_name": "conala",
    "oracle_man": [
      "pygame.ref.fastevent#pygame.fastevent.post"
    ],
    "clean_cmd": "requests.post(, json={: })",
    "canonical_cmd": "requests.post('VAR_STR', json={VAR_STR})"
  },
  {
    "nl": "delete all digits in string `s` that are not directly attached to a word character",
    "cmd": "re.sub('$\\\\d+\\\\W+|\\\\b\\\\d+\\\\b|\\\\W+\\\\d+$', '', s)",
    "question_id": "817122-45",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , s)",
    "canonical_cmd": "re.sub('$\\\\d+\\\\W+|\\\\b\\\\d+\\\\b|\\\\W+\\\\d+$', '', VAR_STR)"
  },
  {
    "nl": "delete digits at the end of string `s`",
    "cmd": "re.sub('\\\\b\\\\d+\\\\b', '', s)",
    "question_id": "817122-20",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , s)",
    "canonical_cmd": "re.sub('\\\\b\\\\d+\\\\b', '', VAR_STR)"
  },
  {
    "nl": "Delete self-contained digits from string `s`",
    "cmd": "s = re.sub('^\\\\d+\\\\s|\\\\s\\\\d+\\\\s|\\\\s\\\\d+$', ' ', s)",
    "question_id": "817122-0",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "s = re.sub(, , s)",
    "canonical_cmd": "VAR_STR = re.sub('^\\\\d+\\\\s|\\\\s\\\\d+\\\\s|\\\\s\\\\d+$', ' ', VAR_STR)"
  },
  {
    "nl": "get the maximum 2 values per row in array `A`",
    "cmd": "A[:, -2:]",
    "question_id": "37125495-30",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "A[:, -:]",
    "canonical_cmd": "VAR_STR[:, -2:]"
  },
  {
    "nl": "convert date `my_date` to datetime",
    "cmd": "datetime.datetime.combine(my_date, datetime.time.min)",
    "question_id": "15661013-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.combine"
    ],
    "clean_cmd": "datetime.datetime.combine(my_date, datetime.time.min)",
    "canonical_cmd": "datetime.datetime.combine(VAR_STR, datetime.time.min)"
  },
  {
    "nl": "append line \"appended text\" to file \"test.txt\"",
    "cmd": "with open('test.txt', 'a') as myfile:\n    myfile.write('appended text')",
    "question_id": "4706499-19",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.os#os.write"
    ],
    "clean_cmd": " open(, )  myfile:myfile.write()",
    "canonical_cmd": "with open('VAR_STR', 'a') as myfile:\n    myfile.write('VAR_STR')"
  },
  {
    "nl": "append line \"cool beans...\" to file \"foo\"",
    "cmd": "with open('foo', 'a') as f:\n    f.write('cool beans...')",
    "question_id": "4706499-84",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.os#os.write"
    ],
    "clean_cmd": " open(, )  f:f.write()",
    "canonical_cmd": "with open('VAR_STR', 'a') as f:\n    f.write('VAR_STR')"
  },
  {
    "nl": "append to file 'test1' content 'koko'",
    "cmd": "with open('test1', 'ab') as f:\n    pass",
    "question_id": "4706499-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open"
    ],
    "clean_cmd": " open(, )  f:",
    "canonical_cmd": "with open('VAR_STR', 'ab') as f:\n    pass"
  },
  {
    "nl": "append to file 'test' content 'koko'",
    "cmd": "open('test', 'a+b').write('koko')",
    "question_id": "4706499-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.os#os.write"
    ],
    "clean_cmd": "open(, ).write()",
    "canonical_cmd": "open('VAR_STR', 'a+b').write('VAR_STR')"
  },
  {
    "nl": "get the name of function `func` as a string",
    "cmd": "print(func.__name__)",
    "question_id": "7142062-27",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print(func.__name__)",
    "canonical_cmd": "print(VAR_STR.__name__)"
  },
  {
    "nl": "do a boolean check if a string `lestring` contains any of the items in list `lelist`",
    "cmd": "any(e in lestring for e in lelist)",
    "question_id": "14411633-100",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#any"
    ],
    "clean_cmd": "any(e  lestring  e  lelist)",
    "canonical_cmd": "any(e in VAR_STR for e in VAR_STR)"
  },
  {
    "nl": "combine two columns `foo` and `bar` in a pandas data frame",
    "cmd": "pandas.concat([df['foo'].dropna(), df['bar'].dropna()]).reindex_like(df)",
    "question_id": "10972410-96",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.concat",
      "pandas.reference.api.pandas.index.dropna",
      "pandas.reference.api.pandas.series.reindex_like"
    ],
    "clean_cmd": "pandas.concat([df[].dropna(), df[].dropna()]).reindex_like(df)",
    "canonical_cmd": "pandas.concat([df['VAR_STR'].dropna(), df['VAR_STR'].dropna()]).reindex_like(df)"
  },
  {
    "nl": "sort a list of dictionaries `a` by dictionary values in descending order",
    "cmd": "sorted(a, key=lambda i: list(i.values())[0], reverse=True)",
    "question_id": "10915391-10",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "sorted(a, key= i: list(i.values())[], reverse=True)",
    "canonical_cmd": "sorted(VAR_STR, key=lambda i: list(i.values())[0], reverse=True)"
  },
  {
    "nl": "sorting a list of dictionary `a` by values in descending order",
    "cmd": "sorted(a, key=dict.values, reverse=True)",
    "question_id": "10915391-39",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(a, key=dict.values, reverse=True)",
    "canonical_cmd": "sorted(VAR_STR, key=dict.values, reverse=True)"
  },
  {
    "nl": "split string `s` by '@' and get the first element",
    "cmd": "s.split('@')[0]",
    "question_id": "15210485-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "s.split()[]",
    "canonical_cmd": "VAR_STR.split('VAR_STR')[0]"
  },
  {
    "nl": "Add a tuple with value `another_choice` to a tuple `my_choices`",
    "cmd": "final_choices = ((another_choice,) + my_choices)",
    "question_id": "3523048-94",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "final_choices = ((another_choice,) + my_choices)",
    "canonical_cmd": "final_choices = (VAR_STR,) + VAR_STR"
  },
  {
    "nl": "Add a tuple with value `another_choice` to a tuple `my_choices`",
    "cmd": "final_choices = ((another_choice,) + my_choices)",
    "question_id": "3523048-81",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "final_choices = ((another_choice,) + my_choices)",
    "canonical_cmd": "final_choices = (VAR_STR,) + VAR_STR"
  },
  {
    "nl": "pair each element in list `it` 3 times into a tuple",
    "cmd": "zip(it, it, it)",
    "question_id": "23286254-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "zip(it, it, it)",
    "canonical_cmd": "zip(VAR_STR, VAR_STR, VAR_STR)"
  },
  {
    "nl": "store integer 3, 4, 1 and 2 in a list",
    "cmd": "[3, 4, 1, 2]",
    "question_id": "19672101-46",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[, , , ]",
    "canonical_cmd": "[3, 4, 1, 2]"
  },
  {
    "nl": "Sorting while preserving order in python",
    "cmd": "sorted(enumerate(a), key=lambda x: x[1])",
    "question_id": "3728017-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#enumerate"
    ],
    "clean_cmd": "sorted(enumerate(a), key= x: x[])",
    "canonical_cmd": "sorted(enumerate(a), key=lambda x: x[1])"
  },
  {
    "nl": "get list of sums of neighboring integers in string `example`",
    "cmd": "[sum(map(int, s)) for s in example.split()]",
    "question_id": "40498088-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.functions#sum",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "[sum(map(int, s))  s  example.split()]",
    "canonical_cmd": "[sum(map(int, s)) for s in VAR_STR.split()]"
  },
  {
    "nl": "split column 'AB' in dataframe `df` into two columns by first whitespace ' '",
    "cmd": "df['AB'].str.split(' ', 1, expand=True)",
    "question_id": "14745022-45",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "df[].str.split(, , expand=True)",
    "canonical_cmd": "VAR_STR['VAR_STR'].str.split(' ', 1, expand=True)"
  },
  {
    "nl": "pandas dataframe, how do i split a column 'AB' into two 'A' and 'B' on delimiter ' '",
    "cmd": "df['A'], df['B'] = df['AB'].str.split(' ', 1).str",
    "question_id": "14745022-49",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "df[], df[] = df[].str.split(, ).str",
    "canonical_cmd": "df['VAR_STR'], df['VAR_STR'] = df['VAR_STR'].str.split(' ', 1).str"
  },
  {
    "nl": "get multiple parameters with same name from a url in pylons",
    "cmd": "request.params.getall('c')",
    "question_id": "14734750-16",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "request.params.getall()",
    "canonical_cmd": "request.params.getall('c')"
  },
  {
    "nl": "match contents of an element to 'Example' in xpath (lxml)",
    "cmd": "tree.xpath(\".//a[text()='Example']\")[0].tag",
    "question_id": "2637760-10",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "tree.xpath()[].tag",
    "canonical_cmd": "tree.xpath(\".//a[text()='Example']\")[0].tag"
  },
  {
    "nl": "find rows of 2d array in 3d numpy array 'arr' if the row has value '[[0, 3], [3, 0]]'",
    "cmd": "np.argwhere(np.all(arr == [[0, 3], [3, 0]], axis=(1, 2)))",
    "question_id": "36381230-4",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.argwhere",
      "numpy.reference.generated.numpy.all"
    ],
    "clean_cmd": "np.argwhere(np.all(arr == [[, ], [, ]], axis=(, )))",
    "canonical_cmd": "np.argwhere(np.all(VAR_STR == [VAR_STR], axis=(1, 2)))"
  },
  {
    "nl": "How to convert a date string '2013-1-25' in format '%Y-%m-%d' to different format '%m/%d/%y'",
    "cmd": "datetime.datetime.strptime('2013-1-25', '%Y-%m-%d').strftime('%m/%d/%y')",
    "question_id": "14524322-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime",
      "python.library.datetime#datetime.datetime.strftime"
    ],
    "clean_cmd": "datetime.datetime.strptime(, ).strftime()",
    "canonical_cmd": "datetime.datetime.strptime('VAR_STR', 'VAR_STR').strftime('VAR_STR')"
  },
  {
    "nl": "convert a date string '2013-1-25' in format '%Y-%m-%d' to different format '%-m/%d/%y'",
    "cmd": "datetime.datetime.strptime('2013-1-25', '%Y-%m-%d').strftime('%-m/%d/%y')",
    "question_id": "14524322-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime",
      "python.library.datetime#datetime.datetime.strftime"
    ],
    "clean_cmd": "datetime.datetime.strptime(, ).strftime()",
    "canonical_cmd": "datetime.datetime.strptime('VAR_STR', 'VAR_STR').strftime('VAR_STR')"
  },
  {
    "nl": "find the count of a word 'Hello' in a string `input_string`",
    "cmd": "input_string.count('Hello')",
    "question_id": "11300383-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.count"
    ],
    "clean_cmd": "input_string.count()",
    "canonical_cmd": "VAR_STR.count('VAR_STR')"
  },
  {
    "nl": "count the number of trailing question marks in string `my_text`",
    "cmd": "len(my_text) - len(my_text.rstrip('?'))",
    "question_id": "42178481-4",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.stdtypes#str.rstrip"
    ],
    "clean_cmd": "len(my_text) - len(my_text.rstrip())",
    "canonical_cmd": "len(VAR_STR) - len(VAR_STR.rstrip('?'))"
  },
  {
    "nl": "extract dictionary `d` from list `a` where the value associated with the key 'name' of dictionary `d` is equal to 'pluto'",
    "cmd": "[d for d in a if d['name'] == 'pluto']",
    "question_id": "7900882-78",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[d  d  a  d[] == ]",
    "canonical_cmd": "[VAR_STR for VAR_STR in VAR_STR if VAR_STR['VAR_STR'] == 'VAR_STR']"
  },
  {
    "nl": "extract dictionary from list of dictionaries based on a key's value.",
    "cmd": "[d for d in a if d['name'] == 'pluto']",
    "question_id": "7900882-19",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[d  d  a  d[] == ]",
    "canonical_cmd": "[d for d in a if d['name'] == 'pluto']"
  },
  {
    "nl": "get second array column length of array `a`",
    "cmd": "a.shape[1]",
    "question_id": "7670226-12",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "a.shape[]",
    "canonical_cmd": "VAR_STR.shape[1]"
  },
  {
    "nl": "convert the dataframe column 'col' from string types to datetime types",
    "cmd": "df['col'] = pd.to_datetime(df['col'])",
    "question_id": "17134716-54",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.to_datetime"
    ],
    "clean_cmd": "df[] = pd.to_datetime(df[])",
    "canonical_cmd": "df['VAR_STR'] = pd.to_datetime(df['VAR_STR'])"
  },
  {
    "nl": "remove all non -word, -whitespace, or -apostrophe characters from string `doesn't this mean it -technically- works?`",
    "cmd": "re.sub(\"[^\\\\w' ]\", '', \"doesn't this mean it -technically- works?\")",
    "question_id": "11403474-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , )",
    "canonical_cmd": "re.sub(\"[^\\\\w' ]\", '', 'VAR_STR')"
  },
  {
    "nl": "find the index of the element with the maximum value from a list 'a'.",
    "cmd": "max(enumerate(a), key=lambda x: x[1])[0]",
    "question_id": "11530799-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate",
      "python.library.functions#max"
    ],
    "clean_cmd": "max(enumerate(a), key= x: x[])[]",
    "canonical_cmd": "max(enumerate(VAR_STR), key=lambda x: x[1])[0]"
  },
  {
    "nl": "Check if string 'a b' only contains letters and spaces",
    "cmd": "\"\"\"a b\"\"\".replace(' ', '').isalpha()",
    "question_id": "29454773-91",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.isalpha",
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": ".replace(, ).isalpha()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".replace(' ', '').isalpha()"
  },
  {
    "nl": "pad 'dog' up to a length of 5 characters with 'x'",
    "cmd": "\"\"\"{s:{c}^{n}}\"\"\".format(s='dog', n=5, c='x')",
    "question_id": "4008546-45",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format(s=, n=, c=)",
    "canonical_cmd": "\"\"\"{s:{c}^{n}}\"\"\".format(s='VAR_STR', n=5, c='VAR_STR')"
  },
  {
    "nl": "find all substrings in string `mystring` composed only of letters `a` and `b` where each `a` is directly preceded and succeeded by `b`",
    "cmd": "re.findall('\\\\b(?:b+a)+b+\\\\b', mystring)",
    "question_id": "32926587-42",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, mystring)",
    "canonical_cmd": "re.findall('\\\\b(?:b+a)+b+\\\\b', VAR_STR)"
  },
  {
    "nl": "check if a checkbox is checked in selenium python webdriver",
    "cmd": "driver.find_element_by_name('<check_box_name>').is_selected()",
    "question_id": "14442636-72",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.find_element_by_name().is_selected()",
    "canonical_cmd": "driver.find_element_by_name('<check_box_name>').is_selected()"
  },
  {
    "nl": "determine if checkbox with id '<check_box_id>' is checked in selenium python webdriver",
    "cmd": "driver.find_element_by_id('<check_box_id>').is_selected()",
    "question_id": "14442636-50",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.find_element_by_id().is_selected()",
    "canonical_cmd": "driver.find_element_by_id('VAR_STR').is_selected()"
  },
  {
    "nl": "Find all keys from a dictionary `d` whose values are `desired_value`",
    "cmd": "[k for k, v in d.items() if v == desired_value]",
    "question_id": "7657457-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "[k  k, v  d.items()  v == desired_value]",
    "canonical_cmd": "[k for k, v in VAR_STR.items() if v == VAR_STR]"
  },
  {
    "nl": "sort a multidimensional array `a` by column with index 1",
    "cmd": "sorted(a, key=lambda x: x[1])",
    "question_id": "20183069-6",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(a, key= x: x[])",
    "canonical_cmd": "sorted(VAR_STR, key=lambda x: x[1])"
  },
  {
    "nl": "sum the length of all strings in a list `strings`",
    "cmd": "length = sum(len(s) for s in strings)",
    "question_id": "3780403-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#sum"
    ],
    "clean_cmd": "length = sum(len(s)  s  strings)",
    "canonical_cmd": "length = sum(len(s) for s in VAR_STR)"
  },
  {
    "nl": "Convert a list of lists `lol` to a dictionary with key as second value of a list and value as list itself",
    "cmd": "{x[1]: x for x in lol}",
    "question_id": "14986218-40",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "{x[]: x  x  lol}",
    "canonical_cmd": "{x[1]: x for x in VAR_STR}"
  },
  {
    "nl": "get data of column 'A' and column 'B' in dataframe `df` where column 'A' is equal to 'foo'",
    "cmd": "df.loc[gb.groups['foo'], ('A', 'B')]",
    "question_id": "14734533-87",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.loc"
    ],
    "clean_cmd": "df.loc[gb.groups[], (, )]",
    "canonical_cmd": "VAR_STR.loc[gb.groups['VAR_STR'], ('VAR_STR', 'VAR_STR')]"
  },
  {
    "nl": "display a pdf  file that has been downloaded as `my_pdf.pdf`",
    "cmd": "webbrowser.open('file:///my_pdf.pdf')",
    "question_id": "21684346-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.webbrowser#webbrowser.open"
    ],
    "clean_cmd": "webbrowser.open()",
    "canonical_cmd": "webbrowser.open('file:///my_pdf.pdf')"
  },
  {
    "nl": "sort 2d array `matrix` by row with index 1",
    "cmd": "sorted(matrix, key=itemgetter(1))",
    "question_id": "2173797-93",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.operator#operator.itemgetter"
    ],
    "clean_cmd": "sorted(matrix, key=itemgetter())",
    "canonical_cmd": "sorted(VAR_STR, key=itemgetter(1))"
  },
  {
    "nl": "get the max string length in list `i`",
    "cmd": "max(len(word) for word in i)",
    "question_id": "39373620-92",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#max"
    ],
    "clean_cmd": "max(len(word)  word  i)",
    "canonical_cmd": "max(len(word) for word in VAR_STR)"
  },
  {
    "nl": "get the maximum string length in nested list `i`",
    "cmd": "len(max(i, key=len))",
    "question_id": "39373620-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#max"
    ],
    "clean_cmd": "len(max(i, key=len))",
    "canonical_cmd": "len(max(VAR_STR, key=len))"
  },
  {
    "nl": "find all occurrences of the pattern '\\\\[[^\\\\]]*\\\\]|\\\\([^\\\\)]*\\\\)|\"[^\"]*\"|\\\\S+' within `strs`",
    "cmd": "re.findall('\\\\[[^\\\\]]*\\\\]|\\\\([^\\\\)]*\\\\)|\"[^\"]*\"|\\\\S+', strs)",
    "question_id": "17352321-10",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, strs)",
    "canonical_cmd": "re.findall('VAR_STR', VAR_STR)"
  },
  {
    "nl": "print a string after a specific substring ', ' in string `my_string `",
    "cmd": "print(my_string.split(', ', 1)[1])",
    "question_id": "12572362-45",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "print(my_string.split(, )[])",
    "canonical_cmd": "print(VAR_STR.split(', ', 1)[1])"
  },
  {
    "nl": "extract attribute `my_attr` from each object in list `my_list`",
    "cmd": "[o.my_attr for o in my_list]",
    "question_id": "677656-11",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[o.my_attr  o  my_list]",
    "canonical_cmd": "[o.VAR_STR for o in VAR_STR]"
  },
  {
    "nl": "set the value of cell `['x']['C']` equal to 10 in dataframe `df`",
    "cmd": "df['x']['C'] = 10",
    "question_id": "13842088-36",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df[][] = ",
    "canonical_cmd": "VAR_STR[VAR_STR] = 10"
  },
  {
    "nl": "how to get month name of datetime `today`",
    "cmd": "today.strftime('%B')",
    "question_id": "9621388-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.strftime"
    ],
    "clean_cmd": "today.strftime()",
    "canonical_cmd": "VAR_STR.strftime('%B')"
  },
  {
    "nl": "get month name from a datetime object `today`",
    "cmd": "today.strftime('%B')",
    "question_id": "9621388-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.strftime"
    ],
    "clean_cmd": "today.strftime()",
    "canonical_cmd": "VAR_STR.strftime('%B')"
  },
  {
    "nl": "execute shell command 'grep -r PASSED *.log | sort -u | wc -l' with a | pipe in it",
    "cmd": "subprocess.call('grep -r PASSED *.log | sort -u | wc -l', shell=True)",
    "question_id": "18050937-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call(, shell=True)",
    "canonical_cmd": "subprocess.call('VAR_STR', shell=True)"
  },
  {
    "nl": "Converting string lists `s` to float list",
    "cmd": "floats = [float(x) for x in s.split()]",
    "question_id": "4004550-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#float",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "floats = [float(x)  x  s.split()]",
    "canonical_cmd": "floats = [float(x) for x in VAR_STR.split()]"
  },
  {
    "nl": "Converting string lists `s` to float list",
    "cmd": "floats = map(float, s.split())",
    "question_id": "4004550-27",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "floats = map(float, s.split())",
    "canonical_cmd": "floats = map(float, VAR_STR.split())"
  },
  {
    "nl": "iterate backwards from 10 to 0",
    "cmd": "range(10, 0, -1)",
    "question_id": "3476732-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "range(, , -)",
    "canonical_cmd": "range(10, 0, -1)"
  },
  {
    "nl": "Execute Shell Script from python with variable",
    "cmd": "subprocess.call(['test.sh', str(domid)])",
    "question_id": "18742657-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call",
      "python.library.stdtypes#str"
    ],
    "clean_cmd": "subprocess.call([, str(domid)])",
    "canonical_cmd": "subprocess.call(['test.sh', str(domid)])"
  },
  {
    "nl": "count the number of integers in list `a`",
    "cmd": "sum(isinstance(x, int) for x in a)",
    "question_id": "25355705-76",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#isinstance",
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(isinstance(x, int)  x  a)",
    "canonical_cmd": "sum(isinstance(x, int) for x in VAR_STR)"
  },
  {
    "nl": "play the wav file 'sound.wav'",
    "cmd": "winsound.PlaySound('sound.wav', winsound.SND_FILENAME)",
    "question_id": "307305-84",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.winsound#winsound.PlaySound"
    ],
    "clean_cmd": "winsound.PlaySound(, winsound.SND_FILENAME)",
    "canonical_cmd": "winsound.PlaySound('VAR_STR', winsound.SND_FILENAME)"
  },
  {
    "nl": "save json output from a url \u2018http://search.twitter.com/search.json?q=hi\u2019 to file \u2018hi.json\u2019 in Python 2",
    "cmd": "urllib.request.urlretrieve('http://search.twitter.com/search.json?q=hi', 'hi.json')",
    "question_id": "3040904-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#urllib.request.urlretrieve"
    ],
    "clean_cmd": "urllib.request.urlretrieve(, )",
    "canonical_cmd": "urllib.request.urlretrieve('http://search.twitter.com/search.json?q=hi',\n    'hi.json')"
  },
  {
    "nl": "remove uppercased characters in string `s`",
    "cmd": "re.sub('[^A-Z]', '', s)",
    "question_id": "15886340-14",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , s)",
    "canonical_cmd": "re.sub('[^A-Z]', '', VAR_STR)"
  },
  {
    "nl": "Get a list of lists with summing the values of the second element from each list of lists `data`",
    "cmd": "[[sum([x[1] for x in i])] for i in data]",
    "question_id": "37619348-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "[[sum([x[]  x  i])]  i  data]",
    "canonical_cmd": "[[sum([x[1] for x in i])] for i in VAR_STR]"
  },
  {
    "nl": "summing the second item in a list of lists of lists",
    "cmd": "[sum([x[1] for x in i]) for i in data]",
    "question_id": "37619348-52",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "[sum([x[]  x  i])  i  data]",
    "canonical_cmd": "[sum([x[1] for x in i]) for i in data]"
  },
  {
    "nl": "apply functions `mean` and `std` to each column in dataframe `df`",
    "cmd": "df.groupby(lambda idx: 0).agg(['mean', 'std'])",
    "question_id": "22128218-47",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.dataframe.agg"
    ],
    "clean_cmd": "df.groupby( idx: ).agg([, ])",
    "canonical_cmd": "VAR_STR.groupby(lambda idx: 0).agg(['VAR_STR', 'VAR_STR'])"
  },
  {
    "nl": "convert an int 65 to hex string",
    "cmd": "hex(65)",
    "question_id": "2269827-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#hex"
    ],
    "clean_cmd": "hex()",
    "canonical_cmd": "hex(65)"
  },
  {
    "nl": "count the number of non-nan elements in a numpy ndarray matrix `data`",
    "cmd": "np.count_nonzero(~np.isnan(data))",
    "question_id": "21778118-32",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.count_nonzero",
      "numpy.reference.generated.numpy.isnan"
    ],
    "clean_cmd": "np.count_nonzero(~np.isnan(data))",
    "canonical_cmd": "np.count_nonzero(~np.isnan(VAR_STR))"
  },
  {
    "nl": "How can I resize the root window in Tkinter?",
    "cmd": "root.geometry('500x500')",
    "question_id": "2261011-58",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "root.geometry()",
    "canonical_cmd": "root.geometry('500x500')"
  },
  {
    "nl": "add header 'WWWAuthenticate' in a flask app with value 'Basic realm=\"test\"'",
    "cmd": "response.headers['WWW-Authenticate'] = 'Basic realm=\"test\"'",
    "question_id": "19410585-47",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "response.headers[] = ",
    "canonical_cmd": "response.headers['WWW-Authenticate'] = 'VAR_STR'"
  },
  {
    "nl": "return the column for value 38.15 in dataframe `df`",
    "cmd": "df.ix[:, (df.loc[0] == 38.15)].columns",
    "question_id": "38331568-38",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.loc"
    ],
    "clean_cmd": "df.ix[:, (df.loc[] == 38.15)].columns",
    "canonical_cmd": "VAR_STR.ix[:, (VAR_STR.loc[0] == 38.15)].columns"
  },
  {
    "nl": "sort two lists `list1` and `list2` together using lambda function",
    "cmd": "[list(x) for x in zip(*sorted(zip(list1, list2), key=lambda pair: pair[0]))]",
    "question_id": "13668393-94",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#sorted",
      "python.library.functions#list"
    ],
    "clean_cmd": "[list(x)  x  zip(*sorted(zip(list1, list2), key= pair: pair[]))]",
    "canonical_cmd": "[list(x) for x in zip(*sorted(zip(VAR_STR, VAR_STR), key=lambda pair: pair[0]))]"
  },
  {
    "nl": "drop duplicate indexes in a pandas data frame `df`",
    "cmd": "df[~df.index.duplicated()]",
    "question_id": "22918212-12",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.index.duplicated"
    ],
    "clean_cmd": "df[~df.index.duplicated()]",
    "canonical_cmd": "VAR_STR[~VAR_STR.index.duplicated()]"
  },
  {
    "nl": "grab one random item from a database `model` in django/postgresql",
    "cmd": "model.objects.all().order_by('?')[0]",
    "question_id": "9354127-56",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#all"
    ],
    "clean_cmd": "model.objects.all().order_by()[]",
    "canonical_cmd": "VAR_STR.objects.all().order_by('?')[0]"
  },
  {
    "nl": "select a first form with no name in mechanize",
    "cmd": "br.select_form(nr=0)",
    "question_id": "2582580-92",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "br.select_form(nr=)",
    "canonical_cmd": "br.select_form(nr=0)"
  },
  {
    "nl": "strip html from strings",
    "cmd": "re.sub('<[^<]+?>', '', text)",
    "question_id": "753052-14",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , text)",
    "canonical_cmd": "re.sub('<[^<]+?>', '', text)"
  },
  {
    "nl": "convert a list of objects `list_name` to json string `json_string`",
    "cmd": "json_string = json.dumps([ob.__dict__ for ob in list_name])",
    "question_id": "26033239-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.json#json.dumps"
    ],
    "clean_cmd": "json_string = json.dumps([ob.__dict__  ob  list_name])",
    "canonical_cmd": "VAR_STR = json.dumps([ob.__dict__ for ob in VAR_STR])"
  },
  {
    "nl": "Get a list comparing two lists of tuples `l1` and `l2` if any first value in `l1` matches with first value in `l2`",
    "cmd": "[x[0] for x in l1 if any(x[0] == y[0] for y in l2)]",
    "question_id": "13168252-36",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#any"
    ],
    "clean_cmd": "[x[]  x  l1  any(x[] == y[]  y  l2)]",
    "canonical_cmd": "[x[0] for x in VAR_STR if any(x[0] == y[0] for y in VAR_STR)]"
  },
  {
    "nl": "find the largest integer less than `x`",
    "cmd": "int(math.ceil(x)) - 1",
    "question_id": "27758657-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.math#math.ceil",
      "python.library.functions#int"
    ],
    "clean_cmd": "int(math.ceil(x)) - ",
    "canonical_cmd": "int(math.ceil(VAR_STR)) - 1"
  },
  {
    "nl": "remove all words which contains number from a string `words` using regex",
    "cmd": "re.sub('\\\\w*\\\\d\\\\w*', '', words).strip()",
    "question_id": "18082130-81",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub",
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": "re.sub(, , words).strip()",
    "canonical_cmd": "re.sub('\\\\w*\\\\d\\\\w*', '', VAR_STR).strip()"
  },
  {
    "nl": "match a sharp, followed by letters (including accent characters) in string `str1` using a regex",
    "cmd": "hashtags = re.findall('#(\\\\w+)', str1, re.UNICODE)",
    "question_id": "18663644-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "hashtags = re.findall(, str1, re.UNICODE)",
    "canonical_cmd": "hashtags = re.findall('#(\\\\w+)', VAR_STR, re.UNICODE)"
  },
  {
    "nl": "Concat a list of strings `lst` using string formatting",
    "cmd": "\"\"\"\"\"\".join(lst)",
    "question_id": "2133571-79",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(lst)",
    "canonical_cmd": "\"\"\"\"\"\".join(VAR_STR)"
  },
  {
    "nl": "send cookies `cookie` in a post request to url 'http://wikipedia.org' with the python requests library",
    "cmd": "r = requests.post('http://wikipedia.org', cookies=cookie)",
    "question_id": "7164679-63",
    "cmd_name": "conala",
    "oracle_man": [
      "pygame.ref.fastevent#pygame.fastevent.post"
    ],
    "clean_cmd": "r = requests.post(, cookies=cookie)",
    "canonical_cmd": "r = requests.post('VAR_STR', cookies=VAR_STR)"
  },
  {
    "nl": "list comprehension that produces integers between 11 and 19",
    "cmd": "[i for i in range(100) if i > 10 if i < 20]",
    "question_id": "15248272-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "[i  i  range()  i &gt;   i &lt; ]",
    "canonical_cmd": "[i for i in range(100) if i > 10 if i < 20]"
  },
  {
    "nl": "unquote a urlencoded unicode string '%0a'",
    "cmd": "urllib.parse.unquote('%0a')",
    "question_id": "300445-24",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.parse#urllib.parse.unquote"
    ],
    "clean_cmd": "urllib.parse.unquote()",
    "canonical_cmd": "urllib.parse.unquote('VAR_STR')"
  },
  {
    "nl": "decode url `url` from UTF-16 code to UTF-8 code",
    "cmd": "urllib.parse.unquote(url).decode('utf8')",
    "question_id": "300445-76",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.parse#urllib.parse.unquote",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "urllib.parse.unquote(url).decode()",
    "canonical_cmd": "urllib.parse.unquote(VAR_STR).decode('utf8')"
  },
  {
    "nl": "display current time in readable format",
    "cmd": "time.strftime('%l:%M%p %z on %b %d, %Y')",
    "question_id": "3961581-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.strftime"
    ],
    "clean_cmd": "time.strftime()",
    "canonical_cmd": "time.strftime('%l:%M%p %z on %b %d, %Y')"
  },
  {
    "nl": "replace nans by preceding values in pandas dataframe `df`",
    "cmd": "df.fillna(method='ffill', inplace=True)",
    "question_id": "27905295-6",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.fillna"
    ],
    "clean_cmd": "df.fillna(method=, inplace=True)",
    "canonical_cmd": "VAR_STR.fillna(method='ffill', inplace=True)"
  },
  {
    "nl": "sort list `list_` based on first element of each tuple and by the length of the second element of each tuple",
    "cmd": "list_.sort(key=lambda x: [x[0], len(x[1]), x[1]])",
    "question_id": "19643099-48",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "list_.sort(key= x: [x[], len(x[]), x[]])",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: [x[0], len(x[1]), x[1]])"
  },
  {
    "nl": "sort a multidimensional list `a` by second and third column",
    "cmd": "a.sort(key=operator.itemgetter(2, 3))",
    "question_id": "1683775-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.operator#operator.itemgetter",
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "a.sort(key=operator.itemgetter(, ))",
    "canonical_cmd": "VAR_STR.sort(key=operator.itemgetter(2, 3))"
  },
  {
    "nl": "Get all indexes of a list `a` where each value is greater than `2`",
    "cmd": "[i for i in range(len(a)) if a[i] > 2]",
    "question_id": "7270321-45",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range"
    ],
    "clean_cmd": "[i  i  range(len(a))  a[i] &gt; ]",
    "canonical_cmd": "[i for i in range(len(VAR_STR)) if VAR_STR[i] > 2]"
  },
  {
    "nl": "replace special characters in url 'http://spam.com/go/' using the '%xx' escape",
    "cmd": "urllib.parse.quote('http://spam.com/go/')",
    "question_id": "5399112-45",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.parse#urllib.parse.quote"
    ],
    "clean_cmd": "urllib.parse.quote()",
    "canonical_cmd": "urllib.parse.quote('VAR_STR')"
  },
  {
    "nl": "execute a command in the command prompt to list directory contents of the c drive `c:\\\\'",
    "cmd": "os.system('dir c:\\\\')",
    "question_id": "5486725-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system('dir c:\\\\')"
  },
  {
    "nl": "Compose keys from dictionary `d1` with respective values in dictionary `d2`",
    "cmd": "result = {k: d2.get(v) for k, v in list(d1.items())}",
    "question_id": "17846545-92",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "django.ref.class-based-views.generic-display#django.views.generic.list.BaseListView.get",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "result = {k: d2.get(v)  k, v  list(d1.items())}",
    "canonical_cmd": "result = {k: VAR_STR.get(v) for k, v in list(VAR_STR.items())}"
  },
  {
    "nl": "find all the elements that consists value '1' in a list of tuples 'a'",
    "cmd": "[item for item in a if 1 in item]",
    "question_id": "2191699-35",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[item  item  a    item]",
    "canonical_cmd": "[item for item in VAR_STR if 1 in item]"
  },
  {
    "nl": "find all elements in a list of tuples `a` where the first element of each tuple equals 1",
    "cmd": "[item for item in a if item[0] == 1]",
    "question_id": "2191699-81",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[item  item  a  item[] == ]",
    "canonical_cmd": "[item for item in VAR_STR if item[0] == 1]"
  },
  {
    "nl": "execute os command ''TASKKILL /F /IM firefox.exe''",
    "cmd": "os.system('TASKKILL /F /IM firefox.exe')",
    "question_id": "5625524-90",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system('VAR_STR')"
  },
  {
    "nl": "copy a file from `src` to `dst`",
    "cmd": "copyfile(src, dst)",
    "question_id": "123198-58",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.shutil#shutil.copyfile"
    ],
    "clean_cmd": "copyfile(src, dst)",
    "canonical_cmd": "copyfile(VAR_STR, VAR_STR)"
  },
  {
    "nl": "copy file \"/dir/file.ext\" to \"/new/dir/newname.ext\"",
    "cmd": "shutil.copy2('/dir/file.ext', '/new/dir/newname.ext')",
    "question_id": "123198-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.shutil#shutil.copy2"
    ],
    "clean_cmd": "shutil.copy2(, )",
    "canonical_cmd": "shutil.copy2('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "copy file '/dir/file.ext' to '/new/dir'",
    "cmd": "shutil.copy2('/dir/file.ext', '/new/dir')",
    "question_id": "123198-90",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.shutil#shutil.copy2"
    ],
    "clean_cmd": "shutil.copy2(, )",
    "canonical_cmd": "shutil.copy2('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "Get a string with string formatting from dictionary `d`",
    "cmd": "\"\"\", \"\"\".join(['{}_{}'.format(k, v) for k, v in d.items()])",
    "question_id": "17462994-4",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format",
      "python.library.stdtypes#dict.items",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join([.format(k, v)  k, v  d.items()])",
    "canonical_cmd": "\"\"\", \"\"\".join(['{}_{}'.format(k, v) for k, v in VAR_STR.items()])"
  },
  {
    "nl": "get the ASCII value of a character 'a' as an int",
    "cmd": "ord('a')",
    "question_id": "227459-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#ord"
    ],
    "clean_cmd": "ord()",
    "canonical_cmd": "ord('VAR_STR')"
  },
  {
    "nl": "get the ASCII value of a character u'\u3042' as an int",
    "cmd": "ord('\\u3042')",
    "question_id": "227459-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#ord"
    ],
    "clean_cmd": "ord()",
    "canonical_cmd": "ord('\u3042')"
  },
  {
    "nl": "get the ASCII value of a character as an int",
    "cmd": "ord()",
    "question_id": "227459-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#ord"
    ],
    "clean_cmd": "ord()",
    "canonical_cmd": "ord()"
  },
  {
    "nl": "What's the best way to search for a Python dictionary value in a list of dictionaries?",
    "cmd": "any(d['site'] == 'Superuser' for d in data)",
    "question_id": "1580270-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#any"
    ],
    "clean_cmd": "any(d[] ==   d  data)",
    "canonical_cmd": "any(d['site'] == 'Superuser' for d in data)"
  },
  {
    "nl": "return dataframe `df` with last row dropped",
    "cmd": "df.ix[:-1]",
    "question_id": "11414596-44",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df.ix[:-]",
    "canonical_cmd": "VAR_STR.ix[:-1]"
  },
  {
    "nl": "separate numbers from characters in string \"30m1000n20m\"",
    "cmd": "re.findall('(([0-9]+)([A-Z]))', '20M10000N80M')",
    "question_id": "15103484-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall('(([0-9]+)([A-Z]))', '20M10000N80M')"
  },
  {
    "nl": "separate numbers and characters in string '20M10000N80M'",
    "cmd": "re.findall('([0-9]+|[A-Z])', '20M10000N80M')",
    "question_id": "15103484-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall('([0-9]+|[A-Z])', 'VAR_STR')"
  },
  {
    "nl": "separate numbers and characters in string '20M10000N80M'",
    "cmd": "re.findall('([0-9]+)([A-Z])', '20M10000N80M')",
    "question_id": "15103484-24",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall('([0-9]+)([A-Z])', 'VAR_STR')"
  },
  {
    "nl": "manually throw/raise a `ValueError` exception with the message 'A very specific bad thing happened'",
    "cmd": "raise ValueError('A very specific bad thing happened')",
    "question_id": "2052390-4",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.exceptions#ValueError"
    ],
    "clean_cmd": " ValueError()",
    "canonical_cmd": "raise VAR_STR('VAR_STR')"
  },
  {
    "nl": "throw an exception \"I know Python!\"",
    "cmd": "raise Exception('I know Python!')",
    "question_id": "2052390-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.exceptions#Exception"
    ],
    "clean_cmd": " Exception()",
    "canonical_cmd": "raise Exception('VAR_STR')"
  },
  {
    "nl": "Manually throw an exception \"I know python!\"",
    "cmd": "raise Exception('I know python!')",
    "question_id": "2052390-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.exceptions#Exception"
    ],
    "clean_cmd": " Exception()",
    "canonical_cmd": "raise Exception('VAR_STR')"
  },
  {
    "nl": "throw a ValueError with message 'represents a hidden bug, do not catch this'",
    "cmd": "raise ValueError('represents a hidden bug, do not catch this')",
    "question_id": "2052390-39",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.exceptions#ValueError"
    ],
    "clean_cmd": " ValueError()",
    "canonical_cmd": "raise ValueError('VAR_STR')"
  },
  {
    "nl": "throw an Exception with message 'This is the exception you expect to handle'",
    "cmd": "raise Exception('This is the exception you expect to handle')",
    "question_id": "2052390-95",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.exceptions#Exception"
    ],
    "clean_cmd": " Exception()",
    "canonical_cmd": "raise Exception('VAR_STR')"
  },
  {
    "nl": "throw a value error with message 'A very specific bad thing happened', 'foo', 'bar', 'baz'",
    "cmd": "raise ValueError('A very specific bad thing happened')",
    "question_id": "2052390-76",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.exceptions#ValueError"
    ],
    "clean_cmd": " ValueError()",
    "canonical_cmd": "raise ValueError('VAR_STR')"
  },
  {
    "nl": "throw a runtime error with message 'specific message'",
    "cmd": "raise RuntimeError('specific message')",
    "question_id": "2052390-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.exceptions#RuntimeError"
    ],
    "clean_cmd": " RuntimeError()",
    "canonical_cmd": "raise RuntimeError('VAR_STR')"
  },
  {
    "nl": "throw an assertion error with message \"Unexpected value of 'distance'!\", distance",
    "cmd": "raise AssertionError(\"Unexpected value of 'distance'!\", distance)",
    "question_id": "2052390-32",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.exceptions#AssertionError"
    ],
    "clean_cmd": " AssertionError(, distance)",
    "canonical_cmd": "raise AssertionError('VAR_STR', distance)"
  },
  {
    "nl": "remove duplicates from list `myset`",
    "cmd": "mynewlist = list(myset)",
    "question_id": "12897374-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list"
    ],
    "clean_cmd": "mynewlist = list(myset)",
    "canonical_cmd": "mynewlist = list(VAR_STR)"
  },
  {
    "nl": "get unique values from the list `['a', 'b', 'c', 'd']`",
    "cmd": "set(['a', 'b', 'c', 'd'])",
    "question_id": "12897374-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#set"
    ],
    "clean_cmd": "set([, , , ])",
    "canonical_cmd": "set([VAR_STR])"
  },
  {
    "nl": "split string `s` based on white spaces",
    "cmd": "re.findall('\\\\s+|\\\\S+', s)",
    "question_id": "35005907-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, s)",
    "canonical_cmd": "re.findall('\\\\s+|\\\\S+', VAR_STR)"
  },
  {
    "nl": "convert a hex string `x` to string",
    "cmd": "y = str(int(x, 16))",
    "question_id": "2636755-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#str"
    ],
    "clean_cmd": "y = str(int(x, ))",
    "canonical_cmd": "y = str(int(VAR_STR, 16))"
  },
  {
    "nl": "get list of duplicated elements in range of 3",
    "cmd": "[y for x in range(3) for y in [x, x]]",
    "question_id": "3925465-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "[y  x  range()  y  [x, x]]",
    "canonical_cmd": "[y for x in range(3) for y in [x, x]]"
  },
  {
    "nl": "remove elements in list `b` from list `a`",
    "cmd": "[x for x in a if x not in b]",
    "question_id": "9053260-30",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[x  x  a  x   b]",
    "canonical_cmd": "[x for x in VAR_STR if x not in VAR_STR]"
  },
  {
    "nl": "create a list `matrix` containing 5 lists, each of 5 items  all set to 0",
    "cmd": "matrix = [([0] * 5) for i in range(5)]",
    "question_id": "6667201-23",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "matrix = [([] * )  i  range()]",
    "canonical_cmd": "VAR_STR = [([0] * 5) for i in range(5)]"
  },
  {
    "nl": "interleave the elements of two lists `a` and `b`",
    "cmd": "[j for i in zip(a, b) for j in i]",
    "question_id": "3471999-53",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "[j  i  zip(a, b)  j  i]",
    "canonical_cmd": "[j for i in zip(VAR_STR, VAR_STR) for j in i]"
  },
  {
    "nl": "merge two lists `a` and `b` into a single list",
    "cmd": "[j for i in zip(a, b) for j in i]",
    "question_id": "3471999-1",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "[j  i  zip(a, b)  j  i]",
    "canonical_cmd": "[j for i in zip(VAR_STR, VAR_STR) for j in i]"
  },
  {
    "nl": "SQLite get a list of column names from cursor object `cursor`",
    "cmd": "names = list(map(lambda x: x[0], cursor.description))",
    "question_id": "7831371-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.functions#list"
    ],
    "clean_cmd": "names = list(map( x: x[], cursor.description))",
    "canonical_cmd": "names = list(map(lambda x: x[0], VAR_STR.description))"
  },
  {
    "nl": "select the last business day of the month for each month in 2014 in pandas",
    "cmd": "pd.date_range('1/1/2014', periods=12, freq='BM')",
    "question_id": "27218543-39",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.date_range"
    ],
    "clean_cmd": "pd.date_range(, periods=, freq=)",
    "canonical_cmd": "pd.date_range('1/1/2014', periods=12, freq='BM')"
  },
  {
    "nl": "rename `last` row index label in dataframe `df` to `a`",
    "cmd": "df = df.rename(index={last: 'a'})",
    "question_id": "42142756-73",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.rename"
    ],
    "clean_cmd": "df = df.rename(index={last: })",
    "canonical_cmd": "VAR_STR = VAR_STR.rename(index={VAR_STR: 'VAR_STR'})"
  },
  {
    "nl": "Finding the largest delta between two integers in a list in python",
    "cmd": "max(abs(x - y) for x, y in zip(values[1:], values[:-1]))",
    "question_id": "3428769-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#abs",
      "python.library.functions#max"
    ],
    "clean_cmd": "max(abs(x - y)  x, y  zip(values[:], values[:-]))",
    "canonical_cmd": "max(abs(x - y) for x, y in zip(values[1:], values[:-1]))"
  },
  {
    "nl": "delay for \"5\" seconds",
    "cmd": "time.sleep(5)",
    "question_id": "510348-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.sleep"
    ],
    "clean_cmd": "time.sleep()",
    "canonical_cmd": "time.sleep(5)"
  },
  {
    "nl": "make a 60 seconds time delay",
    "cmd": "time.sleep(60)",
    "question_id": "510348-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.sleep"
    ],
    "clean_cmd": "time.sleep()",
    "canonical_cmd": "time.sleep(60)"
  },
  {
    "nl": "make a 0.1 seconds time delay",
    "cmd": "sleep(0.1)",
    "question_id": "510348-96",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.sleep"
    ],
    "clean_cmd": "sleep(0.1)",
    "canonical_cmd": "sleep(0.1)"
  },
  {
    "nl": "make a 60 seconds time delay",
    "cmd": "time.sleep(60)",
    "question_id": "510348-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.sleep"
    ],
    "clean_cmd": "time.sleep()",
    "canonical_cmd": "time.sleep(60)"
  },
  {
    "nl": "make a 0.1 seconds time delay",
    "cmd": "time.sleep(0.1)",
    "question_id": "510348-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.sleep"
    ],
    "clean_cmd": "time.sleep(0.1)",
    "canonical_cmd": "time.sleep(0.1)"
  },
  {
    "nl": "pads string '5' on the left with 1 zero",
    "cmd": "print('{0}'.format('5'.zfill(2)))",
    "question_id": "3505831-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format",
      "python.library.stdtypes#str.zfill"
    ],
    "clean_cmd": "print(.format(.zfill()))",
    "canonical_cmd": "print('{0}'.format('VAR_STR'.zfill(2)))"
  },
  {
    "nl": "Unzip a list of tuples `l` into a list of lists",
    "cmd": "zip(*l)",
    "question_id": "12974474-46",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "zip(*l)",
    "canonical_cmd": "zip(*VAR_STR)"
  },
  {
    "nl": "reduce the first element of list of strings `data` to a string, separated by '.'",
    "cmd": "print('.'.join([item[0] for item in data]))",
    "question_id": "27436748-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "print(.join([item[]  item  data]))",
    "canonical_cmd": "print('VAR_STR'.join([item[0] for item in VAR_STR]))"
  },
  {
    "nl": "get the first element of each tuple from a list of tuples `G`",
    "cmd": "[x[0] for x in G]",
    "question_id": "12440342-92",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[x[]  x  G]",
    "canonical_cmd": "[x[0] for x in VAR_STR]"
  },
  {
    "nl": "create list `changed_list ` containing elements of list `original_list` whilst converting strings containing digits to integers",
    "cmd": "changed_list = [(int(f) if f.isdigit() else f) for f in original_list]",
    "question_id": "818949-51",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#str.isdigit"
    ],
    "clean_cmd": "changed_list = [(int(f)  f.isdigit()  f)  f  original_list]",
    "canonical_cmd": "VAR_STR = [(int(f) if f.isdigit() else f) for f in VAR_STR]"
  },
  {
    "nl": "flatten a dataframe df to a list",
    "cmd": "df.values.flatten()",
    "question_id": "25440008-98",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df.values.flatten()",
    "canonical_cmd": "df.values.flatten()"
  },
  {
    "nl": "randomly select an item from list `foo`",
    "cmd": "random.choice(foo)",
    "question_id": "306400-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.random#random.choice"
    ],
    "clean_cmd": "random.choice(foo)",
    "canonical_cmd": "random.choice(VAR_STR)"
  },
  {
    "nl": "regular expression match nothing",
    "cmd": "re.compile('$^')",
    "question_id": "940822-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.compile"
    ],
    "clean_cmd": "re.compile()",
    "canonical_cmd": "re.compile('$^')"
  },
  {
    "nl": "regular expression syntax for not to match anything",
    "cmd": "re.compile('.\\\\A|.\\\\A*|.\\\\A+')",
    "question_id": "940822-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.compile"
    ],
    "clean_cmd": "re.compile()",
    "canonical_cmd": "re.compile('.\\\\A|.\\\\A*|.\\\\A+')"
  },
  {
    "nl": "create a regular expression object with a pattern that will match nothing",
    "cmd": "re.compile('a^')",
    "question_id": "940822-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.compile"
    ],
    "clean_cmd": "re.compile()",
    "canonical_cmd": "re.compile('a^')"
  },
  {
    "nl": "convert a list of strings `lst` to list of integers",
    "cmd": "[map(int, sublist) for sublist in lst]",
    "question_id": "34696853-20",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map"
    ],
    "clean_cmd": "[map(int, sublist)  sublist  lst]",
    "canonical_cmd": "[map(int, sublist) for sublist in VAR_STR]"
  },
  {
    "nl": "convert strings in list-of-lists `lst` to ints",
    "cmd": "[[int(x) for x in sublist] for sublist in lst]",
    "question_id": "34696853-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "[[int(x)  x  sublist]  sublist  lst]",
    "canonical_cmd": "[[int(x) for x in sublist] for sublist in VAR_STR]"
  },
  {
    "nl": "create a list with the characters of a string `5+6`",
    "cmd": "list('5+6')",
    "question_id": "5501641-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list"
    ],
    "clean_cmd": "list()",
    "canonical_cmd": "list('VAR_STR')"
  },
  {
    "nl": "How to download a file via FTP with Python ftplib",
    "cmd": "ftp.retrbinary('RETR %s' % filename, file.write)",
    "question_id": "11573817-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.ftplib#ftplib.FTP.retrbinary"
    ],
    "clean_cmd": "ftp.retrbinary( % filename, file.write)",
    "canonical_cmd": "ftp.retrbinary('RETR %s' % filename, file.write)"
  },
  {
    "nl": "print the number of occurences of not `none` in a list `lst` in Python 2",
    "cmd": "print(len([x for x in lst if x is not None]))",
    "question_id": "29422691-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len"
    ],
    "clean_cmd": "print(len([x  x  lst  x   None]))",
    "canonical_cmd": "print(len([x for x in VAR_STR if x is not None]))"
  },
  {
    "nl": "encode string `s` to utf-8 code",
    "cmd": "s.encode('utf8')",
    "question_id": "9339630-98",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "s.encode()",
    "canonical_cmd": "VAR_STR.encode('utf8')"
  },
  {
    "nl": "get a sum of 4d array `M`",
    "cmd": "M.sum(axis=0).sum(axis=0)",
    "question_id": "24841306-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "M.sum(axis=).sum(axis=)",
    "canonical_cmd": "VAR_STR.sum(axis=0).sum(axis=0)"
  },
  {
    "nl": "sort a python dictionary `a_dict` by element `1` of the value",
    "cmd": "sorted(list(a_dict.items()), key=lambda item: item[1][1])",
    "question_id": "11932729-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(list(a_dict.items()), key= item: item[][])",
    "canonical_cmd": "sorted(list(VAR_STR.items()), key=lambda item: item[1][1])"
  },
  {
    "nl": "Reverse list `x`",
    "cmd": "x[::-1]",
    "question_id": "10201977-40",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "x[::-]",
    "canonical_cmd": "VAR_STR[::-1]"
  },
  {
    "nl": "delete an item `thing` in a list `some_list` if it exists",
    "cmd": "cleaned_list = [x for x in some_list if x is not thing]",
    "question_id": "4915920-75",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "cleaned_list = [x  x  some_list  x   thing]",
    "canonical_cmd": "cleaned_list = [x for x in VAR_STR if x is not VAR_STR]"
  },
  {
    "nl": "get the number of all keys in the nested dictionary `dict_list`",
    "cmd": "len(dict_test) + sum(len(v) for v in dict_test.values())",
    "question_id": "35427814-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#sum",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "len(dict_test) + sum(len(v)  v  dict_test.values())",
    "canonical_cmd": "len(dict_test) + sum(len(v) for v in dict_test.values())"
  },
  {
    "nl": "find the sums of length 7 subsets of a list `daily`",
    "cmd": "weekly = [sum(visitors[x:x + 7]) for x in range(0, len(daily), 7)]",
    "question_id": "6133434-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range",
      "python.library.functions#sum"
    ],
    "clean_cmd": "weekly = [sum(visitors[x:x + ])  x  range(, len(daily), )]",
    "canonical_cmd": "weekly = [sum(visitors[x:x + 7]) for x in range(0, len(VAR_STR), 7)]"
  },
  {
    "nl": "Get a list of all keys from dictionary `dictA` where the number of occurrences of value `duck` in that key is more than `1`",
    "cmd": "[k for k, v in dictA.items() if v.count('duck') > 1]",
    "question_id": "14743454-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.items",
      "python.library.stdtypes#str.count"
    ],
    "clean_cmd": "[k  k, v  dictA.items()  v.count() &gt; ]",
    "canonical_cmd": "[k for k, v in VAR_STR.items() if v.count('VAR_STR') > 1]"
  },
  {
    "nl": "generate a list of consecutive integers from 0 to 8",
    "cmd": "list(range(9))",
    "question_id": "29558007-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range",
      "python.library.functions#list"
    ],
    "clean_cmd": "list(range())",
    "canonical_cmd": "list(range(9))"
  },
  {
    "nl": "getting every possible combination of two elements in a list",
    "cmd": "list(itertools.combinations([1, 2, 3, 4, 5, 6], 2))",
    "question_id": "5106228-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.combinations",
      "python.library.functions#list"
    ],
    "clean_cmd": "list(itertools.combinations([, , , , , ], ))",
    "canonical_cmd": "list(itertools.combinations([1, 2, 3, 4, 5, 6], 2))"
  },
  {
    "nl": "split string 'x+13.5*10x-4e1' into tokens",
    "cmd": "print([i for i in re.split('([\\\\d.]+|\\\\W+)', 'x+13.5*10x-4e1') if i])",
    "question_id": "18312447-81",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "print([i  i  re.split(, )  i])",
    "canonical_cmd": "print([i for i in re.split('([\\\\d.]+|\\\\W+)', 'VAR_STR') if i])"
  },
  {
    "nl": "unpack elements of list `i` as arguments into function `foo`",
    "cmd": "foo(*i)",
    "question_id": "13891559-42",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "foo(*i)",
    "canonical_cmd": "VAR_STR(*VAR_STR)"
  },
  {
    "nl": "remove all square brackets from string 'abcd[e]yth[ac]ytwec'",
    "cmd": "re.sub('\\\\[.*?\\\\]', '', 'abcd[e]yth[ac]ytwec')",
    "question_id": "9470142-61",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , )",
    "canonical_cmd": "re.sub('\\\\[.*?\\\\]', '', 'VAR_STR')"
  },
  {
    "nl": "set dataframe `df` index using column 'month'",
    "cmd": "df.set_index('month')",
    "question_id": "15752422-54",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.set_index"
    ],
    "clean_cmd": "df.set_index()",
    "canonical_cmd": "VAR_STR.set_index('VAR_STR')"
  },
  {
    "nl": "get a list of the row names from index of a pandas data frame",
    "cmd": "list(df.index)",
    "question_id": "26640145-30",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list"
    ],
    "clean_cmd": "list(df.index)",
    "canonical_cmd": "list(df.index)"
  },
  {
    "nl": "get the row names from index in a pandas data frame",
    "cmd": "df.index",
    "question_id": "26640145-100",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df.index",
    "canonical_cmd": "df.index"
  },
  {
    "nl": "filtering out strings that contain 'ab' from a list of strings `lst`",
    "cmd": "[k for k in lst if 'ab' in k]",
    "question_id": "2152898-7",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[k  k  lst    k]",
    "canonical_cmd": "[k for k in VAR_STR if 'VAR_STR' in k]"
  },
  {
    "nl": "From a list of strings `my_list`, remove the values that contains numbers.",
    "cmd": "[x for x in my_list if not any(c.isdigit() for c in x)]",
    "question_id": "16084642-16",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#any",
      "python.library.stdtypes#str.isdigit"
    ],
    "clean_cmd": "[x  x  my_list   any(c.isdigit()  c  x)]",
    "canonical_cmd": "[x for x in VAR_STR if not any(c.isdigit() for c in x)]"
  },
  {
    "nl": "get file '~/foo.ini'",
    "cmd": "config_file = os.path.expanduser('~/foo.ini')",
    "question_id": "3227624-52",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.expanduser"
    ],
    "clean_cmd": "config_file = os.path.expanduser()",
    "canonical_cmd": "config_file = os.path.expanduser('VAR_STR')"
  },
  {
    "nl": "split string `text` into chunks of 16 characters each",
    "cmd": "re.findall('.{,16}\\\\b', text)",
    "question_id": "18551752-51",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, text)",
    "canonical_cmd": "re.findall('.{,16}\\\\b', VAR_STR)"
  },
  {
    "nl": "remove line breaks from string `textblock` using regex",
    "cmd": "re.sub('(?<=[a-z])\\\\r?\\\\n', ' ', textblock)",
    "question_id": "5075247-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , textblock)",
    "canonical_cmd": "re.sub('(?<=[a-z])\\\\r?\\\\n', ' ', VAR_STR)"
  },
  {
    "nl": "convert scientific notation of variable `a` to decimal",
    "cmd": "\"\"\"{:.50f}\"\"\".format(float(a[0] / a[1]))",
    "question_id": "16962512-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#float",
      "python.library.functions#format"
    ],
    "clean_cmd": ".format(float(a[] / a[]))",
    "canonical_cmd": "\"\"\"{:.50f}\"\"\".format(float(VAR_STR[0] / VAR_STR[1]))"
  },
  {
    "nl": "create a list containing the indices of elements greater than 4 in list `a`",
    "cmd": "[i for i, v in enumerate(a) if v > 4]",
    "question_id": "13717463-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate"
    ],
    "clean_cmd": "[i  i, v  enumerate(a)  v &gt; ]",
    "canonical_cmd": "[i for i, v in enumerate(VAR_STR) if v > 4]"
  },
  {
    "nl": "split 1d array `a` into 2d array at the last element",
    "cmd": "np.split(a, [-1])",
    "question_id": "42098487-14",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.split"
    ],
    "clean_cmd": "np.split(a, [-])",
    "canonical_cmd": "np.split(VAR_STR, [-1])"
  },
  {
    "nl": "convert dataframe `df` to list of dictionaries including the index values",
    "cmd": "df.to_dict('index')",
    "question_id": "29815129-54",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.to_dict"
    ],
    "clean_cmd": "df.to_dict()",
    "canonical_cmd": "VAR_STR.to_dict('index')"
  },
  {
    "nl": "Create list of dictionaries from pandas dataframe `df`",
    "cmd": "df.to_dict('records')",
    "question_id": "29815129-23",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.to_dict"
    ],
    "clean_cmd": "df.to_dict()",
    "canonical_cmd": "VAR_STR.to_dict('records')"
  },
  {
    "nl": "Flatten list `x`",
    "cmd": "x = [i[0] for i in x]",
    "question_id": "15096021-2",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "x = [i[]  i  x]",
    "canonical_cmd": "VAR_STR = [i[0] for i in VAR_STR]"
  },
  {
    "nl": "convert list `x` into a flat list",
    "cmd": "y = map(operator.itemgetter(0), x)",
    "question_id": "15096021-36",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.operator#operator.itemgetter",
      "python.library.functions#map"
    ],
    "clean_cmd": "y = map(operator.itemgetter(), x)",
    "canonical_cmd": "y = map(operator.itemgetter(0), VAR_STR)"
  },
  {
    "nl": "get a list `y` of the first element of every tuple in list `x`",
    "cmd": "y = [i[0] for i in x]",
    "question_id": "15096021-51",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "y = [i[]  i  x]",
    "canonical_cmd": "VAR_STR = [i[0] for i in VAR_STR]"
  },
  {
    "nl": "Group a pandas data frame by monthly frequenct `M` using groupby",
    "cmd": "df.groupby(pd.TimeGrouper(freq='M'))",
    "question_id": "24082784-99",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby"
    ],
    "clean_cmd": "df.groupby(pd.TimeGrouper(freq=))",
    "canonical_cmd": "df.groupby(pd.TimeGrouper(freq='VAR_STR'))"
  },
  {
    "nl": "Find average of a nested list `a`",
    "cmd": "a = [(sum(x) / len(x)) for x in zip(*a)]",
    "question_id": "2153444-1",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#len",
      "python.library.functions#sum"
    ],
    "clean_cmd": "a = [(sum(x) / len(x))  x  zip(*a)]",
    "canonical_cmd": "VAR_STR = [(sum(x) / len(x)) for x in zip(*VAR_STR)]"
  },
  {
    "nl": "Get the age of directory (or file) `/tmp` in seconds.",
    "cmd": "print(os.path.getmtime('/tmp'))",
    "question_id": "6879364-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.getmtime"
    ],
    "clean_cmd": "print(os.path.getmtime())",
    "canonical_cmd": "print(os.path.getmtime('VAR_STR'))"
  },
  {
    "nl": "Convert string to boolean from defined set of strings",
    "cmd": "s in ['true', '1', 't', 'y', 'yes', 'yeah', 'yup', 'certainly', 'uh-huh']",
    "question_id": "715417-70",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "s  [, , , , , , , , ]",
    "canonical_cmd": "s in ['true', '1', 't', 'y', 'yes', 'yeah', 'yup', 'certainly', 'uh-huh']"
  },
  {
    "nl": "Format string `hello {name}, how are you {name}, welcome {name}` to be interspersed by `name` three times, specifying the value as `john` only once",
    "cmd": "\"\"\"hello {name}, how are you {name}, welcome {name}\"\"\".format(name='john')",
    "question_id": "11921649-79",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format(name=)",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".format(VAR_STR='VAR_STR')"
  },
  {
    "nl": "divide the members of a list `conversions` by the corresponding members of another list `trials`",
    "cmd": "[(c / t) for c, t in zip(conversions, trials)]",
    "question_id": "3731426-92",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "[(c / t)  c, t  zip(conversions, trials)]",
    "canonical_cmd": "[(c / t) for c, t in zip(VAR_STR, VAR_STR)]"
  },
  {
    "nl": "searche in HTML string for elements that have text 'Python'",
    "cmd": "soup.body.findAll(text='Python')",
    "question_id": "8936030-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "soup.body.findAll(text=)",
    "canonical_cmd": "soup.body.findAll(text='VAR_STR')"
  },
  {
    "nl": "BeautifulSoup find string 'Python Jobs' in HTML body `body`",
    "cmd": "soup.body.findAll(text='Python Jobs')",
    "question_id": "8936030-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "soup.body.findAll(text=)",
    "canonical_cmd": "soup.VAR_STR.findAll(text='VAR_STR')"
  },
  {
    "nl": "Initialize a list `a` with `10000` items and each item's value `0`",
    "cmd": "a = [0] * 10000",
    "question_id": "36113747-21",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "a = [] * ",
    "canonical_cmd": "VAR_STR = [0] * 10000"
  },
  {
    "nl": "Sort lists in the list `unsorted_list` by the element at index 3 of each list",
    "cmd": "unsorted_list.sort(key=lambda x: x[3])",
    "question_id": "17555218-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "unsorted_list.sort(key= x: x[])",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: x[3])"
  },
  {
    "nl": "Replace NaN values in  column 'value' with the mean of data in column 'group' of dataframe `df`",
    "cmd": "df[['value']].fillna(df.groupby('group').transform('mean'))",
    "question_id": "40682209-98",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.dataframe.fillna",
      "pandas.reference.api.pandas.dataframe.transform"
    ],
    "clean_cmd": "df[[]].fillna(df.groupby().transform())",
    "canonical_cmd": "VAR_STR[['VAR_STR']].fillna(VAR_STR.groupby('VAR_STR').transform('mean'))"
  },
  {
    "nl": "add a path `/path/to/2014_07_13_test` to system path",
    "cmd": "sys.path.append('/path/to/2014_07_13_test')",
    "question_id": "24722212-6",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.append"
    ],
    "clean_cmd": "sys.path.append()",
    "canonical_cmd": "sys.path.append('VAR_STR')"
  },
  {
    "nl": "remove None value from list `L`",
    "cmd": "[x for x in L if x is not None]",
    "question_id": "16096754-83",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[x  x  L  x   None]",
    "canonical_cmd": "[x for x in VAR_STR if x is not None]"
  },
  {
    "nl": "Split string with comma (,) and remove whitespace from a string 'my_string'",
    "cmd": "[item.strip() for item in my_string.split(',')]",
    "question_id": "21261330-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "[item.strip()  item  my_string.split()]",
    "canonical_cmd": "[item.strip() for item in VAR_STR.split(',')]"
  },
  {
    "nl": "get current script directory",
    "cmd": "os.path.dirname(os.path.abspath(__file__))",
    "question_id": "3718657-91",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.dirname",
      "python.library.os.path#os.path.abspath"
    ],
    "clean_cmd": "os.path.dirname(os.path.abspath(__file__))",
    "canonical_cmd": "os.path.dirname(os.path.abspath(__file__))"
  },
  {
    "nl": "Match regex pattern '((?:A|B|C)D)' on string 'BDE'",
    "cmd": "re.findall('((?:A|B|C)D)', 'BDE')",
    "question_id": "11985628-48",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "create list `done` containing permutations of each element in list `[a, b, c, d]` with variable `x` as tuples",
    "cmd": "done = [(el, x) for el in [a, b, c, d]]",
    "question_id": "9962293-50",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "done = [(el, x)  el  [a, b, c, d]]",
    "canonical_cmd": "VAR_STR = [(el, VAR_STR) for el in [VAR_STR]]"
  },
  {
    "nl": "sort a set `s` by numerical value",
    "cmd": "sorted(s, key=float)",
    "question_id": "17457793-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(s, key=float)",
    "canonical_cmd": "sorted(VAR_STR, key=float)"
  },
  {
    "nl": "How do I find an element that contains specific text in Selenium Webdriver (Python)?",
    "cmd": "driver.find_elements_by_xpath(\"//*[contains(text(), 'My Button')]\")",
    "question_id": "12323403-99",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.find_elements_by_xpath()",
    "canonical_cmd": "driver.find_elements_by_xpath(\"//*[contains(text(), 'My Button')]\")"
  },
  {
    "nl": "Selecting Element \"//li/label/input\" followed by text \"polishpottery\" with Selenium WebDriver `driver`",
    "cmd": "driver.find_element_by_xpath(\"//li/label/input[contains(..,'polishpottery')]\")",
    "question_id": "11406091-7",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.find_element_by_xpath()",
    "canonical_cmd": "VAR_STR.find_element_by_xpath(\"//li/label/input[contains(..,'polishpottery')]\")"
  },
  {
    "nl": "Resample dataframe `frame` to resolution of 1 hour `1H` for timeseries index, summing values in the column `radiation` averaging those in column `tamb`",
    "cmd": "frame.resample('1H').agg({'radiation': np.sum, 'tamb': np.mean})",
    "question_id": "10020591-5",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.agg"
    ],
    "clean_cmd": "frame.resample().agg({: np.sum, : np.mean})",
    "canonical_cmd": "VAR_STR.resample('VAR_STR').agg({'VAR_STR': np.sum, 'VAR_STR': np.mean})"
  },
  {
    "nl": "sum each element `x` in list `first` with element `y` at the same index in list `second`.",
    "cmd": "[(x + y) for x, y in zip(first, second)]",
    "question_id": "14050824-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "[(x + y)  x, y  zip(first, second)]",
    "canonical_cmd": "[(VAR_STR + VAR_STR) for VAR_STR, VAR_STR in zip(VAR_STR, VAR_STR)]"
  },
  {
    "nl": "get the context of a search by keyword 'My keywords' in beautifulsoup `soup`",
    "cmd": "k = soup.find(text=re.compile('My keywords')).parent.text",
    "question_id": "28780956-93",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.compile",
      "python.library.stdtypes#str.find"
    ],
    "clean_cmd": "k = soup.find(text=re.compile()).parent.text",
    "canonical_cmd": "k = VAR_STR.find(text=re.compile('VAR_STR')).parent.text"
  },
  {
    "nl": "sort list `xs` based on the length of its elements",
    "cmd": "print(sorted(xs, key=len))",
    "question_id": "2587402-45",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "print(sorted(xs, key=len))",
    "canonical_cmd": "print(sorted(VAR_STR, key=len))"
  },
  {
    "nl": "sort list `xs` in ascending order of length of elements",
    "cmd": "xs.sort(lambda x, y: cmp(len(x), len(y)))",
    "question_id": "2587402-28",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.filecmp#filecmp.cmp",
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "xs.sort( x, y: cmp(len(x), len(y)))",
    "canonical_cmd": "VAR_STR.sort(lambda x, y: cmp(len(x), len(y)))"
  },
  {
    "nl": "sort list of strings `xs` by the length of string",
    "cmd": "xs.sort(key=lambda s: len(s))",
    "question_id": "2587402-14",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "xs.sort(key= s: len(s))",
    "canonical_cmd": "VAR_STR.sort(key=lambda s: len(s))"
  },
  {
    "nl": "Make a dictionary from list `f` which is in the format of four sets of  \"val, key, val\"",
    "cmd": "{f[i + 1]: [f[i], f[i + 2]] for i in range(0, len(f), 3)}",
    "question_id": "23914774-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range"
    ],
    "clean_cmd": "{f[i + ]: [f[i], f[i + ]]  i  range(, len(f), )}",
    "canonical_cmd": "{VAR_STR[i + 1]: [VAR_STR[i], VAR_STR[i + 2]] for i in range(0, len(VAR_STR), 3)}"
  },
  {
    "nl": "check if string `string` starts with a number",
    "cmd": "string[0].isdigit()",
    "question_id": "5577501-79",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.isdigit"
    ],
    "clean_cmd": "string[].isdigit()",
    "canonical_cmd": "VAR_STR[0].isdigit()"
  },
  {
    "nl": "Check if string `strg` starts with any of the elements in list ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')",
    "cmd": "strg.startswith(('0', '1', '2', '3', '4', '5', '6', '7', '8', '9'))",
    "question_id": "5577501-18",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.startswith"
    ],
    "clean_cmd": "strg.startswith((, , , , , , , , , ))",
    "canonical_cmd": "VAR_STR.startswith(('VAR_STR', 'VAR_STR', 'VAR_STR', 'VAR_STR', 'VAR_STR', 'VAR_STR',\n    'VAR_STR', 'VAR_STR', 'VAR_STR', 'VAR_STR'))"
  },
  {
    "nl": "create a list containing all values associated with key 'baz' in dictionaries of list `foos` using list comprehension",
    "cmd": "[y['baz'] for x in foos for y in x['bar']]",
    "question_id": "4879641-52",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[y[]  x  foos  y  x[]]",
    "canonical_cmd": "[y['VAR_STR'] for x in VAR_STR for y in x['bar']]"
  },
  {
    "nl": "Make a auto scrolled window to the end of the list in gtk",
    "cmd": "self.treeview.connect('size-allocate', self.treeview_changed)",
    "question_id": "5218948-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.sqlite3#sqlite3.connect"
    ],
    "clean_cmd": "self.treeview.connect(, self.treeview_changed)",
    "canonical_cmd": "self.treeview.connect('size-allocate', self.treeview_changed)"
  },
  {
    "nl": "Getting today's date in YYYY-MM-DD",
    "cmd": "datetime.datetime.today().strftime('%Y-%m-%d')",
    "question_id": "32490629-62",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.today",
      "python.library.datetime#datetime.datetime.strftime"
    ],
    "clean_cmd": "datetime.datetime.today().strftime()",
    "canonical_cmd": "datetime.datetime.today().strftime('%Y-%m-%d')"
  },
  {
    "nl": "creating a list of dictionaries [{'A': 1, 'C': 4, 'B': 2, 'D': 4}, {'A': 1, 'C': 4, 'B': 1, 'D': 5}]",
    "cmd": "[{'A': 1, 'C': 4, 'B': 2, 'D': 4}, {'A': 1, 'C': 4, 'B': 1, 'D': 5}]",
    "question_id": "35883459-33",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[{: , : , : , : }, {: , : , : , : }]",
    "canonical_cmd": "[{'VAR_STR': 1, 'VAR_STR': 4, 'VAR_STR': 2, 'VAR_STR': 4}, {'VAR_STR': 1, 'VAR_STR': 4,\n    'VAR_STR': 1, 'VAR_STR': 5}]"
  },
  {
    "nl": "Creating a list of dictionaries in python",
    "cmd": "[{'A': 1, 'C': 4, 'B': 2, 'D': 4}, {'A': 1, 'C': 4, 'B': 1, 'D': 5}]",
    "question_id": "35883459-5",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[{: , : , : , : }, {: , : , : , : }]",
    "canonical_cmd": "[{'A': 1, 'C': 4, 'B': 2, 'D': 4}, {'A': 1, 'C': 4, 'B': 1, 'D': 5}]"
  },
  {
    "nl": "print variable `value ` without spaces",
    "cmd": "print('Value is \"' + str(value) + '\"')",
    "question_id": "28669459-58",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str"
    ],
    "clean_cmd": "print( + str(value) + )",
    "canonical_cmd": "print('Value is \"' + str(VAR_STR) + '\"')"
  },
  {
    "nl": "Print a string `value` with string formatting",
    "cmd": "print('Value is \"{}\"'.format(value))",
    "question_id": "28669459-58",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str"
    ],
    "clean_cmd": "print(.format(value))",
    "canonical_cmd": "print('Value is \"{}\"'.format(VAR_STR))"
  },
  {
    "nl": "get index of the biggest 2 values of a list `a`",
    "cmd": "sorted(list(range(len(a))), key=lambda i: a[i])[-2:]",
    "question_id": "13070461-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#len",
      "python.library.functions#range",
      "python.library.functions#list"
    ],
    "clean_cmd": "sorted(list(range(len(a))), key= i: a[i])[-:]",
    "canonical_cmd": "sorted(list(range(len(VAR_STR))), key=lambda i: VAR_STR[i])[-2:]"
  },
  {
    "nl": "get indexes of the largest `2` values from a list `a` using itemgetter",
    "cmd": "zip(*sorted(enumerate(a), key=operator.itemgetter(1)))[0][-2:]",
    "question_id": "13070461-22",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.operator#operator.itemgetter",
      "python.library.functions#zip",
      "python.library.functions#sorted",
      "python.library.functions#enumerate"
    ],
    "clean_cmd": "zip(*sorted(enumerate(a), key=operator.itemgetter()))[][-:]",
    "canonical_cmd": "zip(*sorted(enumerate(VAR_STR), key=operator.itemgetter(1)))[0][-2:]"
  },
  {
    "nl": "get the indexes of the largest `2` values from a list of integers `a`",
    "cmd": "sorted(list(range(len(a))), key=lambda i: a[i], reverse=True)[:2]",
    "question_id": "13070461-4",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#len",
      "python.library.functions#range",
      "python.library.functions#list"
    ],
    "clean_cmd": "sorted(list(range(len(a))), key= i: a[i], reverse=True)[:]",
    "canonical_cmd": "sorted(list(range(len(VAR_STR))), key=lambda i: VAR_STR[i], reverse=True)[:2]"
  },
  {
    "nl": "adding url `url` to mysql row",
    "cmd": "cursor.execute('INSERT INTO index(url) VALUES(%s)', (url,))",
    "question_id": "13042013-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.sqlite3#sqlite3.Cursor.execute"
    ],
    "clean_cmd": "cursor.execute(, (url,))",
    "canonical_cmd": "cursor.execute('INSERT INTO index(url) VALUES(%s)', (VAR_STR,))"
  },
  {
    "nl": "fill list `myList` with 4 0's",
    "cmd": "self.myList.extend([0] * (4 - len(self.myList)))",
    "question_id": "7026131-18",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.collections#collections.deque.extend"
    ],
    "clean_cmd": "self.myList.extend([] * ( - len(self.myList)))",
    "canonical_cmd": "self.VAR_STR.extend([0] * (4 - len(self.VAR_STR)))"
  },
  {
    "nl": "immediately see output of print statement that doesn't end in a newline",
    "cmd": "sys.stdout.flush()",
    "question_id": "5917537-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.logging#logging.Handler.flush"
    ],
    "clean_cmd": "sys.stdout.flush()",
    "canonical_cmd": "sys.stdout.flush()"
  },
  {
    "nl": "check if all values of a dictionary `your_dict` are zero `0`",
    "cmd": "all(value == 0 for value in list(your_dict.values()))",
    "question_id": "35253971-84",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#all",
      "python.library.functions#list",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "all(value ==   value  list(your_dict.values()))",
    "canonical_cmd": "all(value == 0 for value in list(VAR_STR.values()))"
  },
  {
    "nl": "count number of occurrences of a substring 'ab' in a string \"abcdabcva\"",
    "cmd": "\"\"\"abcdabcva\"\"\".count('ab')",
    "question_id": "8899905-32",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.count"
    ],
    "clean_cmd": ".count()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".count('VAR_STR')"
  },
  {
    "nl": "get the union set from list of lists `results_list`",
    "cmd": "results_union = set().union(*results_list)",
    "question_id": "2151517-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#set",
      "python.library.stdtypes#frozenset.union"
    ],
    "clean_cmd": "results_union = set().union(*results_list)",
    "canonical_cmd": "results_union = set().union(*VAR_STR)"
  },
  {
    "nl": "get the union of values in list of lists `result_list`",
    "cmd": "return list(set(itertools.chain(*result_list)))",
    "question_id": "2151517-30",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.chain",
      "python.library.functions#list",
      "python.library.stdtypes#set"
    ],
    "clean_cmd": " list(set(itertools.chain(*result_list)))",
    "canonical_cmd": "return list(set(itertools.chain(*VAR_STR)))"
  },
  {
    "nl": "make a flat list from list of lists `sublist`",
    "cmd": "[item for sublist in l for item in sublist]",
    "question_id": "952914-55",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[item  sublist  l  item  sublist]",
    "canonical_cmd": "[item for VAR_STR in l for item in VAR_STR]"
  },
  {
    "nl": "make a flat list from list of lists `list2d`",
    "cmd": "list(itertools.chain(*list2d))",
    "question_id": "952914-41",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.chain",
      "python.library.functions#list"
    ],
    "clean_cmd": "list(itertools.chain(*list2d))",
    "canonical_cmd": "list(itertools.chain(*VAR_STR))"
  },
  {
    "nl": "make a flat list from list of lists `list2d`",
    "cmd": "list(itertools.chain.from_iterable(list2d))",
    "question_id": "952914-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.chain.from_iterable",
      "python.library.functions#list"
    ],
    "clean_cmd": "list(itertools.chain.from_iterable(list2d))",
    "canonical_cmd": "list(itertools.chain.from_iterable(VAR_STR))"
  },
  {
    "nl": "join two dataframes based on values in selected columns",
    "cmd": "pd.merge(a, b, on=['A', 'B'], how='outer')",
    "question_id": "40221516-69",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.merge"
    ],
    "clean_cmd": "pd.merge(a, b, on=[, ], how=)",
    "canonical_cmd": "pd.merge(a, b, on=['A', 'B'], how='outer')"
  },
  {
    "nl": "get all the values in column `b` from pandas data frame `df`",
    "cmd": "df['b']",
    "question_id": "17193850-16",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df[]",
    "canonical_cmd": "VAR_STR['VAR_STR']"
  },
  {
    "nl": "convert Date object `dateobject` into a DateTime object",
    "cmd": "datetime.datetime.combine(dateobject, datetime.time())",
    "question_id": "11619169-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.combine",
      "python.library.datetime#datetime.time"
    ],
    "clean_cmd": "datetime.datetime.combine(dateobject, datetime.time())",
    "canonical_cmd": "datetime.datetime.combine(VAR_STR, datetime.time())"
  },
  {
    "nl": "How to exclude a character from a regex group?",
    "cmd": "re.compile('[^a-zA-Z0-9-]+')",
    "question_id": "4108561-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.compile"
    ],
    "clean_cmd": "re.compile()",
    "canonical_cmd": "re.compile('[^a-zA-Z0-9-]+')"
  },
  {
    "nl": "remove Nan values from array `x`",
    "cmd": "x = x[numpy.logical_not(numpy.isnan(x))]",
    "question_id": "11620914-85",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.logical_not",
      "numpy.reference.generated.numpy.isnan"
    ],
    "clean_cmd": "x = x[numpy.logical_not(numpy.isnan(x))]",
    "canonical_cmd": "VAR_STR = VAR_STR[numpy.logical_not(numpy.isnan(VAR_STR))]"
  },
  {
    "nl": "index a list `L` with another list `Idx`",
    "cmd": "T = [L[i] for i in Idx]",
    "question_id": "1012185-44",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "T = [L[i]  i  Idx]",
    "canonical_cmd": "T = [VAR_STR[i] for i in VAR_STR]"
  },
  {
    "nl": "How to plot two columns of a pandas data frame using points?",
    "cmd": "df.plot(x='col_name_1', y='col_name_2', style='o')",
    "question_id": "17812978-84",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.plot"
    ],
    "clean_cmd": "df.plot(x=, y=, style=)",
    "canonical_cmd": "df.plot(x='col_name_1', y='col_name_2', style='o')"
  },
  {
    "nl": "remove all non-numeric characters from string `sdkjh987978asd098as0980a98sd `",
    "cmd": "re.sub('[^0-9]', '', 'sdkjh987978asd098as0980a98sd')",
    "question_id": "1249388-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , )",
    "canonical_cmd": "re.sub('[^0-9]', '', 'VAR_STR')"
  },
  {
    "nl": "finding the index of an item 'foo' given a list `['foo', 'bar', 'baz']` containing it",
    "cmd": "[i for i, j in enumerate(['foo', 'bar', 'baz']) if j == 'foo']",
    "question_id": "176918-30",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate"
    ],
    "clean_cmd": "[i  i, j  enumerate([, , ])  j == ]",
    "canonical_cmd": "[i for i, j in enumerate(['VAR_STR', 'bar', 'baz']) if j == 'VAR_STR']"
  },
  {
    "nl": "Validate IP address using Regex",
    "cmd": "pat = re.compile('^\\\\d{1,3}\\\\.\\\\d{1,3}\\\\.\\\\d{1,3}\\\\.\\\\d{1,3}$')",
    "question_id": "11264005-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.compile"
    ],
    "clean_cmd": "pat = re.compile()",
    "canonical_cmd": "pat = re.compile('^\\\\d{1,3}\\\\.\\\\d{1,3}\\\\.\\\\d{1,3}\\\\.\\\\d{1,3}$')"
  },
  {
    "nl": "subtract elements of list `List1` from elements of list `List2`",
    "cmd": "[(x1 - x2) for x1, x2 in zip(List1, List2)]",
    "question_id": "8194156-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "[(x1 - x2)  x1, x2  zip(List1, List2)]",
    "canonical_cmd": "[(x1 - x2) for x1, x2 in zip(VAR_STR, VAR_STR)]"
  },
  {
    "nl": "extract date from a string 'monkey 2010-07-32 love banana'",
    "cmd": "dparser.parse('monkey 2010-07-32 love banana', fuzzy=True)",
    "question_id": "3276180-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.ast#ast.parse"
    ],
    "clean_cmd": "dparser.parse(, fuzzy=True)",
    "canonical_cmd": "dparser.parse('VAR_STR', fuzzy=True)"
  },
  {
    "nl": "extract date from a string 'monkey 20/01/1980 love banana'",
    "cmd": "dparser.parse('monkey 20/01/1980 love banana', fuzzy=True)",
    "question_id": "3276180-20",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.ast#ast.parse"
    ],
    "clean_cmd": "dparser.parse(, fuzzy=True)",
    "canonical_cmd": "dparser.parse('VAR_STR', fuzzy=True)"
  },
  {
    "nl": "extract date from a string `monkey 10/01/1980 love banana`",
    "cmd": "dparser.parse('monkey 10/01/1980 love banana', fuzzy=True)",
    "question_id": "3276180-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.ast#ast.parse"
    ],
    "clean_cmd": "dparser.parse(, fuzzy=True)",
    "canonical_cmd": "dparser.parse('VAR_STR', fuzzy=True)"
  },
  {
    "nl": "remove a key 'key' from a dictionary `my_dict`",
    "cmd": "my_dict.pop('key', None)",
    "question_id": "11277432-32",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.pop"
    ],
    "clean_cmd": "my_dict.pop(, None)",
    "canonical_cmd": "VAR_STR.pop('VAR_STR', None)"
  },
  {
    "nl": "How to get the concrete class name as a string?",
    "cmd": "instance.__class__.__name__",
    "question_id": "521502-6",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "instance.__class__.__name__",
    "canonical_cmd": "instance.__class__.__name__"
  },
  {
    "nl": "sort list `a` in ascending order based on its elements' float values",
    "cmd": "a = sorted(a, key=lambda x: float(x))",
    "question_id": "17474211-72",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#float"
    ],
    "clean_cmd": "a = sorted(a, key= x: float(x))",
    "canonical_cmd": "VAR_STR = sorted(VAR_STR, key=lambda x: float(x))"
  },
  {
    "nl": "Sort list `alist` in ascending order based on each of its elements' attribute `foo`",
    "cmd": "alist.sort(key=lambda x: x.foo)",
    "question_id": "11850425-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "alist.sort(key= x: x.foo)",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: x.VAR_STR)"
  },
  {
    "nl": "open file '5_1.txt' in directory `direct`",
    "cmd": "x_file = open(os.path.join(direct, '5_1.txt'), 'r')",
    "question_id": "13223737-73",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.join",
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "x_file = open(os.path.join(direct, ), )",
    "canonical_cmd": "x_file = open(os.path.join(VAR_STR, 'VAR_STR'), 'r')"
  },
  {
    "nl": "save numpy array `x` into text file 'test.txt'",
    "cmd": "np.savetxt('test.txt', x)",
    "question_id": "3685265-84",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.savetxt"
    ],
    "clean_cmd": "np.savetxt(, x)",
    "canonical_cmd": "np.savetxt('VAR_STR', VAR_STR)"
  },
  {
    "nl": "get the list with the highest sum value in list `x`",
    "cmd": "print(max(x, key=sum))",
    "question_id": "29760130-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#max"
    ],
    "clean_cmd": "print(max(x, key=sum))",
    "canonical_cmd": "print(max(VAR_STR, key=sum))"
  },
  {
    "nl": "get the value at index 1 for each tuple in the list of tuples `L`",
    "cmd": "[x[1] for x in L]",
    "question_id": "4800811-3",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[x[]  x  L]",
    "canonical_cmd": "[x[1] for x in VAR_STR]"
  },
  {
    "nl": "Python: Convert a string to an integer",
    "cmd": "int('  23  ')",
    "question_id": "2508861-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "int()",
    "canonical_cmd": "int('  23  ')"
  },
  {
    "nl": "create a set that is the exclusive or of [1, 2, 3] and [3, 4, 5]",
    "cmd": "set([1, 2, 3]) ^ set([3, 4, 5])",
    "question_id": "11348347-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#set"
    ],
    "clean_cmd": "set([, , ]) ^ set([, , ])",
    "canonical_cmd": "set([1, 2, 3]) ^ set([3, 4, 5])"
  },
  {
    "nl": "if Selenium textarea element `foo` is not empty, clear the field",
    "cmd": "driver.find_element_by_id('foo').clear()",
    "question_id": "7732125-28",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#frozenset.clear"
    ],
    "clean_cmd": "driver.find_element_by_id().clear()",
    "canonical_cmd": "driver.find_element_by_id('VAR_STR').clear()"
  },
  {
    "nl": "clear text from textarea 'foo' with selenium",
    "cmd": "driver.find_element_by_id('foo').clear()",
    "question_id": "7732125-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#frozenset.clear"
    ],
    "clean_cmd": "driver.find_element_by_id().clear()",
    "canonical_cmd": "driver.find_element_by_id('VAR_STR').clear()"
  },
  {
    "nl": "make a function `f` that calculates the sum of two integer variables `x` and `y`",
    "cmd": "f = lambda x, y: x + y",
    "question_id": "7974442-3",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "f =  x, y: x + y",
    "canonical_cmd": "VAR_STR = lambda VAR_STR, VAR_STR: VAR_STR + VAR_STR"
  },
  {
    "nl": "throw an error window in python in windows",
    "cmd": "ctypes.windll.user32.MessageBoxW(0, 'Error', 'Error', 0)",
    "question_id": "3365673-3",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "ctypes.windll.user32.MessageBoxW(, , , )",
    "canonical_cmd": "ctypes.windll.user32.MessageBoxW(0, 'Error', 'Error', 0)"
  },
  {
    "nl": "convert rows in pandas data frame `df` into list",
    "cmd": "df.apply(lambda x: x.tolist(), axis=1)",
    "question_id": "19585280-52",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.apply",
      "pandas.reference.api.pandas.series.tolist"
    ],
    "clean_cmd": "df.apply( x: x.tolist(), axis=)",
    "canonical_cmd": "VAR_STR.apply(lambda x: x.tolist(), axis=1)"
  },
  {
    "nl": "round 123 to 100",
    "cmd": "int(round(123, -2))",
    "question_id": "2742784-42",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.functions#round"
    ],
    "clean_cmd": "int(round(, -))",
    "canonical_cmd": "int(round(123, -2))"
  },
  {
    "nl": "split a unicode string `text` into a list of words and punctuation characters with a regex",
    "cmd": "re.findall('\\\\w+|[^\\\\w\\\\s]', text, re.UNICODE)",
    "question_id": "367155-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, text, re.UNICODE)",
    "canonical_cmd": "re.findall('\\\\w+|[^\\\\w\\\\s]', VAR_STR, re.UNICODE)"
  },
  {
    "nl": "sum all the values in a counter variable `my_counter`",
    "cmd": "sum(my_counter.values())",
    "question_id": "32511444-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "sum(my_counter.values())",
    "canonical_cmd": "sum(VAR_STR.values())"
  },
  {
    "nl": "convert 3652458 to string represent a 32bit hex number",
    "cmd": "\"\"\"0x{0:08X}\"\"\".format(3652458)",
    "question_id": "7253907-19",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format()",
    "canonical_cmd": "\"\"\"0x{0:08X}\"\"\".format(3652458)"
  },
  {
    "nl": "print two numbers `10` and `20` using string formatting",
    "cmd": "\"\"\"{0} {1}\"\"\".format(10, 20)",
    "question_id": "17895835-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format(, )",
    "canonical_cmd": "\"\"\"{0} {1}\"\"\".format(10, 20)"
  },
  {
    "nl": "replace placeholders in string '{1} {ham} {0} {foo} {1}' with arguments `(10, 20, foo='bar', ham='spam')`",
    "cmd": "\"\"\"{1} {ham} {0} {foo} {1}\"\"\".format(10, 20, foo='bar', ham='spam')",
    "question_id": "17895835-19",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format(, , foo=, ham=)",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".format(VAR_STR)"
  },
  {
    "nl": "insert string `string1` after each character of `string2`",
    "cmd": "string2.replace('', string1)[len(string1):-len(string1)]",
    "question_id": "14431731-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "string2.replace(, string1)[len(string1):-len(string1)]",
    "canonical_cmd": "VAR_STR.replace('', VAR_STR)[len(VAR_STR):-len(VAR_STR)]"
  },
  {
    "nl": "check whether elements in list `a` appear only once",
    "cmd": "len(set(a)) == len(a)",
    "question_id": "3899782-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.stdtypes#set"
    ],
    "clean_cmd": "len(set(a)) == len(a)",
    "canonical_cmd": "len(set(VAR_STR)) == len(VAR_STR)"
  },
  {
    "nl": "clear Tkinter Canvas `canvas`",
    "cmd": "canvas.delete('all')",
    "question_id": "15839491-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.ast#ast.Delete"
    ],
    "clean_cmd": "canvas.delete()",
    "canonical_cmd": "VAR_STR.delete('all')"
  },
  {
    "nl": "take screenshot 'screen.png' on mac os x",
    "cmd": "os.system('screencapture screen.png')",
    "question_id": "4524723-100",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system('screencapture screen.png')"
  },
  {
    "nl": "Reset the indexes of a pandas data frame",
    "cmd": "df2 = df.reset_index()",
    "question_id": "11621165-35",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.reset_index"
    ],
    "clean_cmd": "df2 = df.reset_index()",
    "canonical_cmd": "df2 = df.reset_index()"
  },
  {
    "nl": "sort pandas data frame `df` using values from columns `c1` and `c2` in ascending order",
    "cmd": "df.sort(['c1', 'c2'], ascending=[True, True])",
    "question_id": "17618981-15",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.index.sort"
    ],
    "clean_cmd": "df.sort([, ], ascending=[True, True])",
    "canonical_cmd": "VAR_STR.sort(['VAR_STR', 'VAR_STR'], ascending=[True, True])"
  },
  {
    "nl": "Get the zip output as list from the lists `[1, 2, 3]`, `[4, 5, 6]`, `[7, 8, 9]`",
    "cmd": "[list(a) for a in zip([1, 2, 3], [4, 5, 6], [7, 8, 9])]",
    "question_id": "8372399-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#list"
    ],
    "clean_cmd": "[list(a)  a  zip([, , ], [, , ], [, , ])]",
    "canonical_cmd": "[list(a) for a in zip([VAR_STR], [VAR_STR], [VAR_STR])]"
  },
  {
    "nl": "modify the width of a text control as `300` keeping default height in wxpython",
    "cmd": "wx.TextCtrl(self, -1, size=(300, -1))",
    "question_id": "14306852-84",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "wx.TextCtrl(self, -, size=(, -))",
    "canonical_cmd": "wx.TextCtrl(self, -1, size=(300, -1))"
  },
  {
    "nl": "erase the contents of a file `filename`",
    "cmd": "open('filename', 'w').close()",
    "question_id": "2769061-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "open(, ).close()",
    "canonical_cmd": "open('VAR_STR', 'w').close()"
  },
  {
    "nl": "How to erase the file contents of text file in Python?",
    "cmd": "open('file.txt', 'w').close()",
    "question_id": "2769061-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "open(, ).close()",
    "canonical_cmd": "open('file.txt', 'w').close()"
  },
  {
    "nl": "Create sub matrix of a list of lists `[[2, 3, 4], [2, 3, 4], [2, 3, 4]]` (without numpy)",
    "cmd": "[[2, 3, 4], [2, 3, 4], [2, 3, 4]]",
    "question_id": "15650538-28",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[[, , ], [, , ], [, , ]]",
    "canonical_cmd": "[VAR_STR]"
  },
  {
    "nl": "sort each row in a pandas dataframe `df` in descending order",
    "cmd": "df.sort(axis=1, ascending=False)",
    "question_id": "25817930-59",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.index.sort"
    ],
    "clean_cmd": "df.sort(axis=, ascending=False)",
    "canonical_cmd": "VAR_STR.sort(axis=1, ascending=False)"
  },
  {
    "nl": "Fastest way to sort each row in a pandas dataframe",
    "cmd": "df.sort(df.columns, axis=1, ascending=False)",
    "question_id": "25817930-65",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df.sort(df.columns, axis=, ascending=False)",
    "canonical_cmd": "df.sort(df.columns, axis=1, ascending=False)"
  },
  {
    "nl": "delete all rows in a numpy array `a` where any value in a row is zero `0`",
    "cmd": "a[np.all(a != 0, axis=1)]",
    "question_id": "18397805-56",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.all"
    ],
    "clean_cmd": "a[np.all(a != , axis=)]",
    "canonical_cmd": "VAR_STR[np.all(VAR_STR != 0, axis=1)]"
  },
  {
    "nl": "sort array `order_array` based on column 'year', 'month' and 'day'",
    "cmd": "order_array.sort(order=['year', 'month', 'day'])",
    "question_id": "19156472-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "order_array.sort(order=[, , ])",
    "canonical_cmd": "VAR_STR.sort(order=['VAR_STR', 'VAR_STR', 'VAR_STR'])"
  },
  {
    "nl": "Sort a structured numpy array 'df' on multiple columns 'year', 'month' and 'day'.",
    "cmd": "df.sort(['year', 'month', 'day'])",
    "question_id": "19156472-7",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.index.sort"
    ],
    "clean_cmd": "df.sort([, , ])",
    "canonical_cmd": "VAR_STR.sort(['VAR_STR', 'VAR_STR', 'VAR_STR'])"
  },
  {
    "nl": "remove multiple values from a list `my_list` at the same time with index starting at `2` and ending just before `6`.",
    "cmd": "del my_list[2:6]",
    "question_id": "11303225-39",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " my_list[:]",
    "canonical_cmd": "del VAR_STR[2:6]"
  },
  {
    "nl": "pandas read comma-separated CSV file `s` and skip commented lines starting with '#'",
    "cmd": "pd.read_csv(StringIO(s), sep=',', comment='#')",
    "question_id": "18366797-79",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.read_csv",
      "python.library.io#io.StringIO"
    ],
    "clean_cmd": "pd.read_csv(StringIO(s), sep=, comment=)",
    "canonical_cmd": "pd.read_csv(StringIO(VAR_STR), sep=',', comment='VAR_STR')"
  },
  {
    "nl": "get a name of function `my_function` as a string",
    "cmd": "my_function.__name__",
    "question_id": "251464-30",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "my_function.__name__",
    "canonical_cmd": "VAR_STR.__name__"
  },
  {
    "nl": "How to get a function name as a string in Python?",
    "cmd": "my_function.__name__",
    "question_id": "251464-61",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "my_function.__name__",
    "canonical_cmd": "my_function.__name__"
  },
  {
    "nl": "get a random record from model 'MyModel' using django's orm",
    "cmd": "MyModel.objects.order_by('?').first()",
    "question_id": "962619-48",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.first"
    ],
    "clean_cmd": "MyModel.objects.order_by().first()",
    "canonical_cmd": "VAR_STR.objects.order_by('?').first()"
  },
  {
    "nl": "in Django, select 100 random records from the database `Content.objects`",
    "cmd": "Content.objects.all().order_by('?')[:100]",
    "question_id": "3506678-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#all"
    ],
    "clean_cmd": "Content.objects.all().order_by()[:]",
    "canonical_cmd": "Content.objects.all().order_by('?')[:100]"
  },
  {
    "nl": "Merge all columns in dataframe `df` into one column",
    "cmd": "df.apply(' '.join, axis=0)",
    "question_id": "38549915-39",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.apply"
    ],
    "clean_cmd": "df.apply(.join, axis=)",
    "canonical_cmd": "VAR_STR.apply(' '.join, axis=0)"
  },
  {
    "nl": "write a tuple of tuples `A` to a csv file using python",
    "cmd": "writer.writerow(A)",
    "question_id": "8687568-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.csv#csv.csvwriter.writerow"
    ],
    "clean_cmd": "writer.writerow(A)",
    "canonical_cmd": "writer.writerow(VAR_STR)"
  },
  {
    "nl": "Write all tuple of tuples `A` at once into csv file",
    "cmd": "writer.writerows(A)",
    "question_id": "8687568-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.csv#csv.csvwriter.writerows"
    ],
    "clean_cmd": "writer.writerows(A)",
    "canonical_cmd": "writer.writerows(VAR_STR)"
  },
  {
    "nl": "split string `s` to list conversion by ','",
    "cmd": "[x.strip() for x in s.split(',')]",
    "question_id": "9905471-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "[x.strip()  x  s.split()]",
    "canonical_cmd": "[x.strip() for x in VAR_STR.split('VAR_STR')]"
  },
  {
    "nl": "delete all elements from a list `x` if a function `fn` taking value as parameter returns `0`",
    "cmd": "[x for x in lst if fn(x) != 0]",
    "question_id": "3895424-5",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[x  x  lst  fn(x) != ]",
    "canonical_cmd": "[VAR_STR for VAR_STR in lst if VAR_STR(VAR_STR) != 0]"
  },
  {
    "nl": "execute command 'source .bashrc; shopt -s expand_aliases; nuke -x scriptPath' from python script",
    "cmd": "os.system('source .bashrc; shopt -s expand_aliases; nuke -x scriptPath')",
    "question_id": "6856119-91",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system('VAR_STR')"
  },
  {
    "nl": "Format all floating variables `var1`, `var2`, `var3`, `var4` to print to two decimal places.",
    "cmd": "print('%.2f kg = %.2f lb = %.2f gal = %.2f l' % (var1, var2, var3, var4))",
    "question_id": "2075128-48",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print( % (var1, var2, var3, var4))",
    "canonical_cmd": "print('%.2f kg = %.2f lb = %.2f gal = %.2f l' % (VAR_STR, VAR_STR, VAR_STR, VAR_STR))"
  },
  {
    "nl": "Get the index value in list `p_list` using enumerate in list comprehension",
    "cmd": "{p.id: {'id': p.id, 'position': ind} for ind, p in enumerate(p_list)}",
    "question_id": "18816297-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate"
    ],
    "clean_cmd": "{p.id: {: p.id, : ind}  ind, p  enumerate(p_list)}",
    "canonical_cmd": "{p.id: {'id': p.id, 'position': ind} for ind, p in enumerate(VAR_STR)}"
  },
  {
    "nl": "Python regular expression match whole word",
    "cmd": "re.search('\\\\bis\\\\b', your_string)",
    "question_id": "15863066-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.search"
    ],
    "clean_cmd": "re.search(, your_string)",
    "canonical_cmd": "re.search('\\\\bis\\\\b', your_string)"
  },
  {
    "nl": "Set the resolution of a monitor as `FULLSCREEN` in pygame",
    "cmd": "pygame.display.set_mode((0, 0), pygame.FULLSCREEN)",
    "question_id": "19954469-63",
    "cmd_name": "conala",
    "oracle_man": [
      "pygame.ref.display#pygame.display.set_mode"
    ],
    "clean_cmd": "pygame.display.set_mode((, ), pygame.FULLSCREEN)",
    "canonical_cmd": "pygame.display.set_mode((0, 0), pygame.VAR_STR)"
  },
  {
    "nl": "find button that is in li class `next` and assign it to variable `next`",
    "cmd": "next = driver.find_element_by_css_selector('li.next>a')",
    "question_id": "20457174-80",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "next = driver.find_element_by_css_selector()",
    "canonical_cmd": "VAR_STR = driver.find_element_by_css_selector('li.next>a')"
  },
  {
    "nl": "match regex 'abc(de)fg(123)' on string 'abcdefg123 and again abcdefg123'",
    "cmd": "re.findall('abc(de)fg(123)', 'abcdefg123 and again abcdefg123')",
    "question_id": "6018340-91",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "Move the cursor of file pointer `fh1` at the end of the file.",
    "cmd": "fh1.seek(2)",
    "question_id": "14332141-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.io#io.IOBase.seek"
    ],
    "clean_cmd": "fh1.seek()",
    "canonical_cmd": "VAR_STR.seek(2)"
  },
  {
    "nl": "Get the value of the minimum element in the second column of array `a`",
    "cmd": "a[np.argmin(a[:, (1)])]",
    "question_id": "14956683-99",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.argmin"
    ],
    "clean_cmd": "a[np.argmin(a[:, ()])]",
    "canonical_cmd": "VAR_STR[np.argmin(VAR_STR[:, (1)])]"
  },
  {
    "nl": "create a list containing elements of list `a` if the sum of the element is greater than 10",
    "cmd": "[item for item in a if sum(item) > 10]",
    "question_id": "2655956-72",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "[item  item  a  sum(item) > ]",
    "canonical_cmd": "[item for item in VAR_STR if sum(item) > 10]"
  },
  {
    "nl": "to convert a list of tuples `list_of_tuples` into list of lists",
    "cmd": "[list(t) for t in zip(*list_of_tuples)]",
    "question_id": "18637651-100",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#list"
    ],
    "clean_cmd": "[list(t)  t  zip(*list_of_tuples)]",
    "canonical_cmd": "[list(t) for t in zip(*VAR_STR)]"
  },
  {
    "nl": "group a list `list_of_tuples` of tuples by values",
    "cmd": "zip(*list_of_tuples)",
    "question_id": "18637651-49",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "zip(*list_of_tuples)",
    "canonical_cmd": "zip(*VAR_STR)"
  },
  {
    "nl": "Add 1 to each integer value in list `my_list`",
    "cmd": "new_list = [(x + 1) for x in my_list]",
    "question_id": "9304408-69",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "new_list = [(x + )  x  my_list]",
    "canonical_cmd": "new_list = [(x + 1) for x in VAR_STR]"
  },
  {
    "nl": "Strip punctuation from string `s`",
    "cmd": "s.translate(None, string.punctuation)",
    "question_id": "265960-7",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.translate"
    ],
    "clean_cmd": "s.translate(None, string.punctuation)",
    "canonical_cmd": "VAR_STR.translate(None, string.punctuation)"
  },
  {
    "nl": "remove the last dot and all text beyond it in string `s`",
    "cmd": "re.sub('\\\\.[^.]+$', '', s)",
    "question_id": "35118265-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , s)",
    "canonical_cmd": "re.sub('\\\\.[^.]+$', '', VAR_STR)"
  },
  {
    "nl": "merge lists `list_a` and `list_b` into a list of tuples",
    "cmd": "zip(list_a, list_b)",
    "question_id": "2407398-46",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "zip(list_a, list_b)",
    "canonical_cmd": "zip(VAR_STR, VAR_STR)"
  },
  {
    "nl": "merge lists `a` and `b` into a list of tuples",
    "cmd": "list(zip(a, b))",
    "question_id": "2407398-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#list"
    ],
    "clean_cmd": "list(zip(a, b))",
    "canonical_cmd": "list(zip(VAR_STR, VAR_STR))"
  },
  {
    "nl": "How to select element with Selenium Python xpath",
    "cmd": "driver.find_element_by_xpath(\"//div[@id='a']//a[@class='click']\")",
    "question_id": "19035186-84",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.find_element_by_xpath()",
    "canonical_cmd": "driver.find_element_by_xpath(\"//div[@id='a']//a[@class='click']\")"
  },
  {
    "nl": "apply `numpy.linalg.norm` to each row of a matrix `a`",
    "cmd": "numpy.apply_along_axis(numpy.linalg.norm, 1, a)",
    "question_id": "7741878-15",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.apply_along_axis"
    ],
    "clean_cmd": "numpy.apply_along_axis(numpy.linalg.norm, , a)",
    "canonical_cmd": "numpy.apply_along_axis(numpy.linalg.norm, 1, VAR_STR)"
  },
  {
    "nl": "append dict `{'f': var6, 'g': var7, 'h': var8}` to value of key `e` in dict `jsobj['a']['b']`",
    "cmd": "jsobj['a']['b']['e'].append({'f': var6, 'g': var7, 'h': var8})",
    "question_id": "10895028-20",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.append"
    ],
    "clean_cmd": "jsobj[][][].append({: var6, : var7, : var8})",
    "canonical_cmd": "jsobj['a']['b']['VAR_STR'].append({VAR_STR})"
  },
  {
    "nl": "read json `elevations` to pandas dataframe `df`",
    "cmd": "pd.read_json(elevations)",
    "question_id": "21104592-91",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.read_json"
    ],
    "clean_cmd": "pd.read_json(elevations)",
    "canonical_cmd": "pd.read_json(VAR_STR)"
  },
  {
    "nl": "keep a list `dataList` of lists sorted as it is created by second element",
    "cmd": "dataList.sort(key=lambda x: x[1])",
    "question_id": "12324456-53",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "dataList.sort(key= x: x[])",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: x[1])"
  },
  {
    "nl": "sorting a list of tuples `list_of_tuples` where each tuple is reversed",
    "cmd": "sorted(list_of_tuples, key=lambda tup: tup[::-1])",
    "question_id": "10213994-42",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(list_of_tuples, key= tup: tup[::-])",
    "canonical_cmd": "sorted(VAR_STR, key=lambda tup: tup[::-1])"
  },
  {
    "nl": "sorting a list of tuples `list_of_tuples` by second key",
    "cmd": "sorted(list_of_tuples, key=lambda tup: tup[1])",
    "question_id": "10213994-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(list_of_tuples, key= tup: tup[])",
    "canonical_cmd": "sorted(VAR_STR, key=lambda tup: tup[1])"
  },
  {
    "nl": "check if any values in a list `input_list` is a list",
    "cmd": "any(isinstance(el, list) for el in input_list)",
    "question_id": "5251663-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#isinstance",
      "python.library.functions#any"
    ],
    "clean_cmd": "any(isinstance(el, list)  el  input_list)",
    "canonical_cmd": "any(isinstance(el, list) for el in VAR_STR)"
  },
  {
    "nl": "check if string 'x' is in list `['x', 'd', 'a', 's', 'd', 's']`",
    "cmd": "'x' in ['x', 'd', 'a', 's', 'd', 's']",
    "question_id": "4877844-81",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "  [, , , , , ]",
    "canonical_cmd": "'VAR_STR' in ['VAR_STR', 'd', 'a', 's', 'd', 's']"
  },
  {
    "nl": "Get the first and last 3 elements of list `l`",
    "cmd": "l[:3] + l[-3:]",
    "question_id": "40016359-9",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "l[:] + l[-:]",
    "canonical_cmd": "VAR_STR[:3] + VAR_STR[-3:]"
  },
  {
    "nl": "remove a substring \".com\" from the end of string `url`",
    "cmd": "if url.endswith('.com'):\n    url = url[:(-4)]",
    "question_id": "1038824-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.endswith"
    ],
    "clean_cmd": " url.endswith():url = url[:(-)]",
    "canonical_cmd": "if VAR_STR.endswith('VAR_STR'):\n    VAR_STR = VAR_STR[:-4]"
  },
  {
    "nl": "remove a substring \".com\" from the end of string `url`",
    "cmd": "url = re.sub('\\\\.com$', '', url)",
    "question_id": "1038824-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "url = re.sub(, , url)",
    "canonical_cmd": "VAR_STR = re.sub('\\\\.com$', '', VAR_STR)"
  },
  {
    "nl": "remove a substring \".com\" from the end of string `url`",
    "cmd": "print(url.replace('.com', ''))",
    "question_id": "1038824-76",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "print(url.replace(, ))",
    "canonical_cmd": "print(VAR_STR.replace('VAR_STR', ''))"
  },
  {
    "nl": "remove a substring `suffix` from the end of string `text`",
    "cmd": "if (not text.endswith(suffix)):\n    return text\nreturn text[:(len(text) - len(suffix))]",
    "question_id": "1038824-70",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len"
    ],
    "clean_cmd": " ( text.endswith(suffix)): text text[:(len(text) - len(suffix))]",
    "canonical_cmd": "if not VAR_STR.endswith(VAR_STR):\n    return VAR_STR\nreturn VAR_STR[:len(VAR_STR) - len(VAR_STR)]"
  },
  {
    "nl": "find the euclidean distance between two 3-d arrays `A` and `B`",
    "cmd": "np.sqrt(((A - B) ** 2).sum(-1))",
    "question_id": "40319433-71",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.sqrt",
      "python.library.functions#sum"
    ],
    "clean_cmd": "np.sqrt(((A - B) ** ).sum(-))",
    "canonical_cmd": "np.sqrt(((VAR_STR - VAR_STR) ** 2).sum(-1))"
  },
  {
    "nl": "split string `a` using new-line character '\\n' as separator",
    "cmd": "a.rstrip().split('\\n')",
    "question_id": "2094176-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rstrip",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "a.rstrip().split()",
    "canonical_cmd": "VAR_STR.rstrip().split('VAR_STR')"
  },
  {
    "nl": "split a string `a` with new line character",
    "cmd": "a.split('\\n')[:-1]",
    "question_id": "2094176-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "a.split()[:-]",
    "canonical_cmd": "VAR_STR.split('\\n')[:-1]"
  },
  {
    "nl": "unpack the arguments out of list `params` to function `some_func`",
    "cmd": "some_func(*params)",
    "question_id": "4979542-63",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "some_func(*params)",
    "canonical_cmd": "VAR_STR(*VAR_STR)"
  },
  {
    "nl": "python regex for hyphenated words in `text`",
    "cmd": "re.findall('\\\\w+(?:-\\\\w+)+', text)",
    "question_id": "8383213-96",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, text)",
    "canonical_cmd": "re.findall('\\\\w+(?:-\\\\w+)+', VAR_STR)"
  },
  {
    "nl": "plot a bar graph from the column 'color' in the DataFrame 'df'",
    "cmd": "df.colour.value_counts().plot(kind='bar')",
    "question_id": "31029560-17",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.value_counts",
      "pandas.reference.api.pandas.dataframe.plot"
    ],
    "clean_cmd": "df.colour.value_counts().plot(kind=)",
    "canonical_cmd": "VAR_STR.colour.value_counts().plot(kind='bar')"
  },
  {
    "nl": "plot categorical data in series `df` with kind `bar` using pandas and matplotlib",
    "cmd": "df.groupby('colour').size().plot(kind='bar')",
    "question_id": "31029560-68",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.dataframe.plot",
      "pandas.reference.api.pandas.dataframe.size"
    ],
    "clean_cmd": "df.groupby().size().plot(kind=)",
    "canonical_cmd": "VAR_STR.groupby('colour').size().plot(kind='VAR_STR')"
  },
  {
    "nl": "generate random upper-case ascii string of 12 characters length",
    "cmd": "print(''.join(choice(ascii_uppercase) for i in range(12)))",
    "question_id": "18319101-1",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range",
      "python.library.random#random.choice",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "print(.join(choice(ascii_uppercase)  i  range()))",
    "canonical_cmd": "print(''.join(choice(ascii_uppercase) for i in range(12)))"
  },
  {
    "nl": "python: dots in the name of variable in a format string",
    "cmd": "\"\"\"Name: {0[person.name]}\"\"\".format({'person.name': 'Joe'})",
    "question_id": "7934620-36",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format({: })",
    "canonical_cmd": "\"\"\"Name: {0[person.name]}\"\"\".format({'person.name': 'Joe'})"
  },
  {
    "nl": "open the file 'words.txt' in 'rU' mode",
    "cmd": "f = open('words.txt', 'rU')",
    "question_id": "13954840-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "f = open(, )",
    "canonical_cmd": "f = open('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "Replace each value in column 'prod_type' of dataframe `df` with string 'responsive'",
    "cmd": "df['prod_type'] = 'responsive'",
    "question_id": "39602824-29",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df[] = ",
    "canonical_cmd": "VAR_STR['VAR_STR'] = 'VAR_STR'"
  },
  {
    "nl": "python get time stamp on file `file` in '%m/%d/%Y' format",
    "cmd": "time.strftime('%m/%d/%Y', time.gmtime(os.path.getmtime(file)))",
    "question_id": "16994696-65",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.gmtime",
      "python.library.time#time.strftime",
      "python.library.os.path#os.path.getmtime"
    ],
    "clean_cmd": "time.strftime(, time.gmtime(os.path.getmtime(file)))",
    "canonical_cmd": "time.strftime('VAR_STR', time.gmtime(os.path.getmtime(VAR_STR)))"
  },
  {
    "nl": "duplicate data in pandas dataframe `x` for 5 times",
    "cmd": "pd.concat([x] * 5, ignore_index=True)",
    "question_id": "23887881-8",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.concat"
    ],
    "clean_cmd": "pd.concat([x] * , ignore_index=True)",
    "canonical_cmd": "pd.concat([VAR_STR] * 5, ignore_index=True)"
  },
  {
    "nl": "Get a repeated pandas data frame object `x` by `5` times",
    "cmd": "pd.concat([x] * 5)",
    "question_id": "23887881-70",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.concat"
    ],
    "clean_cmd": "pd.concat([x] * )",
    "canonical_cmd": "pd.concat([VAR_STR] * 5)"
  },
  {
    "nl": "kill a process `make.exe` from python script on windows",
    "cmd": "os.system('taskkill /im make.exe')",
    "question_id": "6278847-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system('taskkill /im make.exe')"
  },
  {
    "nl": "check if all elements in list `mylist` are the same",
    "cmd": "len(set(mylist)) == 1",
    "question_id": "22240602-20",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.stdtypes#set"
    ],
    "clean_cmd": "len(set(mylist)) == ",
    "canonical_cmd": "len(set(VAR_STR)) == 1"
  },
  {
    "nl": "draw a grid line on every tick of plot `plt`",
    "cmd": "plt.grid(True)",
    "question_id": "8209568-32",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib._as_gen.mpl_toolkits.axisartist.axislines.axes#mpl_toolkits.axisartist.axislines.Axes.grid"
    ],
    "clean_cmd": "plt.grid(True)",
    "canonical_cmd": "VAR_STR.grid(True)"
  },
  {
    "nl": "find recurring patterns in a string '42344343434'",
    "cmd": "re.findall('^(.+?)((.+)\\\\3+)$', '42344343434')[0][:-1]",
    "question_id": "11303238-52",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )[][:-]",
    "canonical_cmd": "re.findall('^(.+?)((.+)\\\\3+)$', 'VAR_STR')[0][:-1]"
  },
  {
    "nl": "How to find all occurrences of an element in a list?",
    "cmd": "indices = [i for i, x in enumerate(my_list) if x == 'whatever']",
    "question_id": "6294179-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate"
    ],
    "clean_cmd": "indices = [i  i, x  enumerate(my_list)  x == ]",
    "canonical_cmd": "indices = [i for i, x in enumerate(my_list) if x == 'whatever']"
  },
  {
    "nl": "create a dictionary using two lists `x` and `y`",
    "cmd": "dict(zip(x, y))",
    "question_id": "15183084-19",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict(zip(x, y))",
    "canonical_cmd": "dict(zip(VAR_STR, VAR_STR))"
  },
  {
    "nl": "get full path of current directory",
    "cmd": "os.path.dirname(os.path.abspath(__file__))",
    "question_id": "3430372-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.dirname",
      "python.library.os.path#os.path.abspath"
    ],
    "clean_cmd": "os.path.dirname(os.path.abspath(__file__))",
    "canonical_cmd": "os.path.dirname(os.path.abspath(__file__))"
  },
  {
    "nl": "generate a list from a pandas dataframe `df` with the column name and column values",
    "cmd": "df.values.tolist()",
    "question_id": "11811392-43",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.series.tolist"
    ],
    "clean_cmd": "df.values.tolist()",
    "canonical_cmd": "VAR_STR.values.tolist()"
  },
  {
    "nl": "check if all lists in list `L` have three elements of integer 1",
    "cmd": "all(x.count(1) == 3 for x in L)",
    "question_id": "12310141-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#all",
      "python.library.stdtypes#str.count"
    ],
    "clean_cmd": "all(x.count() ==   x  L)",
    "canonical_cmd": "all(x.count(1) == 3 for x in VAR_STR)"
  },
  {
    "nl": "Sort list `my_list` in alphabetical order based on the values associated with key 'name' of each dictionary in the list",
    "cmd": "my_list.sort(key=operator.itemgetter('name'))",
    "question_id": "5048841-98",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.operator#operator.itemgetter",
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "my_list.sort(key=operator.itemgetter())",
    "canonical_cmd": "VAR_STR.sort(key=operator.itemgetter('VAR_STR'))"
  },
  {
    "nl": "display first 5 characters of string 'aaabbbccc'",
    "cmd": "\"\"\"{:.5}\"\"\".format('aaabbbccc')",
    "question_id": "24076297-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format()",
    "canonical_cmd": "\"\"\"{:.5}\"\"\".format('VAR_STR')"
  },
  {
    "nl": "Convert a list `['A:1', 'B:2', 'C:3', 'D:4']` to dictionary",
    "cmd": "dict(map(lambda s: s.split(':'), ['A:1', 'B:2', 'C:3', 'D:4']))",
    "question_id": "16374540-19",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.stdtypes#dict",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "dict(map( s: s.split(), [, , , ]))",
    "canonical_cmd": "dict(map(lambda s: s.split(':'), [VAR_STR]))"
  },
  {
    "nl": "recursively go through all subdirectories and files in `rootdir`",
    "cmd": "for (root, subFolders, files) in os.walk(rootdir):\n    pass",
    "question_id": "13571134-46",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.walk"
    ],
    "clean_cmd": " (root, subFolders, files)  os.walk(rootdir):",
    "canonical_cmd": "for root, subFolders, files in os.walk(VAR_STR):\n    pass"
  },
  {
    "nl": "sort list `X` based on values from another list `Y`",
    "cmd": "[x for y, x in sorted(zip(Y, X))]",
    "question_id": "6618515-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#sorted"
    ],
    "clean_cmd": "[x  y, x  sorted(zip(Y, X))]",
    "canonical_cmd": "[x for y, x in sorted(zip(VAR_STR, VAR_STR))]"
  },
  {
    "nl": "sorting list 'X' based on values from another list 'Y'",
    "cmd": "[x for y, x in sorted(zip(Y, X))]",
    "question_id": "6618515-6",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#sorted"
    ],
    "clean_cmd": "[x  y, x  sorted(zip(Y, X))]",
    "canonical_cmd": "[x for y, x in sorted(zip(VAR_STR, VAR_STR))]"
  },
  {
    "nl": "remove parentheses and text within it in string `filename`",
    "cmd": "re.sub('\\\\([^)]*\\\\)', '', filename)",
    "question_id": "640001-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , filename)",
    "canonical_cmd": "re.sub('\\\\([^)]*\\\\)', '', VAR_STR)"
  },
  {
    "nl": "find all `owl:Class` tags by parsing xml with namespace",
    "cmd": "root.findall('{http://www.w3.org/2002/07/owl#}Class')",
    "question_id": "14853243-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "root.findall()",
    "canonical_cmd": "root.findall('{http://www.w3.org/2002/07/owl#}Class')"
  },
  {
    "nl": "print a unicode string `text`",
    "cmd": "print(text.encode('windows-1252'))",
    "question_id": "6740865-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "print(text.encode())",
    "canonical_cmd": "print(VAR_STR.encode('windows-1252'))"
  },
  {
    "nl": "Concatenate dataframe `df_1` to dataframe `df_2` sorted by values of the column 'y'",
    "cmd": "pd.concat([df_1, df_2.sort_values('y')])",
    "question_id": "41192805-11",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.concat",
      "pandas.reference.api.pandas.dataframe.sort_values"
    ],
    "clean_cmd": "pd.concat([df_1, df_2.sort_values()])",
    "canonical_cmd": "pd.concat([VAR_STR, VAR_STR.sort_values('VAR_STR')])"
  },
  {
    "nl": "sum values greater than 0 in dictionary `d`",
    "cmd": "sum(v for v in list(d.values()) if v > 0)",
    "question_id": "15014276-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum",
      "python.library.functions#list",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "sum(v  v  list(d.values())  v > )",
    "canonical_cmd": "sum(v for v in list(VAR_STR.values()) if v > 0)"
  },
  {
    "nl": "Get all the items from a list of tuple 'l' where second item in tuple is '1'.",
    "cmd": "[x for x in l if x[1] == 1]",
    "question_id": "18131367-54",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[x  x  l  x[] == ]",
    "canonical_cmd": "[x for x in VAR_STR if x[1] == 1]"
  },
  {
    "nl": "generate list of numbers in specific format using string formatting precision.",
    "cmd": "[('%.2d' % i) for i in range(16)]",
    "question_id": "12030074-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "[( % i)  i  range()]",
    "canonical_cmd": "[('%.2d' % i) for i in range(16)]"
  },
  {
    "nl": "strip everything up to and including the character `&` from url `url`, strip the character `=` from the remaining string and concatenate `.html` to the end",
    "cmd": "url.split('&')[-1].replace('=', '') + '.html'",
    "question_id": "41133414-48",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "url.split()[-].replace(, ) + ",
    "canonical_cmd": "VAR_STR.split('VAR_STR')[-1].replace('VAR_STR', '') + 'VAR_STR'"
  },
  {
    "nl": "select a random element from array `[1, 2, 3]`",
    "cmd": "random.choice([1, 2, 3])",
    "question_id": "1058712-18",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.random#random.choice"
    ],
    "clean_cmd": "random.choice([, , ])",
    "canonical_cmd": "random.choice([VAR_STR])"
  },
  {
    "nl": "remove all non-alphabet chars from string `s`",
    "cmd": "\"\"\"\"\"\".join([i for i in s if i.isalpha()])",
    "question_id": "22520932-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.isalpha",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join([i  i  s  i.isalpha()])",
    "canonical_cmd": "\"\"\"\"\"\".join([i for i in VAR_STR if i.isalpha()])"
  },
  {
    "nl": "convert date strings in pandas dataframe column `df['date']` to pandas timestamps using the format '%d%b%Y'",
    "cmd": "df['date'] = pd.to_datetime(df['date'], format='%d%b%Y')",
    "question_id": "23797491-57",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.to_datetime"
    ],
    "clean_cmd": "df[] = pd.to_datetime(df[], format=)",
    "canonical_cmd": "df['date'] = pd.to_datetime(df['date'], format='VAR_STR')"
  },
  {
    "nl": "force bash interpreter '/bin/bash' to be used instead of shell",
    "cmd": "os.system('GREPDB=\"echo 123\"; /bin/bash -c \"$GREPDB\"')",
    "question_id": "21822054-79",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system('GREPDB=\"echo 123\"; /bin/bash -c \"$GREPDB\"')"
  },
  {
    "nl": "Run a command `echo hello world` in bash instead of shell",
    "cmd": "os.system('/bin/bash -c \"echo hello world\"')",
    "question_id": "21822054-28",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system('/bin/bash -c \"echo hello world\"')"
  },
  {
    "nl": "get index values of pandas dataframe `df` as list",
    "cmd": "df.index.values.tolist()",
    "question_id": "18358938-63",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.index.tolist"
    ],
    "clean_cmd": "df.index.values.tolist()",
    "canonical_cmd": "VAR_STR.index.values.tolist()"
  },
  {
    "nl": "run app `app` on host '192.168.0.58' and port 9000 in Flask",
    "cmd": "app.run(host='192.168.0.58', port=9000, debug=False)",
    "question_id": "30241279-15",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.pdb#pdb.run"
    ],
    "clean_cmd": "app.run(host=, port=, debug=False)",
    "canonical_cmd": "VAR_STR.run(host='VAR_STR', port=9000, debug=False)"
  },
  {
    "nl": "print unicode string `ex\\xe1mple` in uppercase",
    "cmd": "print('ex\\xe1mple'.upper())",
    "question_id": "727507-53",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.upper"
    ],
    "clean_cmd": "print(.upper())",
    "canonical_cmd": "print('VAR_STR'.upper())"
  },
  {
    "nl": "Get the sum of values to the power of their indices in a list `l`",
    "cmd": "sum(j ** i for i, j in enumerate(l, 1))",
    "question_id": "40639071-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate",
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(j ** i  i, j  enumerate(l, ))",
    "canonical_cmd": "sum(j ** i for i, j in enumerate(VAR_STR, 1))"
  },
  {
    "nl": "get the first row, second column; second row, first column; and second row, third column values of numpy array `arr`",
    "cmd": "arr[[0, 1, 1], [1, 0, 2]]",
    "question_id": "14162026-97",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "arr[[, , ], [, , ]]",
    "canonical_cmd": "VAR_STR[[0, 1, 1], [1, 0, 2]]"
  },
  {
    "nl": "Delete multiple columns `columnheading1`, `columnheading2` in pandas data frame `yourdf`",
    "cmd": "yourdf.drop(['columnheading1', 'columnheading2'], axis=1, inplace=True)",
    "question_id": "28538536-94",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.drop"
    ],
    "clean_cmd": "yourdf.drop([, ], axis=, inplace=True)",
    "canonical_cmd": "VAR_STR.drop(['VAR_STR', 'VAR_STR'], axis=1, inplace=True)"
  },
  {
    "nl": "Django get first 10 records of model `User` ordered by criteria 'age' of model 'pet'",
    "cmd": "User.objects.order_by('-pet__age')[:10]",
    "question_id": "40079728-79",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "User.objects.order_by()[:]",
    "canonical_cmd": "VAR_STR.objects.order_by('-pet__age')[:10]"
  },
  {
    "nl": "print bold text 'Hello'",
    "cmd": "print('\\x1b[1m' + 'Hello')",
    "question_id": "8924173-17",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print( + )",
    "canonical_cmd": "print('\\x1b[1m' + 'VAR_STR')"
  },
  {
    "nl": "run python script 'script2.py' from another python script, passing in 1 as an argument",
    "cmd": "os.system('script2.py 1')",
    "question_id": "3781851-15",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system('script2.py 1')"
  },
  {
    "nl": "create new column `A_perc` in dataframe `df` with row values equal to the value in column `A` divided by the value in column `sum`",
    "cmd": "df['A_perc'] = df['A'] / df['sum']",
    "question_id": "18504967-17",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df[] = df[] / df[]",
    "canonical_cmd": "VAR_STR['VAR_STR'] = VAR_STR['VAR_STR'] / VAR_STR['VAR_STR']"
  },
  {
    "nl": "list duplicated elements in two lists `listA` and `listB`",
    "cmd": "list(set(listA) & set(listB))",
    "question_id": "11697709-100",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#set",
      "python.library.functions#list"
    ],
    "clean_cmd": "list(set(listA) & set(listB))",
    "canonical_cmd": "list(set(VAR_STR) & set(VAR_STR))"
  },
  {
    "nl": "extract digits in a simple way from a python string",
    "cmd": "map(int, re.findall('\\\\d+', s))",
    "question_id": "10365225-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall",
      "python.library.functions#map"
    ],
    "clean_cmd": "map(int, re.findall(, s))",
    "canonical_cmd": "map(int, re.findall('\\\\d+', s))"
  },
  {
    "nl": "find overlapping matches from a string `hello` using regex",
    "cmd": "re.findall('(?=(\\\\w\\\\w))', 'hello')",
    "question_id": "11430863-98",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall('(?=(\\\\w\\\\w))', 'VAR_STR')"
  },
  {
    "nl": "Python / Remove special character from string",
    "cmd": "re.sub('[^a-zA-Z0-9-_*.]', '', my_string)",
    "question_id": "25991612-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , my_string)",
    "canonical_cmd": "re.sub('[^a-zA-Z0-9-_*.]', '', my_string)"
  },
  {
    "nl": "Sort list `keys` based on its elements' dot-separated numbers",
    "cmd": "keys.sort(key=lambda x: map(int, x.split('.')))",
    "question_id": "2597099-85",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.stdtypes#list.sort",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "keys.sort(key= x: map(int, x.split()))",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: map(int, x.split('.')))"
  },
  {
    "nl": "Sort a list of integers `keys` where each value is in string format",
    "cmd": "keys.sort(key=lambda x: [int(y) for y in x.split('.')])",
    "question_id": "2597099-42",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#list.sort",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "keys.sort(key= x: [int(y)  y  x.split()])",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: [int(y) for y in x.split('.')])"
  },
  {
    "nl": "get the tuple in list `a_list` that has the largest item in the second index",
    "cmd": "max_item = max(a_list, key=operator.itemgetter(1))",
    "question_id": "1874194-65",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.operator#operator.itemgetter",
      "python.library.functions#max"
    ],
    "clean_cmd": "max_item = max(a_list, key=operator.itemgetter())",
    "canonical_cmd": "max_item = max(VAR_STR, key=operator.itemgetter(1))"
  },
  {
    "nl": "find tuple in list of tuples `a_list` with the largest second element",
    "cmd": "max(a_list, key=operator.itemgetter(1))",
    "question_id": "1874194-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.operator#operator.itemgetter",
      "python.library.functions#max"
    ],
    "clean_cmd": "max(a_list, key=operator.itemgetter())",
    "canonical_cmd": "max(VAR_STR, key=operator.itemgetter(1))"
  },
  {
    "nl": "wait for shell command `p` invoked by subprocess.Popen to complete",
    "cmd": "p.wait()",
    "question_id": "16196712-48",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.wait"
    ],
    "clean_cmd": "p.wait()",
    "canonical_cmd": "VAR_STR.wait()"
  },
  {
    "nl": "replace white spaces in dataframe `df` with '_'",
    "cmd": "df.replace(' ', '_', regex=True)",
    "question_id": "42462530-4",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.replace"
    ],
    "clean_cmd": "df.replace(, , regex=True)",
    "canonical_cmd": "VAR_STR.replace(' ', 'VAR_STR', regex=True)"
  },
  {
    "nl": "switch positions of each two adjacent characters in string `a`",
    "cmd": "print(''.join(''.join(i) for i in zip(a2, a1)) + a[-1] if len(a) % 2 else '')",
    "question_id": "30628176-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#len",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "print(.join(.join(i)  i  zip(a2, a1)) + a[-]  len(a) %   )",
    "canonical_cmd": "print(''.join(''.join(i) for i in zip(a2, a1)) + VAR_STR[-1] if len(VAR_STR) % \n    2 else '')"
  },
  {
    "nl": "Python: Extract numbers from a string",
    "cmd": "[int(s) for s in re.findall('\\\\b\\\\d+\\\\b', \"he33llo 42 I'm a 32 string 30\")]",
    "question_id": "4289331-11",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall",
      "python.library.functions#int"
    ],
    "clean_cmd": "[int(s)  s  re.findall(, )]",
    "canonical_cmd": "[int(s) for s in re.findall('\\\\b\\\\d+\\\\b', \"he33llo 42 I'm a 32 string 30\")]"
  },
  {
    "nl": "remove extra white spaces & tabs from a string `s`",
    "cmd": "\"\"\" \"\"\".join(s.split())",
    "question_id": "4241757-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": ".join(s.split())",
    "canonical_cmd": "\"\"\" \"\"\".join(VAR_STR.split())"
  },
  {
    "nl": "clear terminal screen on windows",
    "cmd": "os.system('cls')",
    "question_id": "4810537-49",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system('cls')"
  },
  {
    "nl": "clear the terminal screen in Linux",
    "cmd": "os.system('clear')",
    "question_id": "4810537-55",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system('clear')"
  },
  {
    "nl": "get value of key `post code` associated with first index of key `places` of dictionary `data`",
    "cmd": "print(data['places'][0]['post code'])",
    "question_id": "23306653-53",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print(data[][][])",
    "canonical_cmd": "print(VAR_STR['VAR_STR'][0]['VAR_STR'])"
  },
  {
    "nl": "get a list of indices of non zero elements in a list `a`",
    "cmd": "[i for i, e in enumerate(a) if e != 0]",
    "question_id": "4111412-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate"
    ],
    "clean_cmd": "[i  i, e  enumerate(a)  e != ]",
    "canonical_cmd": "[i for i, e in enumerate(VAR_STR) if e != 0]"
  },
  {
    "nl": "Define a list with string values `['a', 'c', 'b', 'obj']`",
    "cmd": "['a', 'c', 'b', 'obj']",
    "question_id": "17589590-99",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[, , , ]",
    "canonical_cmd": "[VAR_STR]"
  },
  {
    "nl": "repeat every character for 7 times in string 'map'",
    "cmd": "\"\"\"\"\"\".join(map(lambda x: x * 7, 'map'))",
    "question_id": "38273353-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(map( x: x * , ))",
    "canonical_cmd": "\"\"\"\"\"\".join(VAR_STR(lambda x: x * 7, 'VAR_STR'))"
  },
  {
    "nl": "call a function with argument list `args`",
    "cmd": "func(*args)",
    "question_id": "817087-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functools#functools.partial.func"
    ],
    "clean_cmd": "func(*args)",
    "canonical_cmd": "func(*VAR_STR)"
  },
  {
    "nl": "restart a computer after `900` seconds using subprocess",
    "cmd": "subprocess.call(['shutdown', '/r', '/t', '900'])",
    "question_id": "14764126-22",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call([, , , ])",
    "canonical_cmd": "subprocess.call(['shutdown', '/r', '/t', 'VAR_STR'])"
  },
  {
    "nl": "shutdown a computer using subprocess",
    "cmd": "subprocess.call(['shutdown', '/s'])",
    "question_id": "14764126-20",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call([, ])",
    "canonical_cmd": "subprocess.call(['shutdown', '/s'])"
  },
  {
    "nl": "abort a computer shutdown using subprocess",
    "cmd": "subprocess.call(['shutdown', '/a '])",
    "question_id": "14764126-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call([, ])",
    "canonical_cmd": "subprocess.call(['shutdown', '/a '])"
  },
  {
    "nl": "logoff computer having windows operating system using python",
    "cmd": "subprocess.call(['shutdown', '/l '])",
    "question_id": "14764126-39",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call([, ])",
    "canonical_cmd": "subprocess.call(['shutdown', '/l '])"
  },
  {
    "nl": "shutdown and restart a computer running windows from script",
    "cmd": "subprocess.call(['shutdown', '/r'])",
    "question_id": "14764126-27",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call([, ])",
    "canonical_cmd": "subprocess.call(['shutdown', '/r'])"
  },
  {
    "nl": "delete an empty directory",
    "cmd": "os.rmdir()",
    "question_id": "6996603-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.rmdir"
    ],
    "clean_cmd": "os.rmdir()",
    "canonical_cmd": "os.rmdir()"
  },
  {
    "nl": "recursively delete all contents in directory `path`",
    "cmd": "shutil.rmtree(path, ignore_errors=False, onerror=None)",
    "question_id": "6996603-65",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.shutil#shutil.rmtree"
    ],
    "clean_cmd": "shutil.rmtree(path, ignore_errors=False, onerror=None)",
    "canonical_cmd": "shutil.rmtree(VAR_STR, ignore_errors=False, onerror=None)"
  },
  {
    "nl": "recursively remove folder `name`",
    "cmd": "os.removedirs(name)",
    "question_id": "6996603-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.removedirs"
    ],
    "clean_cmd": "os.removedirs(name)",
    "canonical_cmd": "os.removedirs(VAR_STR)"
  },
  {
    "nl": "convert pandas DataFrame `df` to a dictionary using `id` field as the key",
    "cmd": "df.set_index('id').to_dict()",
    "question_id": "18695605-99",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.set_index",
      "pandas.reference.api.pandas.dataframe.to_dict"
    ],
    "clean_cmd": "df.set_index().to_dict()",
    "canonical_cmd": "VAR_STR.set_index('VAR_STR').to_dict()"
  },
  {
    "nl": "convert pandas dataframe `df` with fields 'id', 'value' to dictionary",
    "cmd": "df.set_index('id')['value'].to_dict()",
    "question_id": "18695605-31",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.set_index",
      "pandas.reference.api.pandas.dataframe.to_dict"
    ],
    "clean_cmd": "df.set_index()[].to_dict()",
    "canonical_cmd": "VAR_STR.set_index('VAR_STR')['VAR_STR'].to_dict()"
  },
  {
    "nl": "create list `new_list` containing the last 10 elements of list `my_list`",
    "cmd": "new_list = my_list[-10:]",
    "question_id": "8556076-78",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "new_list = my_list[-:]",
    "canonical_cmd": "VAR_STR = VAR_STR[-10:]"
  },
  {
    "nl": "get the last 10 elements from a list `my_list`",
    "cmd": "my_list[-10:]",
    "question_id": "8556076-43",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "my_list[-:]",
    "canonical_cmd": "VAR_STR[-10:]"
  },
  {
    "nl": "Run 'test2.py' file with python location 'path/to/python' and arguments 'neededArgumetGoHere' as a subprocess",
    "cmd": "call(['path/to/python', 'test2.py', 'neededArgumetGoHere'])",
    "question_id": "33218968-14",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "call([, , ])",
    "canonical_cmd": "call(['VAR_STR', 'VAR_STR', 'VAR_STR'])"
  },
  {
    "nl": "parse date string '2009/05/13 19:19:30 -0400' using format '%Y/%m/%d %H:%M:%S %z'",
    "cmd": "datetime.strptime('2009/05/13 19:19:30 -0400', '%Y/%m/%d %H:%M:%S %z')",
    "question_id": "1101508-32",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime"
    ],
    "clean_cmd": "datetime.strptime(, )",
    "canonical_cmd": "datetime.strptime('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "make new column 'C' in panda dataframe by adding values from other columns 'A' and 'B'",
    "cmd": "df['C'] = df['A'] + df['B']",
    "question_id": "34023918-59",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df[] = df[] + df[]",
    "canonical_cmd": "df['VAR_STR'] = df['VAR_STR'] + df['VAR_STR']"
  },
  {
    "nl": "sum the length of lists in list `x` that are more than 1 item in length",
    "cmd": "sum(len(y) for y in x if len(y) > 1)",
    "question_id": "35707224-41",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(len(y)  y  x  len(y) > )",
    "canonical_cmd": "sum(len(y) for y in VAR_STR if len(y) > 1)"
  },
  {
    "nl": "Normalize line ends in a string 'mixed'",
    "cmd": "mixed.replace('\\r\\n', '\\n').replace('\\r', '\\n')",
    "question_id": "1749466-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "mixed.replace(, ).replace(, )",
    "canonical_cmd": "VAR_STR.replace('\\r\\n', '\\n').replace('\\r', '\\n')"
  },
  {
    "nl": "replace each occurrence of the pattern '(http://\\\\S+|\\\\S*[^\\\\w\\\\s]\\\\S*)' within `a` with ''",
    "cmd": "re.sub('(http://\\\\S+|\\\\S*[^\\\\w\\\\s]\\\\S*)', '', a)",
    "question_id": "4695143-39",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , a)",
    "canonical_cmd": "re.sub('VAR_STR', 'VAR_STR', VAR_STR)"
  },
  {
    "nl": "check if dictionary `subset` is a subset of dictionary `superset`",
    "cmd": "all(item in list(superset.items()) for item in list(subset.items()))",
    "question_id": "9323749-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.functions#all",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "all(item  list(superset.items())  item  list(subset.items()))",
    "canonical_cmd": "all(item in list(VAR_STR.items()) for item in list(VAR_STR.items()))"
  },
  {
    "nl": "Save plot `plt` as svg file 'test.svg'",
    "cmd": "plt.savefig('test.svg')",
    "question_id": "24525111-6",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.figure_api#matplotlib.figure.Figure.savefig"
    ],
    "clean_cmd": "plt.savefig()",
    "canonical_cmd": "VAR_STR.savefig('VAR_STR')"
  },
  {
    "nl": "check if elements in list `my_list` are coherent in order",
    "cmd": "return my_list == list(range(my_list[0], my_list[-1] + 1))",
    "question_id": "18131741-100",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range",
      "python.library.functions#list"
    ],
    "clean_cmd": " my_list == list(range(my_list[], my_list[-] + ))",
    "canonical_cmd": "return VAR_STR == list(range(VAR_STR[0], VAR_STR[-1] + 1))"
  },
  {
    "nl": "decode encodeuricomponent in GAE",
    "cmd": "urllib.parse.unquote(h.path.encode('utf-8')).decode('utf-8')",
    "question_id": "9880173-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.parse#urllib.parse.unquote",
      "python.library.stdtypes#str.encode",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "urllib.parse.unquote(h.path.encode()).decode()",
    "canonical_cmd": "urllib.parse.unquote(h.path.encode('utf-8')).decode('utf-8')"
  },
  {
    "nl": "remove items from dictionary `myDict` if the item's value `val` is equal to 42",
    "cmd": "myDict = {key: val for key, val in list(myDict.items()) if val != 42}",
    "question_id": "29218750-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "myDict = {key: val  key, val  list(myDict.items())  val != }",
    "canonical_cmd": "VAR_STR = {key: VAR_STR for key, VAR_STR in list(VAR_STR.items()) if VAR_STR != 42}"
  },
  {
    "nl": "Remove all items from a dictionary `myDict` whose values are `42`",
    "cmd": "{key: val for key, val in list(myDict.items()) if val != 42}",
    "question_id": "29218750-94",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "{key: val  key, val  list(myDict.items())  val != }",
    "canonical_cmd": "{key: val for key, val in list(VAR_STR.items()) if val != 42}"
  },
  {
    "nl": "read the first line of a string `my_string`",
    "cmd": "my_string.splitlines()[0]",
    "question_id": "11833266-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.splitlines"
    ],
    "clean_cmd": "my_string.splitlines()[]",
    "canonical_cmd": "VAR_STR.splitlines()[0]"
  },
  {
    "nl": "How do I read the first line of a string?",
    "cmd": "my_string.split('\\n', 1)[0]",
    "question_id": "11833266-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "my_string.split(, )[]",
    "canonical_cmd": "my_string.split('\\n', 1)[0]"
  },
  {
    "nl": "extracting column `1` and `9` from array `data`",
    "cmd": "data[:, ([1, 9])]",
    "question_id": "8386675-18",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "data[:, ([, ])]",
    "canonical_cmd": "VAR_STR[:, ([1, 9])]"
  },
  {
    "nl": "serve a static html page 'your_template.html' at the root of a django project",
    "cmd": "url('^$', TemplateView.as_view(template_name='your_template.html'))",
    "question_id": "30650254-82",
    "cmd_name": "conala",
    "oracle_man": [
      "flask.api.index#flask.views.View.as_view",
      "django.ref.request-response#django.http.HttpResponseRedirect.url"
    ],
    "clean_cmd": "url(, TemplateView.as_view(template_name=))",
    "canonical_cmd": "url('^$', TemplateView.as_view(template_name='VAR_STR'))"
  },
  {
    "nl": "return the conversion of decimal `d` to hex without the '0x' prefix",
    "cmd": "hex(d).split('x')[1]",
    "question_id": "5796238-23",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#hex",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "hex(d).split()[]",
    "canonical_cmd": "hex(VAR_STR).split('x')[1]"
  },
  {
    "nl": "Get multiple matched strings using regex pattern `(?:review: )?(http://url.com/(\\\\d+))\\\\s?`",
    "cmd": "pattern = re.compile('(?:review: )?(http://url.com/(\\\\d+))\\\\s?', re.IGNORECASE)",
    "question_id": "17407691-98",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.compile"
    ],
    "clean_cmd": "pattern = re.compile(, re.IGNORECASE)",
    "canonical_cmd": "pattern = re.compile('VAR_STR', re.IGNORECASE)"
  },
  {
    "nl": "get count of values in numpy array `a` that are between values `25` and `100`",
    "cmd": "((25 < a) & (a < 100)).sum()",
    "question_id": "9560207-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "(( &lt; a) &amp; (a &lt; )).sum()",
    "canonical_cmd": "((25 < VAR_STR) & (VAR_STR < 100)).sum()"
  },
  {
    "nl": "replace only first occurence of string `TEST` from a string `longlongTESTstringTEST`",
    "cmd": "'longlongTESTstringTEST'.replace('TEST', '?', 1)",
    "question_id": "4628618-90",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": ".replace(, , )",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".replace('VAR_STR', '?', 1)"
  },
  {
    "nl": "format current date to pattern '{%Y-%m-%d %H:%M:%S}'",
    "cmd": "time.strftime('{%Y-%m-%d %H:%M:%S}')",
    "question_id": "21618351-81",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.strftime"
    ],
    "clean_cmd": "time.strftime()",
    "canonical_cmd": "time.strftime('VAR_STR')"
  },
  {
    "nl": "count the number of pairs in dictionary `d` whose value equal to `chosen_value`",
    "cmd": "sum(x == chosen_value for x in list(d.values()))",
    "question_id": "13462365-6",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum",
      "python.library.functions#list",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "sum(x == chosen_value  x  list(d.values()))",
    "canonical_cmd": "sum(x == VAR_STR for x in list(VAR_STR.values()))"
  },
  {
    "nl": "count the number of values in `d` dictionary that are predicate to function `some_condition`",
    "cmd": "sum(1 for x in list(d.values()) if some_condition(x))",
    "question_id": "13462365-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum",
      "python.library.functions#list",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "sum(  x  list(d.values())  some_condition(x))",
    "canonical_cmd": "sum(1 for x in list(VAR_STR.values()) if VAR_STR(x))"
  },
  {
    "nl": "Get absolute folder path and filename for file `existGDBPath `",
    "cmd": "os.path.split(os.path.abspath(existGDBPath))",
    "question_id": "17057544-4",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.abspath",
      "python.library.os.path#os.path.split"
    ],
    "clean_cmd": "os.path.split(os.path.abspath(existGDBPath))",
    "canonical_cmd": "os.path.split(os.path.abspath(VAR_STR))"
  },
  {
    "nl": "extract folder path from file path",
    "cmd": "os.path.dirname(os.path.abspath(existGDBPath))",
    "question_id": "17057544-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.dirname",
      "python.library.os.path#os.path.abspath"
    ],
    "clean_cmd": "os.path.dirname(os.path.abspath(existGDBPath))",
    "canonical_cmd": "os.path.dirname(os.path.abspath(existGDBPath))"
  },
  {
    "nl": "align values in array `b` to the order of corresponding values in array `a`",
    "cmd": "a[np.in1d(a, b)]",
    "question_id": "41923906-34",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.in1d"
    ],
    "clean_cmd": "a[np.in1d(a, b)]",
    "canonical_cmd": "VAR_STR[np.in1d(VAR_STR, VAR_STR)]"
  },
  {
    "nl": "Join pandas data frame `frame_1` and `frame_2` with left join by `county_ID` and right join by `countyid`",
    "cmd": "pd.merge(frame_1, frame_2, left_on='county_ID', right_on='countyid')",
    "question_id": "20375561-45",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.merge"
    ],
    "clean_cmd": "pd.merge(frame_1, frame_2, left_on=, right_on=)",
    "canonical_cmd": "pd.merge(VAR_STR, VAR_STR, left_on='VAR_STR', right_on='VAR_STR')"
  },
  {
    "nl": "Python JSON encoding",
    "cmd": "json.dumps({'apple': 'cat', 'banana': 'dog', 'pear': 'fish'})",
    "question_id": "983855-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.json#json.dumps"
    ],
    "clean_cmd": "json.dumps({: , : , : })",
    "canonical_cmd": "json.dumps({'apple': 'cat', 'banana': 'dog', 'pear': 'fish'})"
  },
  {
    "nl": "get key-value pairs in dictionary `my_dictionary` for all keys in list `my_list` in the order they appear in `my_list`",
    "cmd": "dict(zip(my_list, map(my_dictionary.get, my_list)))",
    "question_id": "9932549-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#map",
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict(zip(my_list, map(my_dictionary.get, my_list)))",
    "canonical_cmd": "dict(zip(VAR_STR, map(VAR_STR.get, VAR_STR)))"
  },
  {
    "nl": "sort json `ips_data` by a key 'data_two'",
    "cmd": "sorted_list_of_keyvalues = sorted(list(ips_data.items()), key=item[1]['data_two'])",
    "question_id": "34148637-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted_list_of_keyvalues = sorted(list(ips_data.items()), key=item[][])",
    "canonical_cmd": "sorted_list_of_keyvalues = sorted(list(VAR_STR.items()), key=item[1]['VAR_STR'])"
  },
  {
    "nl": "remove empty strings from list `str_list`",
    "cmd": "str_list = list([_f for _f in str_list if _f])",
    "question_id": "3845423-4",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list"
    ],
    "clean_cmd": "str_list = list([_f  _f  str_list  _f])",
    "canonical_cmd": "VAR_STR = list([_f for _f in VAR_STR if _f])"
  },
  {
    "nl": "do a `left` merge of dataframes `x` and `y` on the column `state` and sort by `index`",
    "cmd": "x.reset_index().merge(y, how='left', on='state', sort=False).sort('index')",
    "question_id": "20206615-96",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.merge",
      "pandas.reference.api.pandas.dataframe.reset_index"
    ],
    "clean_cmd": "x.reset_index().merge(y, how=, on=, sort=False).sort()",
    "canonical_cmd": "VAR_STR.reset_index().merge(VAR_STR, how='VAR_STR', on='VAR_STR', sort=False).sort(\n    'VAR_STR')"
  },
  {
    "nl": "Confirm urls in Django properly",
    "cmd": "url('^$', include('sms.urls')),",
    "question_id": "32458541-85",
    "cmd_name": "conala",
    "oracle_man": [
      "django.ref.urls#django.urls.include",
      "django.ref.request-response#django.http.HttpResponseRedirect.url"
    ],
    "clean_cmd": "url(, include()),",
    "canonical_cmd": "url('^$', include('sms.urls')),"
  },
  {
    "nl": "Configure url in django properly",
    "cmd": "url('^', include('sms.urls')),",
    "question_id": "32458541-57",
    "cmd_name": "conala",
    "oracle_man": [
      "django.ref.urls#django.urls.include",
      "django.ref.request-response#django.http.HttpResponseRedirect.url"
    ],
    "clean_cmd": "url(, include()),",
    "canonical_cmd": "url('^', include('sms.urls')),"
  },
  {
    "nl": "sort a list of dictionary `persons` according to the key `['passport']['birth_info']['date']`",
    "cmd": "sorted(persons, key=lambda x: x['passport']['birth_info']['date'])",
    "question_id": "39804375-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(persons, key= x: x[][][])",
    "canonical_cmd": "sorted(VAR_STR, key=lambda x: x[VAR_STR])"
  },
  {
    "nl": "flatten list of tuples `a`",
    "cmd": "list(chain.from_iterable(a))",
    "question_id": "10941229-55",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.itertools#itertools.chain.from_iterable"
    ],
    "clean_cmd": "list(chain.from_iterable(a))",
    "canonical_cmd": "list(chain.from_iterable(VAR_STR))"
  },
  {
    "nl": "change the background colour of the button `pushbutton` to red",
    "cmd": "self.pushButton.setStyleSheet('background-color: red')",
    "question_id": "20668060-74",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "self.pushButton.setStyleSheet()",
    "canonical_cmd": "self.pushButton.setStyleSheet('background-color: red')"
  },
  {
    "nl": "Change log level dynamically to 'DEBUG' without restarting the application",
    "cmd": "logging.getLogger().setLevel(logging.DEBUG)",
    "question_id": "19617355-91",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.logging#logging.getLogger",
      "python.library.logging#logging.Handler.setLevel"
    ],
    "clean_cmd": "logging.getLogger().setLevel(logging.DEBUG)",
    "canonical_cmd": "logging.getLogger().setLevel(logging.VAR_STR)"
  },
  {
    "nl": "resample series `s` into 3 months bins and sum each bin",
    "cmd": "s.resample('3M', how='sum')",
    "question_id": "29100599-74",
    "cmd_name": "conala",
    "oracle_man": [
      "sklearn.modules.generated.sklearn.utils.resample#sklearn.utils.resample"
    ],
    "clean_cmd": "s.resample(, how=)",
    "canonical_cmd": "VAR_STR.resample('3M', how='sum')"
  },
  {
    "nl": "Set colorbar range from `0` to `15` for pyplot object `quadmesh` in matplotlib",
    "cmd": "quadmesh.set_clim(vmin=0, vmax=15)",
    "question_id": "15282189-27",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.collections_api#matplotlib.collections.QuadMesh.set_clim"
    ],
    "clean_cmd": "quadmesh.set_clim(vmin=, vmax=)",
    "canonical_cmd": "VAR_STR.set_clim(vmin=0, vmax=15)"
  },
  {
    "nl": "check if all boolean values in a python dictionary `dict` are true",
    "cmd": "all(dict.values())",
    "question_id": "2806611-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.values",
      "python.library.functions#all"
    ],
    "clean_cmd": "all(dict.values())",
    "canonical_cmd": "all(VAR_STR.values())"
  },
  {
    "nl": "return a random word from a word list 'words'",
    "cmd": "print(random.choice(words))",
    "question_id": "1456617-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.random#random.choice"
    ],
    "clean_cmd": "print(random.choice(words))",
    "canonical_cmd": "print(random.choice(VAR_STR))"
  },
  {
    "nl": "escaping quotes in string",
    "cmd": "replace('\"', '\\\\\"')",
    "question_id": "6275762-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "replace(, )",
    "canonical_cmd": "replace('\"', '\\\\\"')"
  },
  {
    "nl": "extract floating number from string 'Current Level: 13.4 db.'",
    "cmd": "re.findall('\\\\d+\\\\.\\\\d+', 'Current Level: 13.4 db.')",
    "question_id": "4703390-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall('\\\\d+\\\\.\\\\d+', 'VAR_STR')"
  },
  {
    "nl": "extract floating point numbers from a string 'Current Level: -13.2 db or 14.2 or 3'",
    "cmd": "re.findall('[-+]?\\\\d*\\\\.\\\\d+|\\\\d+', 'Current Level: -13.2 db or 14.2 or 3')",
    "question_id": "4703390-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall('[-+]?\\\\d*\\\\.\\\\d+|\\\\d+', 'VAR_STR')"
  },
  {
    "nl": "split string `str` with delimiter '; ' or delimiter ', '",
    "cmd": "re.split('; |, ', str)",
    "question_id": "4998629-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, str)",
    "canonical_cmd": "re.split('; |, ', VAR_STR)"
  },
  {
    "nl": "replace non-ascii chars from a unicode string u'm\\xfasica'",
    "cmd": "unicodedata.normalize('NFKD', 'm\\xfasica').encode('ascii', 'ignore')",
    "question_id": "3704731-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.unicodedata#unicodedata.normalize",
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "unicodedata.normalize(, ).encode(, )",
    "canonical_cmd": "unicodedata.normalize('NFKD', 'VAR_STR').encode('ascii', 'ignore')"
  },
  {
    "nl": "convert a string `a` of letters embedded in squared brackets into embedded lists",
    "cmd": "[i.split() for i in re.findall('\\\\[([^\\\\[\\\\]]+)\\\\]', a)]",
    "question_id": "33147992-55",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall",
      "python.library.re#re.split"
    ],
    "clean_cmd": "[i.split()  i  re.findall(, a)]",
    "canonical_cmd": "[i.split() for i in re.findall('\\\\[([^\\\\[\\\\]]+)\\\\]', VAR_STR)]"
  },
  {
    "nl": "Parse DateTime object `datetimevariable` using format '%Y-%m-%d'",
    "cmd": "datetimevariable.strftime('%Y-%m-%d')",
    "question_id": "40173569-0",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.strftime"
    ],
    "clean_cmd": "datetimevariable.strftime()",
    "canonical_cmd": "VAR_STR.strftime('VAR_STR')"
  },
  {
    "nl": "get index of the first biggest element in list `a`",
    "cmd": "a.index(max(a))",
    "question_id": "3989016-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#max"
    ],
    "clean_cmd": "a.index(max(a))",
    "canonical_cmd": "VAR_STR.index(max(VAR_STR))"
  },
  {
    "nl": "create dataframe `males` containing data of dataframe `df` where column `Gender` is equal to 'Male' and column `Year` is equal to 2014",
    "cmd": "males = df[(df[Gender] == 'Male') & (df[Year] == 2014)]",
    "question_id": "22086116-75",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "males = df[(df[Gender] == ) &amp; (df[Year] == )]",
    "canonical_cmd": "VAR_STR = VAR_STR[(VAR_STR[VAR_STR] == 'VAR_STR') & (VAR_STR[VAR_STR] == 2014)]"
  },
  {
    "nl": "get a new string from the 3rd character to the end of the string `x`",
    "cmd": "x[2:]",
    "question_id": "663171-65",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "x[:]",
    "canonical_cmd": "VAR_STR[2:]"
  },
  {
    "nl": "get a new string including the first two characters of string `x`",
    "cmd": "x[:2]",
    "question_id": "663171-26",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "x[:]",
    "canonical_cmd": "VAR_STR[:2]"
  },
  {
    "nl": "get a new string including all but the last character of string `x`",
    "cmd": "x[:(-2)]",
    "question_id": "663171-13",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "x[:(-)]",
    "canonical_cmd": "VAR_STR[:-2]"
  },
  {
    "nl": "get a new string including the last two characters of string `x`",
    "cmd": "x[(-2):]",
    "question_id": "663171-83",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "x[(-):]",
    "canonical_cmd": "VAR_STR[-2:]"
  },
  {
    "nl": "get a new string with the 3rd to the second-to-last characters of string `x`",
    "cmd": "x[2:(-2)]",
    "question_id": "663171-42",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "x[:(-)]",
    "canonical_cmd": "VAR_STR[2:-2]"
  },
  {
    "nl": "reverse a string `some_string`",
    "cmd": "some_string[::(-1)]",
    "question_id": "663171-55",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "some_string[::(-)]",
    "canonical_cmd": "VAR_STR[::-1]"
  },
  {
    "nl": "select alternate characters of \"H-e-l-l-o- -W-o-r-l-d\"",
    "cmd": "'H-e-l-l-o- -W-o-r-l-d'[::2]",
    "question_id": "663171-82",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[::]",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\"[::2]"
  },
  {
    "nl": "select a substring of `s` beginning at `beginning` of length `LENGTH`",
    "cmd": "s = s[beginning:(beginning + LENGTH)]",
    "question_id": "663171-31",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "s = s[beginning:(beginning + LENGTH)]",
    "canonical_cmd": "VAR_STR = VAR_STR[VAR_STR:VAR_STR + VAR_STR]"
  },
  {
    "nl": "Get a list of items in the list `container` with attribute equal to `value`",
    "cmd": "items = [item for item in container if item.attribute == value]",
    "question_id": "9089043-68",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "items = [item  item  container  item.attribute == value]",
    "canonical_cmd": "items = [item for item in VAR_STR if item.attribute == VAR_STR]"
  },
  {
    "nl": "Get the indices in array `b` of each element appearing in array `a`",
    "cmd": "np.in1d(b, a).nonzero()[0]",
    "question_id": "32191029-91",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.in1d",
      "numpy.reference.generated.numpy.nonzero"
    ],
    "clean_cmd": "np.in1d(b, a).nonzero()[]",
    "canonical_cmd": "np.in1d(VAR_STR, VAR_STR).nonzero()[0]"
  },
  {
    "nl": "sort a list of lists `L` by index 2 of the inner list",
    "cmd": "sorted(L, key=itemgetter(2))",
    "question_id": "4174941-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.operator#operator.itemgetter"
    ],
    "clean_cmd": "sorted(L, key=itemgetter())",
    "canonical_cmd": "sorted(VAR_STR, key=itemgetter(2))"
  },
  {
    "nl": "sort a list of lists `l` by index 2 of the inner list",
    "cmd": "l.sort(key=(lambda x: x[2]))",
    "question_id": "4174941-52",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "l.sort(key=( x: x[]))",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: x[2])"
  },
  {
    "nl": "sort list `l` by index 2 of the item",
    "cmd": "sorted(l, key=(lambda x: x[2]))",
    "question_id": "4174941-58",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(l, key=( x: x[]))",
    "canonical_cmd": "sorted(VAR_STR, key=lambda x: x[2])"
  },
  {
    "nl": "sort a list of lists `list_to_sort` by indices 2,0,1 of the inner list",
    "cmd": "sorted_list = sorted(list_to_sort, key=itemgetter(2, 0, 1))",
    "question_id": "4174941-60",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.operator#operator.itemgetter"
    ],
    "clean_cmd": "sorted_list = sorted(list_to_sort, key=itemgetter(, , ))",
    "canonical_cmd": "sorted_list = sorted(VAR_STR, key=itemgetter(2, 0, 1))"
  },
  {
    "nl": "Change data type of data in column 'grade' of dataframe `data_df` into float and then to int",
    "cmd": "data_df['grade'] = data_df['grade'].astype(float).astype(int)",
    "question_id": "40707158-98",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.astype"
    ],
    "clean_cmd": "data_df[] = data_df[].astype(float).astype(int)",
    "canonical_cmd": "VAR_STR['VAR_STR'] = VAR_STR['VAR_STR'].astype(float).astype(int)"
  },
  {
    "nl": "get current url in selenium webdriver `browser`",
    "cmd": "print(browser.current_url)",
    "question_id": "15985339-6",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print(browser.current_url)",
    "canonical_cmd": "print(VAR_STR.current_url)"
  },
  {
    "nl": "Delete an item with key \"key\" from `mydict`",
    "cmd": "mydict.pop('key', None)",
    "question_id": "15411107-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.pop"
    ],
    "clean_cmd": "mydict.pop(, None)",
    "canonical_cmd": "VAR_STR.pop('VAR_STR', None)"
  },
  {
    "nl": "Delete an item with key `key` from `mydict`",
    "cmd": "del mydict[key]",
    "question_id": "15411107-89",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " mydict[key]",
    "canonical_cmd": "del VAR_STR[VAR_STR]"
  },
  {
    "nl": "Delete an item with key `key` from `mydict`",
    "cmd": "try:\n    del mydict[key]\nexcept KeyError:\n    pass\ntry:\n    del mydict[key]\nexcept KeyError:\n    pass",
    "question_id": "15411107-82",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": ": mydict[key] KeyError:: mydict[key] KeyError:",
    "canonical_cmd": "try:\n    del VAR_STR[VAR_STR]\nexcept KeyError:\n    pass\ntry:\n    del VAR_STR[VAR_STR]\nexcept KeyError:\n    pass"
  },
  {
    "nl": "convert hex '\\xff' to integer",
    "cmd": "ord('\\xff')",
    "question_id": "19819863-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#ord"
    ],
    "clean_cmd": "ord()",
    "canonical_cmd": "ord('VAR_STR')"
  },
  {
    "nl": "run flask application `app` in debug mode.",
    "cmd": "app.run(debug=True)",
    "question_id": "32722143-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.pdb#pdb.run"
    ],
    "clean_cmd": "app.run(debug=True)",
    "canonical_cmd": "VAR_STR.run(debug=True)"
  },
  {
    "nl": "Get total number of values in a nested dictionary `food_colors`",
    "cmd": "sum(len(x) for x in list(food_colors.values()))",
    "question_id": "4581646-14",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#sum",
      "python.library.functions#list",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "sum(len(x)  x  list(food_colors.values()))",
    "canonical_cmd": "sum(len(x) for x in list(VAR_STR.values()))"
  },
  {
    "nl": "count all elements in a nested dictionary `food_colors`",
    "cmd": "sum(len(v) for v in food_colors.values())",
    "question_id": "4581646-42",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#sum",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "sum(len(v)  v  food_colors.values())",
    "canonical_cmd": "sum(len(v) for v in VAR_STR.values())"
  },
  {
    "nl": "concatenate sequence of numpy arrays  `LIST` into a one dimensional array along the first axis",
    "cmd": "numpy.concatenate(LIST, axis=0)",
    "question_id": "27516849-33",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.concatenate"
    ],
    "clean_cmd": "numpy.concatenate(LIST, axis=)",
    "canonical_cmd": "numpy.concatenate(VAR_STR, axis=0)"
  },
  {
    "nl": "print '[1, 2, 3]'",
    "cmd": "print('[%s, %s, %s]' % (1, 2, 3))",
    "question_id": "517355-78",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print( % (, , ))",
    "canonical_cmd": "print('[%s, %s, %s]' % (1, 2, 3))"
  },
  {
    "nl": "Display `1 2 3` as a list of string",
    "cmd": "print('[{0}, {1}, {2}]'.format(1, 2, 3))",
    "question_id": "517355-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": "print(.format(, , ))",
    "canonical_cmd": "print('[{0}, {1}, {2}]'.format(1, 2, 3))"
  },
  {
    "nl": "read file 'myfile' using encoding 'iso-8859-1'",
    "cmd": "codecs.open('myfile', 'r', 'iso-8859-1').read()",
    "question_id": "16883447-58",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.codecs#codecs.open",
      "python.library.codecs#codecs.StreamReader.read"
    ],
    "clean_cmd": "codecs.open(, , ).read()",
    "canonical_cmd": "codecs.open('VAR_STR', 'r', 'VAR_STR').read()"
  },
  {
    "nl": "append `date` to list value of `key` in dictionary `dates_dict`, or create key `key` with value `date` in a list if it does not exist",
    "cmd": "dates_dict.setdefault(key, []).append(date)",
    "question_id": "26367812-93",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.setdefault",
      "numpy.reference.generated.numpy.append"
    ],
    "clean_cmd": "dates_dict.setdefault(key, []).append(date)",
    "canonical_cmd": "VAR_STR.setdefault(VAR_STR, []).append(VAR_STR)"
  },
  {
    "nl": "build dictionary with keys of dictionary `_container` as keys and values of returned value of function `_value` with correlating key as parameter",
    "cmd": "{_key: _value(_key) for _key in _container}",
    "question_id": "19121722-38",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "{_key: _value(_key)  _key  _container}",
    "canonical_cmd": "{_key: VAR_STR(_key) for _key in VAR_STR}"
  },
  {
    "nl": "load a tsv file `c:/~/trainSetRel3.txt` into a pandas data frame",
    "cmd": "DataFrame.from_csv('c:/~/trainSetRel3.txt', sep='\\t')",
    "question_id": "9652832-83",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "DataFrame.from_csv(, sep=)",
    "canonical_cmd": "DataFrame.from_csv('VAR_STR', sep='\\t')"
  },
  {
    "nl": "regular expression matching all but 'aa' and 'bb' for string `string`",
    "cmd": "re.findall('-(?!aa-|bb-)([^-]+)', string)",
    "question_id": "39600161-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, string)",
    "canonical_cmd": "re.findall('-(?!aa-|bb-)([^-]+)', VAR_STR)"
  },
  {
    "nl": "regular expression matching all but 'aa' and 'bb'",
    "cmd": "re.findall('-(?!aa|bb)([^-]+)', string)",
    "question_id": "39600161-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, string)",
    "canonical_cmd": "re.findall('-(?!aa|bb)([^-]+)', string)"
  },
  {
    "nl": "click on the text button 'section-select-all' using selenium python",
    "cmd": "browser.find_element_by_class_name('section-select-all').click()",
    "question_id": "34527388-67",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "browser.find_element_by_class_name().click()",
    "canonical_cmd": "browser.find_element_by_class_name('VAR_STR').click()"
  },
  {
    "nl": "regex for repeating words in a string `s`",
    "cmd": "re.sub('(?<!\\\\S)((\\\\S+)(?:\\\\s+\\\\2))(?:\\\\s+\\\\2)+(?!\\\\S)', '\\\\1', s)",
    "question_id": "25474338-15",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , s)",
    "canonical_cmd": "re.sub('(?<!\\\\S)((\\\\S+)(?:\\\\s+\\\\2))(?:\\\\s+\\\\2)+(?!\\\\S)', '\\\\1', VAR_STR)"
  },
  {
    "nl": "clear the textbox `text` in tkinter",
    "cmd": "tex.delete('1.0', END)",
    "question_id": "27966626-90",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.ast#ast.Delete"
    ],
    "clean_cmd": "tex.delete(, END)",
    "canonical_cmd": "tex.delete('1.0', END)"
  },
  {
    "nl": "get a dictionary with keys from one list `keys` and values from other list `data`",
    "cmd": "dict(zip(keys, zip(*data)))",
    "question_id": "11613284-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict(zip(keys, zip(*data)))",
    "canonical_cmd": "dict(zip(VAR_STR, zip(*VAR_STR)))"
  },
  {
    "nl": "create a list containing the `n` next values of generator `it`",
    "cmd": "[next(it) for _ in range(n)]",
    "question_id": "4152376-23",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range",
      "python.library.functions#next"
    ],
    "clean_cmd": "[next(it)  _  range(n)]",
    "canonical_cmd": "[next(VAR_STR) for _ in range(VAR_STR)]"
  },
  {
    "nl": "get list of n next values of a generator `it`",
    "cmd": "list(itertools.islice(it, 0, n, 1))",
    "question_id": "4152376-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.islice",
      "python.library.functions#list"
    ],
    "clean_cmd": "list(itertools.islice(it, , n, ))",
    "canonical_cmd": "list(itertools.islice(VAR_STR, 0, n, 1))"
  },
  {
    "nl": "convert csv file 'test.csv' into two-dimensional matrix",
    "cmd": "numpy.loadtxt(open('test.csv', 'rb'), delimiter=',', skiprows=1)",
    "question_id": "4315506-84",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.loadtxt",
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "numpy.loadtxt(open(, ), delimiter=, skiprows=)",
    "canonical_cmd": "numpy.loadtxt(open('VAR_STR', 'rb'), delimiter=',', skiprows=1)"
  },
  {
    "nl": "MySQL execute query 'SELECT * FROM foo WHERE bar = %s AND baz = %s' with parameters `param1` and `param2`",
    "cmd": "c.execute('SELECT * FROM foo WHERE bar = %s AND baz = %s', (param1, param2))",
    "question_id": "775296-19",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.msilib#msilib.View.Execute"
    ],
    "clean_cmd": "c.execute(, (param1, param2))",
    "canonical_cmd": "c.execute('VAR_STR', (VAR_STR, VAR_STR))"
  },
  {
    "nl": "get data of columns with Null values in dataframe `df`",
    "cmd": "df[pd.isnull(df).any(axis=1)]",
    "question_id": "14247586-5",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.isnull",
      "python.library.functions#any"
    ],
    "clean_cmd": "df[pd.isnull(df).any(axis=)]",
    "canonical_cmd": "VAR_STR[pd.isnull(VAR_STR).any(axis=1)]"
  },
  {
    "nl": "Collapse hierarchical column index to level 0 in dataframe `df`",
    "cmd": "df.columns = df.columns.get_level_values(0)",
    "question_id": "14507794-7",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df.columns = df.columns.get_level_values()",
    "canonical_cmd": "VAR_STR.columns = VAR_STR.columns.get_level_values(0)"
  },
  {
    "nl": "get keys with same value in dictionary `d`",
    "cmd": "print([key for key in d if d[key] == 1])",
    "question_id": "24958010-61",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print([key  key  d  d[key] == ])",
    "canonical_cmd": "print([key for key in VAR_STR if VAR_STR[key] == 1])"
  },
  {
    "nl": "get keys with same value in dictionary `d`",
    "cmd": "print([key for key, value in d.items() if value == 1])",
    "question_id": "24958010-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "print([key  key, value  d.items()  value == ])",
    "canonical_cmd": "print([key for key, value in VAR_STR.items() if value == 1])"
  },
  {
    "nl": "Get keys from a dictionary 'd' where the value is '1'.",
    "cmd": "print([key for key, value in list(d.items()) if value == 1])",
    "question_id": "24958010-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "print([key  key, value  list(d.items())  value == ])",
    "canonical_cmd": "print([key for key, value in list(VAR_STR.items()) if value == 1])"
  },
  {
    "nl": "coalesce non-word-characters in string `a`",
    "cmd": "print(re.sub('(\\\\W)\\\\1+', '\\\\1', a))",
    "question_id": "2813829-29",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "print(re.sub(, , a))",
    "canonical_cmd": "print(re.sub('(\\\\W)\\\\1+', '\\\\1', VAR_STR))"
  },
  {
    "nl": "variable number of digits `digits` in variable `value` in format string \"{0:.{1}%}\"",
    "cmd": "\"\"\"{0:.{1}%}\"\"\".format(value, digits)",
    "question_id": "14932247-15",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format(value, digits)",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".format(VAR_STR, VAR_STR)"
  },
  {
    "nl": "sort list `a` in ascending order based on the addition of the second and third elements of each tuple in it",
    "cmd": "sorted(a, key=lambda x: (sum(x[1:3]), x[0]))",
    "question_id": "40384599-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#sum"
    ],
    "clean_cmd": "sorted(a, key= x: (sum(x[:]), x[]))",
    "canonical_cmd": "sorted(VAR_STR, key=lambda x: (sum(x[1:3]), x[0]))"
  },
  {
    "nl": "sort a list of tuples `a` by the sum of second and third element of each tuple",
    "cmd": "sorted(a, key=lambda x: (sum(x[1:3]), x[0]), reverse=True)",
    "question_id": "40384599-36",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#sum"
    ],
    "clean_cmd": "sorted(a, key= x: (sum(x[:]), x[]), reverse=True)",
    "canonical_cmd": "sorted(VAR_STR, key=lambda x: (sum(x[1:3]), x[0]), reverse=True)"
  },
  {
    "nl": "sorting a list of tuples `lst` by the sum of the second elements onwards, and third element of the tuple",
    "cmd": "sorted(lst, key=lambda x: (sum(x[1:]), x[0]))",
    "question_id": "40384599-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#sum"
    ],
    "clean_cmd": "sorted(lst, key= x: (sum(x[:]), x[]))",
    "canonical_cmd": "sorted(VAR_STR, key=lambda x: (sum(x[1:]), x[0]))"
  },
  {
    "nl": "sort the list of tuples `lst` by the sum of every value except the first and by the first value in reverse order",
    "cmd": "sorted(lst, key=lambda x: (sum(x[1:]), x[0]), reverse=True)",
    "question_id": "40384599-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#sum"
    ],
    "clean_cmd": "sorted(lst, key= x: (sum(x[:]), x[]), reverse=True)",
    "canonical_cmd": "sorted(VAR_STR, key=lambda x: (sum(x[1:]), x[0]), reverse=True)"
  },
  {
    "nl": "multiply all items in a list `[1, 2, 3, 4, 5, 6]` together",
    "cmd": "from functools import reduce\nreduce(lambda x, y: x * y, [1, 2, 3, 4, 5, 6])",
    "question_id": "13840379-89",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "from functools import reducereduce( x, y: x * y, [, , , , , ])",
    "canonical_cmd": "from functools import reduce\nreduce(lambda x, y: x * y, [VAR_STR])"
  },
  {
    "nl": "create a list containing the subtraction of each item in list `L` from the item prior to it",
    "cmd": "[(y - x) for x, y in zip(L, L[1:])]",
    "question_id": "4029436-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "[(y - x)  x, y  zip(L, L[:])]",
    "canonical_cmd": "[(y - x) for x, y in zip(VAR_STR, VAR_STR[1:])]"
  },
  {
    "nl": "sympy solve matrix of linear equations `(([1, 1, 1, 1], [1, 1, 2, 3]))` with variables `(x, y, z)`",
    "cmd": "linsolve(Matrix(([1, 1, 1, 1], [1, 1, 2, 3])), (x, y, z))",
    "question_id": "31547657-16",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.matrix"
    ],
    "clean_cmd": "linsolve(Matrix(([, , , ], [, , , ])), (x, y, z))",
    "canonical_cmd": "linsolve(Matrix(VAR_STR), (VAR_STR))"
  },
  {
    "nl": "replacing 'ABC' and 'AB' values in column 'BrandName' of dataframe `df` with 'A'",
    "cmd": "df['BrandName'].replace(['ABC', 'AB'], 'A')",
    "question_id": "27060098-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "df[].replace([, ], )",
    "canonical_cmd": "VAR_STR['VAR_STR'].replace(['VAR_STR', 'VAR_STR'], 'VAR_STR')"
  },
  {
    "nl": "replace values `['ABC', 'AB']` in a column 'BrandName' of  pandas dataframe `df` with another value 'A'",
    "cmd": "df['BrandName'] = df['BrandName'].replace(['ABC', 'AB'], 'A')",
    "question_id": "27060098-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "df[] = df[].replace([, ], )",
    "canonical_cmd": "VAR_STR['VAR_STR'] = VAR_STR['VAR_STR'].replace([VAR_STR], 'VAR_STR')"
  },
  {
    "nl": "Set time zone `Europe/Istanbul` in Django",
    "cmd": "TIME_ZONE = 'Europe/Istanbul'",
    "question_id": "29311354-62",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "TIME_ZONE = ",
    "canonical_cmd": "TIME_ZONE = 'VAR_STR'"
  },
  {
    "nl": "match regex pattern 'TAA(?:[ATGC]{3})+?TAA' on string `seq`",
    "cmd": "re.findall('TAA(?:[ATGC]{3})+?TAA', seq)",
    "question_id": "9618050-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, seq)",
    "canonical_cmd": "re.findall('VAR_STR', VAR_STR)"
  },
  {
    "nl": "remove the space between subplots in matplotlib.pyplot",
    "cmd": "fig.subplots_adjust(wspace=0, hspace=0)",
    "question_id": "41071947-3",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.figure_api#matplotlib.figure.FigureBase.subplots_adjust"
    ],
    "clean_cmd": "fig.subplots_adjust(wspace=, hspace=)",
    "canonical_cmd": "fig.subplots_adjust(wspace=0, hspace=0)"
  },
  {
    "nl": "convert js date object 'Tue, 22 Nov 2011 06:00:00 GMT' to python datetime",
    "cmd": "datetime.strptime('Tue, 22 Nov 2011 06:00:00 GMT', '%a, %d %b %Y %H:%M:%S %Z')",
    "question_id": "8153631-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime"
    ],
    "clean_cmd": "datetime.strptime(, )",
    "canonical_cmd": "datetime.strptime('VAR_STR', '%a, %d %b %Y %H:%M:%S %Z')"
  },
  {
    "nl": "order a list of lists `l1` by the first value",
    "cmd": "l1.sort(key=lambda x: int(x[0]))",
    "question_id": "40744328-73",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "l1.sort(key= x: int(x[]))",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: int(x[0]))"
  },
  {
    "nl": "order a list of lists `[[1, 'mike'], [1, 'bob']]` by the first value of individual list",
    "cmd": "sorted([[1, 'mike'], [1, 'bob']])",
    "question_id": "40744328-93",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted([[, ], [, ]])",
    "canonical_cmd": "sorted([VAR_STR])"
  },
  {
    "nl": "convert list of key-value tuples `[('A', 1), ('B', 2), ('C', 3)]` into dictionary",
    "cmd": "dict([('A', 1), ('B', 2), ('C', 3)])",
    "question_id": "6586310-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict([(, ), (, ), (, )])",
    "canonical_cmd": "dict([VAR_STR])"
  },
  {
    "nl": "Check if 3 is not in a list [2, 3, 4]",
    "cmd": "(3 not in [2, 3, 4])",
    "question_id": "10406130-25",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "(   [, , ])",
    "canonical_cmd": "3 not in [2, 3, 4]"
  },
  {
    "nl": "Check if tuple (2, 3) is not in a list [(2, 3), (5, 6), (9, 1)]",
    "cmd": "((2, 3) not in [(2, 3), (5, 6), (9, 1)])",
    "question_id": "10406130-84",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "((, )   [(, ), (, ), (, )])",
    "canonical_cmd": "(2, 3) not in [(2, 3), (5, 6), (9, 1)]"
  },
  {
    "nl": "Check if tuple (2, 3) is not in a list [(2, 7), (7, 3), \"hi\"]",
    "cmd": "((2, 3) not in [(2, 7), (7, 3), 'hi'])",
    "question_id": "10406130-41",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "((, )   [(, ), (, ), ])",
    "canonical_cmd": "(2, 3) not in [(2, 7), (7, 3), 'VAR_STR']"
  },
  {
    "nl": "Check if 3 is not in the list [4,5,6]",
    "cmd": "(3 not in [4, 5, 6])",
    "question_id": "10406130-74",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "(   [, , ])",
    "canonical_cmd": "3 not in [4, 5, 6]"
  },
  {
    "nl": "find consecutive consonants in a word `CONCENTRATION` using regex",
    "cmd": "re.findall('[bcdfghjklmnpqrstvwxyz]+', 'CONCERTATION', re.IGNORECASE)",
    "question_id": "27744882-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, , re.IGNORECASE)",
    "canonical_cmd": "re.findall('[bcdfghjklmnpqrstvwxyz]+', 'CONCERTATION', re.IGNORECASE)"
  },
  {
    "nl": "add string `-` in `4th` position of a string `s`",
    "cmd": "s[:4] + '-' + s[4:]",
    "question_id": "5254445-10",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "s[:] +  + s[:]",
    "canonical_cmd": "VAR_STR[:4] + 'VAR_STR' + VAR_STR[4:]"
  },
  {
    "nl": "check if character '-' exists in a dataframe `df` cell 'a'",
    "cmd": "df['a'].str.contains('-')",
    "question_id": "39299703-19",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.series.str.contains"
    ],
    "clean_cmd": "df[].str.contains()",
    "canonical_cmd": "VAR_STR['VAR_STR'].str.contains('VAR_STR')"
  },
  {
    "nl": "Jinja2 formate date `item.date` accorto pattern 'Y M d'",
    "cmd": "{{(item.date | date): 'Y M d'}}",
    "question_id": "794995-38",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "{{(item.date | date): }}",
    "canonical_cmd": "{{(item.date | date): 'VAR_STR'}}"
  },
  {
    "nl": "Convert long int `myNumber` into date and time represented in the the string format '%Y-%m-%d %H:%M:%S'",
    "cmd": "datetime.datetime.fromtimestamp(myNumber).strftime('%Y-%m-%d %H:%M:%S')",
    "question_id": "10664430-91",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.fromtimestamp",
      "python.library.datetime#datetime.datetime.strftime"
    ],
    "clean_cmd": "datetime.datetime.fromtimestamp(myNumber).strftime()",
    "canonical_cmd": "datetime.datetime.fromtimestamp(VAR_STR).strftime('VAR_STR')"
  },
  {
    "nl": "remove leading and trailing zeros in the string 'your_Strip'",
    "cmd": "your_string.strip('0')",
    "question_id": "13142347-60",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": "your_string.strip()",
    "canonical_cmd": "your_string.strip('0')"
  },
  {
    "nl": "destruct elements of list `[1, 2, 3]` to variables `a`, `b` and `c`",
    "cmd": "a, b, c = [1, 2, 3]",
    "question_id": "19300174-53",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "a, b, c = [, , ]",
    "canonical_cmd": "VAR_STR, VAR_STR, VAR_STR = [VAR_STR]"
  },
  {
    "nl": "get a list of characters in string `x` matching regex pattern `pattern`",
    "cmd": "print(re.findall(pattern, x))",
    "question_id": "40094588-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "print(re.findall(pattern, x))",
    "canonical_cmd": "print(re.findall(VAR_STR, VAR_STR))"
  },
  {
    "nl": "get a list `slice` of array slices of the first two rows and columns from array `arr`",
    "cmd": "slice = [arr[i][0:2] for i in range(0, 2)]",
    "question_id": "17277100-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "slice = [arr[i][:]  i  range(, )]",
    "canonical_cmd": "VAR_STR = [VAR_STR[i][0:2] for i in range(0, 2)]"
  },
  {
    "nl": "Create new list `result` by splitting each item in list `words`",
    "cmd": "result = [item for word in words for item in word.split(',')]",
    "question_id": "12808420-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "result = [item  word  words  item  word.split()]",
    "canonical_cmd": "VAR_STR = [item for word in VAR_STR for item in word.split(',')]"
  },
  {
    "nl": "sort list `lst` based on each element's number of occurrences",
    "cmd": "sorted(lst, key=lambda x: (-1 * c[x], lst.index(x)))",
    "question_id": "42394627-10",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.stdtypes#str.index"
    ],
    "clean_cmd": "sorted(lst, key= x: (- * c[x], lst.index(x)))",
    "canonical_cmd": "sorted(VAR_STR, key=lambda x: (-1 * c[x], VAR_STR.index(x)))"
  },
  {
    "nl": "convert string 'a' to hex",
    "cmd": "hex(ord('a'))",
    "question_id": "21669374-19",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#ord",
      "python.library.functions#hex"
    ],
    "clean_cmd": "hex(ord())",
    "canonical_cmd": "hex(ord('VAR_STR'))"
  },
  {
    "nl": "Insert a character `-` after every two elements in a string `s`",
    "cmd": "\"\"\"-\"\"\".join(a + b for a, b in zip(s[::2], s[1::2]))",
    "question_id": "3258573-36",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(a + b  a, b  zip(s[::], s[::]))",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".join(a + b for a, b in zip(VAR_STR[::2], VAR_STR[1::2]))"
  },
  {
    "nl": "replace fields delimited by braces {} in string \"Day old bread, 50% sale {0}\" with string 'today'",
    "cmd": "\"\"\"Day old bread, 50% sale {0}\"\"\".format('today')",
    "question_id": "2847272-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".format('VAR_STR')"
  },
  {
    "nl": "zip two lists `[1, 2]` and `[3, 4]` into a list of two tuples containing elements at the same index in each list",
    "cmd": "zip([1, 2], [3, 4])",
    "question_id": "13704860-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "zip([, ], [, ])",
    "canonical_cmd": "zip([VAR_STR], [VAR_STR])"
  },
  {
    "nl": "remove all instances of parenthesesis containing text beginning with `as ` from string `line`",
    "cmd": "line = re.sub('\\\\(+as .*?\\\\) ', '', line)",
    "question_id": "37584492-24",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "line = re.sub(, , line)",
    "canonical_cmd": "VAR_STR = re.sub('\\\\(+as .*?\\\\) ', '', VAR_STR)"
  },
  {
    "nl": "decode url `url` with utf8 and print it",
    "cmd": "print(urllib.parse.unquote(url).decode('utf8'))",
    "question_id": "16566069-85",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.parse#urllib.parse.unquote",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "print(urllib.parse.unquote(url).decode())",
    "canonical_cmd": "print(urllib.parse.unquote(VAR_STR).decode('utf8'))"
  },
  {
    "nl": "decode a urllib escaped url string `url` with `utf8`",
    "cmd": "url = urllib.parse.unquote(url).decode('utf8')",
    "question_id": "16566069-92",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.parse#urllib.parse.unquote",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "url = urllib.parse.unquote(url).decode()",
    "canonical_cmd": "VAR_STR = urllib.parse.unquote(VAR_STR).decode('VAR_STR')"
  },
  {
    "nl": "substitute multiple whitespace with single whitespace in string `mystring`",
    "cmd": "\"\"\" \"\"\".join(mystring.split())",
    "question_id": "2077897-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": ".join(mystring.split())",
    "canonical_cmd": "\"\"\" \"\"\".join(VAR_STR.split())"
  },
  {
    "nl": "How can I split and parse a string in Python?",
    "cmd": "\"\"\"2.7.0_bf4fda703454\"\"\".split('_')",
    "question_id": "5749195-27",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": ".split()",
    "canonical_cmd": "\"\"\"2.7.0_bf4fda703454\"\"\".split('_')"
  },
  {
    "nl": "sort a list of tuples 'unsorted' based on two elements, second and third",
    "cmd": "sorted(unsorted, key=lambda element: (element[1], element[2]))",
    "question_id": "9376384-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(unsorted, key= element: (element[], element[]))",
    "canonical_cmd": "sorted(VAR_STR, key=lambda element: (element[1], element[2]))"
  },
  {
    "nl": "converting byte string `c` in unicode string",
    "cmd": "c.decode('unicode_escape')",
    "question_id": "13837848-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "c.decode()",
    "canonical_cmd": "VAR_STR.decode('unicode_escape')"
  },
  {
    "nl": "sort list `files` based on variable `file_number`",
    "cmd": "files.sort(key=file_number)",
    "question_id": "9466017-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "files.sort(key=file_number)",
    "canonical_cmd": "VAR_STR.sort(key=VAR_STR)"
  },
  {
    "nl": "parse a comma-separated string number '1,000,000' into int",
    "cmd": "int('1,000,000'.replace(',', ''))",
    "question_id": "2953746-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "int(.replace(, ))",
    "canonical_cmd": "int('VAR_STR'.replace(',', ''))"
  },
  {
    "nl": "multiply the columns of sparse matrix `m` by array `a` then multiply the rows of the resulting matrix by array `a`",
    "cmd": "numpy.dot(numpy.dot(a, m), a)",
    "question_id": "13163145-86",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.dot"
    ],
    "clean_cmd": "numpy.dot(numpy.dot(a, m), a)",
    "canonical_cmd": "numpy.dot(numpy.dot(VAR_STR, VAR_STR), VAR_STR)"
  },
  {
    "nl": "print \"Please enter something: \" to console, and read user input to `var`",
    "cmd": "var = input('Please enter something: ')",
    "question_id": "70797-30",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#input"
    ],
    "clean_cmd": "var = input()",
    "canonical_cmd": "VAR_STR = input('Please enter something: ')"
  },
  {
    "nl": "convert a set of tuples `queryresult` to a string `emaillist`",
    "cmd": "emaillist = '\\n'.join(item[0] for item in queryresult)",
    "question_id": "8704952-19",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "emaillist = .join(item[]  item  queryresult)",
    "canonical_cmd": "VAR_STR = '\\n'.join(item[0] for item in VAR_STR)"
  },
  {
    "nl": "convert a set of tuples `queryresult` to a list of strings",
    "cmd": "[item[0] for item in queryresult]",
    "question_id": "8704952-43",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[item[]  item  queryresult]",
    "canonical_cmd": "[item[0] for item in VAR_STR]"
  },
  {
    "nl": "convert a list of tuples `queryresult` to a string from the first indexes.",
    "cmd": "emaillist = '\\n'.join([item[0] for item in queryresult])",
    "question_id": "8704952-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "emaillist = .join([item[]  item  queryresult])",
    "canonical_cmd": "emaillist = '\\n'.join([item[0] for item in VAR_STR])"
  },
  {
    "nl": "list the contents of a directory '/home/username/www/'",
    "cmd": "os.listdir('/home/username/www/')",
    "question_id": "2759323-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.listdir"
    ],
    "clean_cmd": "os.listdir()",
    "canonical_cmd": "os.listdir('VAR_STR')"
  },
  {
    "nl": "list all the contents of the directory 'path'.",
    "cmd": "os.listdir('path')",
    "question_id": "2759323-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.listdir"
    ],
    "clean_cmd": "os.listdir()",
    "canonical_cmd": "os.listdir('VAR_STR')"
  },
  {
    "nl": "Set multi index on columns 'Company' and 'date' of data frame `df` in pandas.",
    "cmd": "df.set_index(['Company', 'date'], inplace=True)",
    "question_id": "24041436-36",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.set_index"
    ],
    "clean_cmd": "df.set_index([, ], inplace=True)",
    "canonical_cmd": "VAR_STR.set_index(['VAR_STR', 'VAR_STR'], inplace=True)"
  },
  {
    "nl": "use upper case letters to print hex value `value`",
    "cmd": "print('0x%X' % value)",
    "question_id": "13277440-73",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print( % value)",
    "canonical_cmd": "print('0x%X' % VAR_STR)"
  },
  {
    "nl": "Find all Chinese characters in string `ipath`",
    "cmd": "re.findall('[\\u4e00-\\u9fff]+', ipath)",
    "question_id": "34587346-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, ipath)",
    "canonical_cmd": "re.findall('[\u4e00-\\u9fff]+', VAR_STR)"
  },
  {
    "nl": "parse string \"Jun 1 2005  1:33PM\" into datetime by format \"%b %d %Y %I:%M%p\"",
    "cmd": "datetime.strptime('Jun 1 2005  1:33PM', '%b %d %Y %I:%M%p')",
    "question_id": "466345-98",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime"
    ],
    "clean_cmd": "datetime.strptime(, )",
    "canonical_cmd": "datetime.strptime('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "parse string \"Aug 28 1999 12:00AM\" into datetime",
    "cmd": "parser.parse('Aug 28 1999 12:00AM')",
    "question_id": "466345-79",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.email.parser#email.parser.Parser.parse"
    ],
    "clean_cmd": "parser.parse()",
    "canonical_cmd": "parser.parse('VAR_STR')"
  },
  {
    "nl": "sort list `lst` with positives coming before negatives with values sorted respectively",
    "cmd": "sorted(lst, key=lambda x: (x < 0, x))",
    "question_id": "40620804-62",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(lst, key= x: (x &lt; , x))",
    "canonical_cmd": "sorted(VAR_STR, key=lambda x: (x < 0, x))"
  },
  {
    "nl": "get tuples of the corresponding elements from lists `lst` and `lst2`",
    "cmd": "[(x, lst2[i]) for i, x in enumerate(lst)]",
    "question_id": "18990069-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate"
    ],
    "clean_cmd": "[(x, lst2[i])  i, x  enumerate(lst)]",
    "canonical_cmd": "[(x, VAR_STR[i]) for i, x in enumerate(VAR_STR)]"
  },
  {
    "nl": "create tuples containing elements that are at the same index of list `lst` and list `lst2`",
    "cmd": "[(i, j) for i, j in zip(lst, lst2)]",
    "question_id": "18990069-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "[(i, j)  i, j  zip(lst, lst2)]",
    "canonical_cmd": "[(i, j) for i, j in zip(VAR_STR, VAR_STR)]"
  },
  {
    "nl": "get tuples from lists `lst` and `lst2` using list comprehension in python 2",
    "cmd": "[(lst[i], lst2[i]) for i in range(len(lst))]",
    "question_id": "18990069-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range"
    ],
    "clean_cmd": "[(lst[i], lst2[i])  i  range(len(lst))]",
    "canonical_cmd": "[(VAR_STR[i], VAR_STR[i]) for i in range(len(VAR_STR))]"
  },
  {
    "nl": "python, format string \"{} %s {}\" to have 'foo' and 'bar' in the first and second positions",
    "cmd": "\"\"\"{} %s {}\"\"\".format('foo', 'bar')",
    "question_id": "4928526-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format(, )",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".format('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "replace string ' and ' in string `stuff` with character '/'",
    "cmd": "stuff.replace(' and ', '/')",
    "question_id": "10037742-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "stuff.replace(, )",
    "canonical_cmd": "VAR_STR.replace(' and ', 'VAR_STR')"
  },
  {
    "nl": "Execute SQL statement `sql` with values of dictionary `myDict` as parameters",
    "cmd": "cursor.execute(sql, list(myDict.values()))",
    "question_id": "9336270-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.sqlite3#sqlite3.Cursor.execute",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "cursor.execute(sql, list(myDict.values()))",
    "canonical_cmd": "cursor.execute(VAR_STR, list(VAR_STR.values()))"
  },
  {
    "nl": "set the size of figure `fig` in inches to width height of `w`, `h`",
    "cmd": "fig.set_size_inches(w, h, forward=True)",
    "question_id": "15882395-40",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.figure_api#matplotlib.figure.Figure.set_size_inches"
    ],
    "clean_cmd": "fig.set_size_inches(w, h, forward=True)",
    "canonical_cmd": "VAR_STR.set_size_inches(VAR_STR, VAR_STR, forward=True)"
  },
  {
    "nl": "get value of first index of each element in list `a`",
    "cmd": "[x[0] for x in a]",
    "question_id": "30062429-44",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[x[]  x  a]",
    "canonical_cmd": "[x[0] for x in VAR_STR]"
  },
  {
    "nl": "python how to get every first element in 2 dimensional list `a`",
    "cmd": "[i[0] for i in a]",
    "question_id": "30062429-17",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[i[]  i  a]",
    "canonical_cmd": "[i[0] for i in VAR_STR]"
  },
  {
    "nl": "read file `fname` line by line into a list `content`",
    "cmd": "with open(fname) as f:\n    content = f.readlines()",
    "question_id": "3277503-53",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.io#io.IOBase.readlines"
    ],
    "clean_cmd": " open(fname)  f:content = f.readlines()",
    "canonical_cmd": "with open(VAR_STR) as f:\n    VAR_STR = f.readlines()"
  },
  {
    "nl": "read file 'filename' line by line into a list `lines`",
    "cmd": "with open('filename') as f:\n    lines = f.readlines()",
    "question_id": "3277503-95",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.io#io.IOBase.readlines"
    ],
    "clean_cmd": " open()  f:lines = f.readlines()",
    "canonical_cmd": "with open('VAR_STR') as f:\n    VAR_STR = f.readlines()"
  },
  {
    "nl": "read file 'filename' line by line into a list `lines`",
    "cmd": "lines = [line.rstrip('\\n') for line in open('filename')]",
    "question_id": "3277503-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.stdtypes#str.rstrip"
    ],
    "clean_cmd": "lines = [line.rstrip()  line  open()]",
    "canonical_cmd": "VAR_STR = [line.rstrip('\\n') for line in open('VAR_STR')]"
  },
  {
    "nl": "read file \"file.txt\" line by line into a list `array`",
    "cmd": "with open('file.txt', 'r') as ins:\n    array = []\n    for line in ins:\n        array.append(line)",
    "question_id": "3277503-73",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.array#array.array.append"
    ],
    "clean_cmd": " open(, )  ins:array = [] line  ins:array.append(line)",
    "canonical_cmd": "with open('VAR_STR', 'r') as ins:\n    VAR_STR = []\n    for line in ins:\n        VAR_STR.append(line)"
  },
  {
    "nl": "Remove anything in parenthesis from string `item` with a regex",
    "cmd": "item = re.sub(' ?\\\\([^)]+\\\\)', '', item)",
    "question_id": "19794051-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "item = re.sub(, , item)",
    "canonical_cmd": "VAR_STR = re.sub(' ?\\\\([^)]+\\\\)', '', VAR_STR)"
  },
  {
    "nl": "Remove word characters in parenthesis from string `item` with a regex",
    "cmd": "item = re.sub(' ?\\\\(\\\\w+\\\\)', '', item)",
    "question_id": "19794051-95",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "item = re.sub(, , item)",
    "canonical_cmd": "VAR_STR = re.sub(' ?\\\\(\\\\w+\\\\)', '', VAR_STR)"
  },
  {
    "nl": "Remove all data inside parenthesis in string `item`",
    "cmd": "item = re.sub(' \\\\(\\\\w+\\\\)', '', item)",
    "question_id": "19794051-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "item = re.sub(, , item)",
    "canonical_cmd": "VAR_STR = re.sub(' \\\\(\\\\w+\\\\)', '', VAR_STR)"
  },
  {
    "nl": "retrieve the path from a Flask request",
    "cmd": "request.url",
    "question_id": "15974730-10",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "request.url",
    "canonical_cmd": "request.url"
  },
  {
    "nl": "delete all values in a list `mylist`",
    "cmd": "del mylist[:]",
    "question_id": "14465279-6",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " mylist[:]",
    "canonical_cmd": "del VAR_STR[:]"
  },
  {
    "nl": "calculate ratio of sparsity in a numpy array `a`",
    "cmd": "np.isnan(a).sum() / np.prod(a.shape)",
    "question_id": "38708621-19",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.isnan",
      "numpy.reference.generated.numpy.prod",
      "python.library.functions#sum"
    ],
    "clean_cmd": "np.isnan(a).sum() / np.prod(a.shape)",
    "canonical_cmd": "np.isnan(VAR_STR).sum() / np.prod(VAR_STR.shape)"
  },
  {
    "nl": "get digits in string `my_string`",
    "cmd": "\"\"\"\"\"\".join(c for c in my_string if c.isdigit())",
    "question_id": "12005558-73",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.isdigit",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(c  c  my_string  c.isdigit())",
    "canonical_cmd": "\"\"\"\"\"\".join(c for c in VAR_STR if c.isdigit())"
  },
  {
    "nl": "get all characters between two `$` characters in string `string`",
    "cmd": "re.findall('\\\\$([^$]*)\\\\$', string)",
    "question_id": "15043326-39",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, string)",
    "canonical_cmd": "re.findall('\\\\$([^$]*)\\\\$', VAR_STR)"
  },
  {
    "nl": "getting the string between 2 '$' characters in '$sin (x)$ is an function of x'",
    "cmd": "re.findall('\\\\$(.*?)\\\\$', '$sin (x)$ is an function of x')",
    "question_id": "15043326-41",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall('\\\\$(.*?)\\\\$', 'VAR_STR')"
  },
  {
    "nl": "get list of string elements in string `data` delimited by commas, putting `0` in place of empty strings",
    "cmd": "[(int(x) if x else 0) for x in data.split(',')]",
    "question_id": "2606976-16",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "[(int(x)  x  )  x  data.split()]",
    "canonical_cmd": "[(int(x) if x else 0) for x in VAR_STR.split(',')]"
  },
  {
    "nl": "split string `s` into a list of strings based on ',' then replace empty strings with zero",
    "cmd": "\"\"\",\"\"\".join(x or '0' for x in s.split(','))",
    "question_id": "2606976-27",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": ".join(x    x  s.split())",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".join(x or '0' for x in VAR_STR.split('VAR_STR'))"
  },
  {
    "nl": "get a list of the keys in each dictionary in a dictionary of dictionaries `foo`",
    "cmd": "[k for d in list(foo.values()) for k in d]",
    "question_id": "41251391-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "[k  d  list(foo.values())  k  d]",
    "canonical_cmd": "[k for d in list(VAR_STR.values()) for k in d]"
  },
  {
    "nl": "count `True` values associated with key 'one' in dictionary `tadas`",
    "cmd": "sum(item['one'] for item in list(tadas.values()))",
    "question_id": "35561743-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum",
      "python.library.functions#list",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "sum(item[]  item  list(tadas.values()))",
    "canonical_cmd": "sum(item['VAR_STR'] for item in list(VAR_STR.values()))"
  },
  {
    "nl": "remove all duplicate items from a list `lseperatedOrblist`",
    "cmd": "woduplicates = list(set(lseperatedOrblist))",
    "question_id": "6764909-58",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#set"
    ],
    "clean_cmd": "woduplicates = list(set(lseperatedOrblist))",
    "canonical_cmd": "woduplicates = list(set(VAR_STR))"
  },
  {
    "nl": "prompt string 'Press Enter to continue...' to the console",
    "cmd": "input('Press Enter to continue...')",
    "question_id": "983354-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#input"
    ],
    "clean_cmd": "input()",
    "canonical_cmd": "input('VAR_STR')"
  },
  {
    "nl": "customize the time format in python logging",
    "cmd": "formatter = logging.Formatter('%(asctime)s;%(levelname)s;%(message)s')",
    "question_id": "3220284-47",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "formatter = logging.Formatter()",
    "canonical_cmd": "formatter = logging.Formatter('%(asctime)s;%(levelname)s;%(message)s')"
  },
  {
    "nl": "set the default encoding to 'utf-8'",
    "cmd": "sys.setdefaultencoding('utf8')",
    "question_id": "21129020-99",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "sys.setdefaultencoding()",
    "canonical_cmd": "sys.setdefaultencoding('utf8')"
  },
  {
    "nl": "reset index of dataframe `df`so that existing index values are transferred into `df`as columns",
    "cmd": "df.reset_index(inplace=True)",
    "question_id": "20110170-28",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.reset_index"
    ],
    "clean_cmd": "df.reset_index(inplace=True)",
    "canonical_cmd": "VAR_STR.reset_index(inplace=True)"
  },
  {
    "nl": "Get all the keys from dictionary `y` whose value is `1`",
    "cmd": "[i for i in y if y[i] == 1]",
    "question_id": "1920145-91",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[i  i  y  y[i] == ]",
    "canonical_cmd": "[i for i in VAR_STR if VAR_STR[i] == 1]"
  },
  {
    "nl": "Sort list `li` in descending order based on the second element of each list inside list`li`",
    "cmd": "sorted(li, key=operator.itemgetter(1), reverse=True)",
    "question_id": "18142090-6",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.operator#operator.itemgetter",
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(li, key=operator.itemgetter(), reverse=True)",
    "canonical_cmd": "sorted(VAR_STR, key=operator.itemgetter(1), reverse=True)"
  },
  {
    "nl": "remove false entries from a dictionary `hand`",
    "cmd": "{k: v for k, v in list(hand.items()) if v}",
    "question_id": "15158599-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "{k: v  k, v  list(hand.items())  v}",
    "canonical_cmd": "{k: v for k, v in list(VAR_STR.items()) if v}"
  },
  {
    "nl": "Get a dictionary from a dictionary `hand` where the values are present",
    "cmd": "dict((k, v) for k, v in hand.items() if v)",
    "question_id": "15158599-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "dict((k, v)  k, v  hand.items()  v)",
    "canonical_cmd": "dict((k, v) for k, v in VAR_STR.items() if v)"
  },
  {
    "nl": "sort a nested list by the inverse of element 2, then by element 1",
    "cmd": "sorted(l, key=lambda x: (-int(x[1]), x[0]))",
    "question_id": "34705205-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#int"
    ],
    "clean_cmd": "sorted(l, key= x: (-int(x[]), x[]))",
    "canonical_cmd": "sorted(l, key=lambda x: (-int(x[1]), x[0]))"
  },
  {
    "nl": "count the number of words in a string `s`",
    "cmd": "len(s.split())",
    "question_id": "19410018-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "len(s.split())",
    "canonical_cmd": "len(VAR_STR.split())"
  },
  {
    "nl": "read line by line from stdin",
    "cmd": "for line in fileinput.input():\n    pass",
    "question_id": "1450393-15",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.fileinput#fileinput.input"
    ],
    "clean_cmd": " line  fileinput.input():",
    "canonical_cmd": "for line in fileinput.input():\n    pass"
  },
  {
    "nl": "read line by line from stdin",
    "cmd": "for line in sys.stdin:\n    pass",
    "question_id": "1450393-89",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " line  sys.stdin:",
    "canonical_cmd": "for line in sys.stdin:\n    pass"
  },
  {
    "nl": "convert ascii value 'a' to int",
    "cmd": "ord('a')",
    "question_id": "3673428-56",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#ord"
    ],
    "clean_cmd": "ord()",
    "canonical_cmd": "ord('VAR_STR')"
  },
  {
    "nl": "get name of primary field `name` of django model `CustomPK`",
    "cmd": "CustomPK._meta.pk.name",
    "question_id": "13418405-19",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "CustomPK._meta.pk.name",
    "canonical_cmd": "VAR_STR._meta.pk.VAR_STR"
  },
  {
    "nl": "sort a pandas data frame according to column `Peak` in ascending and `Weeks` in descending order",
    "cmd": "df.sort_values(['Peak', 'Weeks'], ascending=[True, False], inplace=True)",
    "question_id": "13636592-2",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.sort_values"
    ],
    "clean_cmd": "df.sort_values([, ], ascending=[True, False], inplace=True)",
    "canonical_cmd": "df.sort_values(['VAR_STR', 'VAR_STR'], ascending=[True, False], inplace=True)"
  },
  {
    "nl": "sort a pandas data frame by column `Peak` in ascending and `Weeks` in descending order",
    "cmd": "df.sort(['Peak', 'Weeks'], ascending=[True, False], inplace=True)",
    "question_id": "13636592-66",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.index.sort"
    ],
    "clean_cmd": "df.sort([, ], ascending=[True, False], inplace=True)",
    "canonical_cmd": "df.sort(['VAR_STR', 'VAR_STR'], ascending=[True, False], inplace=True)"
  },
  {
    "nl": "setup a smtp mail server to `smtp.gmail.com` with port `587`",
    "cmd": "server = smtplib.SMTP('smtp.gmail.com', 587)",
    "question_id": "12030179-56",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.smtplib#smtplib.SMTP"
    ],
    "clean_cmd": "server = smtplib.SMTP(, )",
    "canonical_cmd": "server = smtplib.SMTP('VAR_STR', 587)"
  },
  {
    "nl": "execute external commands/script `your_own_script` with csh instead of bash",
    "cmd": "os.system('tcsh your_own_script')",
    "question_id": "533398-65",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system('tcsh your_own_script')"
  },
  {
    "nl": "execute command 'echo $0' in Z shell",
    "cmd": "os.system(\"zsh -c 'echo $0'\")",
    "question_id": "533398-90",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system(\"zsh -c 'echo $0'\")"
  },
  {
    "nl": "sum of product of combinations in a list `l`",
    "cmd": "sum([(i * j) for i, j in list(itertools.combinations(l, 2))])",
    "question_id": "34437284-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.combinations",
      "python.library.functions#sum",
      "python.library.functions#list"
    ],
    "clean_cmd": "sum([(i * j)  i, j  list(itertools.combinations(l, ))])",
    "canonical_cmd": "sum([(i * j) for i, j in list(itertools.combinations(VAR_STR, 2))])"
  },
  {
    "nl": "Truncate `\\r\\n` from each string in a list of string `example`",
    "cmd": "example = [x.replace('\\r\\n', '') for x in example]",
    "question_id": "13781828-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "example = [x.replace(, )  x  example]",
    "canonical_cmd": "VAR_STR = [x.replace('VAR_STR', '') for x in VAR_STR]"
  },
  {
    "nl": "right align string `mystring` with a width of 7",
    "cmd": "\"\"\"{:>7s}\"\"\".format(mystring)",
    "question_id": "16159228-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format(mystring)",
    "canonical_cmd": "\"\"\"{:>7s}\"\"\".format(VAR_STR)"
  },
  {
    "nl": "creating a numpy array of 3d coordinates from three 1d arrays `x_p`, `y_p` and `z_p`",
    "cmd": "np.vstack(np.meshgrid(x_p, y_p, z_p)).reshape(3, -1).T",
    "question_id": "18253210-93",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.meshgrid",
      "numpy.reference.generated.numpy.vstack",
      "numpy.reference.generated.numpy.reshape"
    ],
    "clean_cmd": "np.vstack(np.meshgrid(x_p, y_p, z_p)).reshape(, -).T",
    "canonical_cmd": "np.vstack(np.meshgrid(VAR_STR, VAR_STR, VAR_STR)).reshape(3, -1).T"
  },
  {
    "nl": "move last item of array `a` to the first position",
    "cmd": "a[-1:] + a[:-1]",
    "question_id": "12845112-46",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "a[-:] + a[:-]",
    "canonical_cmd": "VAR_STR[-1:] + VAR_STR[:-1]"
  },
  {
    "nl": "split string `Word to Split` into a list of characters",
    "cmd": "list('Word to Split')",
    "question_id": "113655-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list"
    ],
    "clean_cmd": "list()",
    "canonical_cmd": "list('VAR_STR')"
  },
  {
    "nl": "Sum of sums of each list, in a list of lists named 'lists'.",
    "cmd": "sum(sum(x) for x in lists)",
    "question_id": "15465204-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(sum(x)  x  lists)",
    "canonical_cmd": "sum(sum(x) for x in VAR_STR)"
  },
  {
    "nl": "Set a window size to `1400, 1000` using selenium webdriver",
    "cmd": "driver.set_window_size(1400, 1000)",
    "question_id": "21899953-68",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.set_window_size(, )",
    "canonical_cmd": "driver.set_window_size(1400, 1000)"
  },
  {
    "nl": "change the case of the first letter in string `s`",
    "cmd": "return s[0].upper() + s[1:]",
    "question_id": "4223923-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.upper"
    ],
    "clean_cmd": " s[].upper() + s[:]",
    "canonical_cmd": "return VAR_STR[0].upper() + VAR_STR[1:]"
  },
  {
    "nl": "get the sum of the products of each pair of corresponding elements in lists `a` and `b`",
    "cmd": "sum(x * y for x, y in zip(a, b))",
    "question_id": "41821112-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(x * y  x, y  zip(a, b))",
    "canonical_cmd": "sum(x * y for x, y in zip(VAR_STR, VAR_STR))"
  },
  {
    "nl": "sum the products of each two elements at the same index of list `a` and list `b`",
    "cmd": "list(x * y for x, y in list(zip(a, b)))",
    "question_id": "41821112-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.functions#zip"
    ],
    "clean_cmd": "list(x * y  x, y  list(zip(a, b)))",
    "canonical_cmd": "list(x * y for x, y in list(zip(VAR_STR, VAR_STR)))"
  },
  {
    "nl": "sum the product of each two items at the same index of list `a` and list `b`",
    "cmd": "sum(i * j for i, j in zip(a, b))",
    "question_id": "41821112-48",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(i * j  i, j  zip(a, b))",
    "canonical_cmd": "sum(i * j for i, j in zip(VAR_STR, VAR_STR))"
  },
  {
    "nl": "sum the product of elements of two lists named `a` and `b`",
    "cmd": "sum(x * y for x, y in list(zip(a, b)))",
    "question_id": "41821112-6",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#sum",
      "python.library.functions#list"
    ],
    "clean_cmd": "sum(x * y  x, y  list(zip(a, b)))",
    "canonical_cmd": "sum(x * y for x, y in list(zip(VAR_STR, VAR_STR)))"
  },
  {
    "nl": "Split string `Hello` into a string of letters seperated by `,`",
    "cmd": "\"\"\",\"\"\".join('Hello')",
    "question_id": "14737222-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".join('VAR_STR')"
  },
  {
    "nl": "Matplotlib change marker size to 500",
    "cmd": "scatter(x, y, s=500, color='green', marker='h')",
    "question_id": "19948732-25",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.plot.scatter"
    ],
    "clean_cmd": "scatter(x, y, s=, color=, marker=)",
    "canonical_cmd": "scatter(x, y, s=500, color='green', marker='h')"
  },
  {
    "nl": "change the state of the Tkinter `Text` widget to read only i.e. `disabled`",
    "cmd": "text.config(state=DISABLED)",
    "question_id": "3842155-75",
    "cmd_name": "conala",
    "oracle_man": [
      "flask.api.index#flask.Config"
    ],
    "clean_cmd": "text.config(state=DISABLED)",
    "canonical_cmd": "text.config(state=DISABLED)"
  },
  {
    "nl": "clear session key 'mykey'",
    "cmd": "del request.session['mykey']",
    "question_id": "2375335-29",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " request.session[]",
    "canonical_cmd": "del request.session['VAR_STR']"
  },
  {
    "nl": "display attribute `attr` for each object `obj` in list `my_list_of_objs`",
    "cmd": "print([obj.attr for obj in my_list_of_objs])",
    "question_id": "444058-10",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print([obj.attr  obj  my_list_of_objs])",
    "canonical_cmd": "print([VAR_STR.VAR_STR for VAR_STR in VAR_STR])"
  },
  {
    "nl": "lookup dictionary key `key1` in Django template `json`",
    "cmd": "{{json.key1}}",
    "question_id": "19745091-95",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "{{json.key1}}",
    "canonical_cmd": "{{VAR_STR.VAR_STR}}"
  },
  {
    "nl": "Sum elements of tuple `b` to their respective elements of each tuple in list `a`",
    "cmd": "c = [[(i + j) for i, j in zip(e, b)] for e in a]",
    "question_id": "40313203-39",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "c = [[(i + j)  i, j  zip(e, b)]  e  a]",
    "canonical_cmd": "c = [[(i + j) for i, j in zip(e, VAR_STR)] for e in VAR_STR]"
  },
  {
    "nl": "replace the last occurence of an expression '</div>' with '</bad>' in a string `s`",
    "cmd": "re.sub('(.*)</div>', '\\\\1</bad>', s)",
    "question_id": "2556108-6",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , s)",
    "canonical_cmd": "re.sub('(.*)</div>', '\\\\1</bad>', VAR_STR)"
  },
  {
    "nl": "delete column 'column_name' from dataframe `df`",
    "cmd": "df = df.drop('column_name', 1)",
    "question_id": "13411544-24",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.drop"
    ],
    "clean_cmd": "df = df.drop(, )",
    "canonical_cmd": "VAR_STR = VAR_STR.drop('VAR_STR', 1)"
  },
  {
    "nl": "delete 1st, 2nd and 4th columns from dataframe `df`",
    "cmd": "df.drop(df.columns[[0, 1, 3]], axis=1)",
    "question_id": "13411544-67",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.drop"
    ],
    "clean_cmd": "df.drop(df.columns[[, , ]], axis=)",
    "canonical_cmd": "VAR_STR.drop(VAR_STR.columns[[0, 1, 3]], axis=1)"
  },
  {
    "nl": "delete a column `column_name` without having to reassign from pandas data frame `df`",
    "cmd": "df.drop('column_name', axis=1, inplace=True)",
    "question_id": "13411544-59",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.drop"
    ],
    "clean_cmd": "df.drop(, axis=, inplace=True)",
    "canonical_cmd": "VAR_STR.drop('VAR_STR', axis=1, inplace=True)"
  },
  {
    "nl": "Sort a data `a` in descending order based on the `modified` attribute of elements using lambda function",
    "cmd": "a = sorted(a, key=lambda x: x.modified, reverse=True)",
    "question_id": "3766633-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "a = sorted(a, key= x: x.modified, reverse=True)",
    "canonical_cmd": "VAR_STR = sorted(VAR_STR, key=lambda x: x.VAR_STR, reverse=True)"
  },
  {
    "nl": "create a dictionary `list_dict` containing each tuple in list `tuple_list` as values and the tuple's first element as the corresponding key",
    "cmd": "list_dict = {t[0]: t for t in tuple_list}",
    "question_id": "20059427-99",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "list_dict = {t[]: t  t  tuple_list}",
    "canonical_cmd": "VAR_STR = {t[0]: t for t in VAR_STR}"
  },
  {
    "nl": "print list `t` into a table-like shape",
    "cmd": "print('\\n'.join('  '.join(map(str, row)) for row in t))",
    "question_id": "16677816-22",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "print(.join(.join(map(str, row))  row  t))",
    "canonical_cmd": "print('\\n'.join('  '.join(map(str, row)) for row in VAR_STR))"
  },
  {
    "nl": "factorize all string values in dataframe `s` into floats",
    "cmd": "(s.factorize()[0] + 1).astype('float')",
    "question_id": "42458734-13",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.factorize",
      "pandas.reference.api.pandas.dataframe.astype"
    ],
    "clean_cmd": "(s.factorize()[] + ).astype()",
    "canonical_cmd": "(VAR_STR.factorize()[0] + 1).astype('float')"
  },
  {
    "nl": "find out the number of non-matched elements at the same index of list `a` and list `b`",
    "cmd": "sum(1 for i, j in zip(a, b) if i != j)",
    "question_id": "14914615-30",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(  i, j  zip(a, b)  i != j)",
    "canonical_cmd": "sum(1 for i, j in zip(VAR_STR, VAR_STR) if i != j)"
  },
  {
    "nl": "Get all the second values from a list of lists `A`",
    "cmd": "[row[1] for row in A]",
    "question_id": "903853-42",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[row[]  row  A]",
    "canonical_cmd": "[row[1] for row in VAR_STR]"
  },
  {
    "nl": "extract first column from a multi-dimensional array `a`",
    "cmd": "[row[0] for row in a]",
    "question_id": "903853-19",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[row[]  row  a]",
    "canonical_cmd": "[row[0] for row in VAR_STR]"
  },
  {
    "nl": "print a celsius symbol on x axis of a plot `ax`",
    "cmd": "ax.set_xlabel('Temperature (\\u2103)')",
    "question_id": "8440117-70",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib._as_gen.matplotlib.axes.axes.set_xlabel"
    ],
    "clean_cmd": "ax.set_xlabel()",
    "canonical_cmd": "VAR_STR.set_xlabel('Temperature (\u2103)')"
  },
  {
    "nl": "Print a celsius symbol with matplotlib",
    "cmd": "ax.set_xlabel('Temperature ($^\\\\circ$C)')",
    "question_id": "8440117-34",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib._as_gen.matplotlib.axes.axes.set_xlabel"
    ],
    "clean_cmd": "ax.set_xlabel()",
    "canonical_cmd": "ax.set_xlabel('Temperature ($^\\\\circ$C)')"
  },
  {
    "nl": "Convert list of dictionaries `L` into a flat dictionary",
    "cmd": "dict(pair for d in L for pair in list(d.items()))",
    "question_id": "3494906-84",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "dict(pair  d  L  pair  list(d.items()))",
    "canonical_cmd": "dict(pair for d in VAR_STR for pair in list(d.items()))"
  },
  {
    "nl": "merge a list of dictionaries in list `L` into a single dict",
    "cmd": "{k: v for d in L for k, v in list(d.items())}",
    "question_id": "3494906-48",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "{k: v  d  L  k, v  list(d.items())}",
    "canonical_cmd": "{k: v for d in VAR_STR for k, v in list(d.items())}"
  },
  {
    "nl": "compile Visual Studio project `project.sln` from the command line through python",
    "cmd": "os.system('msbuild project.sln /p:Configuration=Debug')",
    "question_id": "498106-72",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system('msbuild project.sln /p:Configuration=Debug')"
  },
  {
    "nl": "read lines from a csv file `./urls-eu.csv` into a list of lists `arr`",
    "cmd": "arr = [line.split(',') for line in open('./urls-eu.csv')]",
    "question_id": "1532810-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "arr = [line.split()  line  open()]",
    "canonical_cmd": "VAR_STR = [line.split(',') for line in open('VAR_STR')]"
  },
  {
    "nl": "replace occurrences of two whitespaces or more with one whitespace ' ' in string `s`",
    "cmd": "re.sub(' +', ' ', s)",
    "question_id": "943809-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , s)",
    "canonical_cmd": "re.sub(' +', ' ', VAR_STR)"
  },
  {
    "nl": "count the number of rows with missing values in a pandas dataframe `df`",
    "cmd": "sum(df.apply(lambda x: sum(x.isnull().values), axis=1) > 0)",
    "question_id": "28199524-81",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum",
      "pandas.reference.api.pandas.dataframe.apply",
      "pandas.reference.api.pandas.dataframe.isnull"
    ],
    "clean_cmd": "sum(df.apply( x: sum(x.isnull().values), axis=) &gt; )",
    "canonical_cmd": "sum(VAR_STR.apply(lambda x: sum(x.isnull().values), axis=1) > 0)"
  },
  {
    "nl": "Clicking a link using selenium using python",
    "cmd": "driver.find_element_by_xpath('xpath').click()",
    "question_id": "17498027-0",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.find_element_by_xpath().click()",
    "canonical_cmd": "driver.find_element_by_xpath('xpath').click()"
  },
  {
    "nl": "convert a string literal `s` with values `\\\\` to raw string literal",
    "cmd": "s = s.replace('\\\\', '\\\\\\\\')",
    "question_id": "7262828-60",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "s = s.replace(, )",
    "canonical_cmd": "VAR_STR = VAR_STR.replace('VAR_STR', '\\\\\\\\')"
  },
  {
    "nl": "get multiple integer values from a string 'string1'",
    "cmd": "map(int, re.findall('\\\\d+', string1))",
    "question_id": "11339210-72",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall",
      "python.library.functions#map"
    ],
    "clean_cmd": "map(int, re.findall(, string1))",
    "canonical_cmd": "map(int, re.findall('\\\\d+', VAR_STR))"
  },
  {
    "nl": "sort list `l` based on its elements' digits",
    "cmd": "sorted(l, key=lambda x: int(re.search('\\\\d+', x).group(0)))",
    "question_id": "39129846-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.search",
      "python.library.functions#sorted",
      "python.library.functions#int",
      "python.library.re#re.Match.group"
    ],
    "clean_cmd": "sorted(l, key= x: int(re.search(, x).group()))",
    "canonical_cmd": "sorted(VAR_STR, key=lambda x: int(re.search('\\\\d+', x).group(0)))"
  },
  {
    "nl": "get first element of each tuple in list `A`",
    "cmd": "[tup[0] for tup in A]",
    "question_id": "31302904-21",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[tup[]  tup  A]",
    "canonical_cmd": "[tup[0] for tup in VAR_STR]"
  },
  {
    "nl": "Write a comment `# Data for Class A\\n` to a file object `f`",
    "cmd": "f.write('# Data for Class A\\n')",
    "question_id": "30994370-39",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.write"
    ],
    "clean_cmd": "f.write()",
    "canonical_cmd": "VAR_STR.write('VAR_STR')"
  },
  {
    "nl": "Find the greatest number in set `(1, 2, 3)`",
    "cmd": "print(max(1, 2, 3))",
    "question_id": "3090175-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#max"
    ],
    "clean_cmd": "print(max(, , ))",
    "canonical_cmd": "print(max(VAR_STR))"
  },
  {
    "nl": "split string 'Words, words, words.' on punctuation",
    "cmd": "re.split('\\\\W+', 'Words, words, words.')",
    "question_id": "19894478-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, )",
    "canonical_cmd": "re.split('\\\\W+', 'VAR_STR')"
  },
  {
    "nl": "flatten a tuple `l`",
    "cmd": "[(a, b, c) for a, (b, c) in l]",
    "question_id": "18500541-37",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[(a, b, c)  a, (b, c)  l]",
    "canonical_cmd": "[(a, b, c) for a, (b, c) in VAR_STR]"
  },
  {
    "nl": "replace value '-' in any column of pandas dataframe to \"NaN\"",
    "cmd": "df.replace('-', 'NaN')",
    "question_id": "42172204-74",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.replace"
    ],
    "clean_cmd": "df.replace(, )",
    "canonical_cmd": "df.replace('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "Generate a random integer between 0 and 9",
    "cmd": "randint(0, 9)",
    "question_id": "3996904-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.random#random.randint"
    ],
    "clean_cmd": "randint(, )",
    "canonical_cmd": "randint(0, 9)"
  },
  {
    "nl": "Generate a random integer between `a` and `b`",
    "cmd": "random.randint(a, b)",
    "question_id": "3996904-29",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.random#random.randint"
    ],
    "clean_cmd": "random.randint(a, b)",
    "canonical_cmd": "random.randint(VAR_STR, VAR_STR)"
  },
  {
    "nl": "Generate random integers between 0 and 9",
    "cmd": "print((random.randint(0, 9)))",
    "question_id": "3996904-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.random#random.randint"
    ],
    "clean_cmd": "print((random.randint(, )))",
    "canonical_cmd": "print(random.randint(0, 9))"
  },
  {
    "nl": "get an element at index `[1,1]`in a numpy array `arr`",
    "cmd": "print(arr[1, 1])",
    "question_id": "3582601-40",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print(arr[, ])",
    "canonical_cmd": "print(VAR_STR[1, 1])"
  },
  {
    "nl": "call `doSomething()` in a try-except without handling the exception",
    "cmd": "try:\n    doSomething()\nexcept:\n    pass",
    "question_id": "730764-0",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": ":doSomething():",
    "canonical_cmd": "try:\n    doSomething()\nexcept:\n    pass"
  },
  {
    "nl": "call `doSomething()` in a try-except without handling the exception",
    "cmd": "try:\n    doSomething()\nexcept Exception:\n    pass",
    "question_id": "730764-10",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": ":doSomething() Exception:",
    "canonical_cmd": "try:\n    doSomething()\nexcept Exception:\n    pass"
  },
  {
    "nl": "Create a list containing words that contain vowel letter followed by the same vowel in file 'file.text'",
    "cmd": "[w for w in open('file.txt') if not re.search('[aeiou]{2}', w)]",
    "question_id": "38862349-53",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.search",
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "[w  w  open()   re.search(, w)]",
    "canonical_cmd": "[w for w in open('file.txt') if not re.search('[aeiou]{2}', w)]"
  },
  {
    "nl": "print a rational number `3/2`",
    "cmd": "print('\\n\\x1b[4m' + '3' + '\\x1b[0m' + '\\n2')",
    "question_id": "39381222-81",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print( +  +  + )",
    "canonical_cmd": "print('\\n\\x1b[4m' + '3' + '\\x1b[0m' + '\\n2')"
  },
  {
    "nl": "append 4 to list `foo`",
    "cmd": "foo.append(4)",
    "question_id": "4641765-34",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.append"
    ],
    "clean_cmd": "foo.append()",
    "canonical_cmd": "VAR_STR.append(4)"
  },
  {
    "nl": "append a list [8, 7] to list `foo`",
    "cmd": "foo.append([8, 7])",
    "question_id": "4641765-55",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.append"
    ],
    "clean_cmd": "foo.append([, ])",
    "canonical_cmd": "VAR_STR.append([8, 7])"
  },
  {
    "nl": "insert 77 to index 2 of list `x`",
    "cmd": "x.insert(2, 77)",
    "question_id": "4641765-97",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.insert"
    ],
    "clean_cmd": "x.insert(, )",
    "canonical_cmd": "VAR_STR.insert(2, 77)"
  },
  {
    "nl": "apply a list of functions named 'functions' over a list of values named 'values'",
    "cmd": "[x(y) for x, y in zip(functions, values)]",
    "question_id": "4231345-36",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "[x(y)  x, y  zip(functions, values)]",
    "canonical_cmd": "[x(y) for x, y in zip(VAR_STR, VAR_STR)]"
  },
  {
    "nl": "Convert array `x` into a correlation matrix",
    "cmd": "np.corrcoef(x)",
    "question_id": "18432823-76",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.corrcoef"
    ],
    "clean_cmd": "np.corrcoef(x)",
    "canonical_cmd": "np.corrcoef(VAR_STR)"
  },
  {
    "nl": "reading tab-delimited csv file `filename` with pandas on mac",
    "cmd": "pandas.read_csv(filename, sep='\\t', lineterminator='\\r')",
    "question_id": "27896214-81",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.read_csv"
    ],
    "clean_cmd": "pandas.read_csv(filename, sep=, lineterminator=)",
    "canonical_cmd": "pandas.read_csv(VAR_STR, sep='\\t', lineterminator='\\r')"
  },
  {
    "nl": "add multiple columns `hour`, `weekday`, `weeknum` to pandas data frame `df` from lambda function `lambdafunc`",
    "cmd": "df[['hour', 'weekday', 'weeknum']] = df.apply(lambdafunc, axis=1)",
    "question_id": "30026815-9",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.apply"
    ],
    "clean_cmd": "df[[, , ]] = df.apply(lambdafunc, axis=)",
    "canonical_cmd": "VAR_STR[['VAR_STR', 'VAR_STR', 'VAR_STR']] = VAR_STR.apply(VAR_STR, axis=1)"
  },
  {
    "nl": "Check if key 'key1' in `dict`",
    "cmd": "('key1' in dict)",
    "question_id": "1602934-33",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "(  dict)",
    "canonical_cmd": "'VAR_STR' in VAR_STR"
  },
  {
    "nl": "Check if key 'a' in `d`",
    "cmd": "('a' in d)",
    "question_id": "1602934-63",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "(  d)",
    "canonical_cmd": "'VAR_STR' in VAR_STR"
  },
  {
    "nl": "Check if key 'c' in `d`",
    "cmd": "('c' in d)",
    "question_id": "1602934-89",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "(  d)",
    "canonical_cmd": "'VAR_STR' in VAR_STR"
  },
  {
    "nl": "Check if a given key 'key1' exists in dictionary `dict`",
    "cmd": "if ('key1' in dict):\n    pass",
    "question_id": "1602934-96",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " (  dict):",
    "canonical_cmd": "if 'VAR_STR' in VAR_STR:\n    pass"
  },
  {
    "nl": "Check if a given key `key` exists in dictionary `d`",
    "cmd": "if (key in d):\n    pass",
    "question_id": "1602934-21",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " (key  d):",
    "canonical_cmd": "if VAR_STR in VAR_STR:\n    pass"
  },
  {
    "nl": "remove multiple spaces in a string `foo`",
    "cmd": "\"\"\" \"\"\".join(foo.split())",
    "question_id": "1546226-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": ".join(foo.split())",
    "canonical_cmd": "\"\"\" \"\"\".join(VAR_STR.split())"
  },
  {
    "nl": "convert tuple `tst` to string `tst2`",
    "cmd": "tst2 = str(tst)",
    "question_id": "3886669-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str"
    ],
    "clean_cmd": "tst2 = str(tst)",
    "canonical_cmd": "VAR_STR = str(VAR_STR)"
  },
  {
    "nl": "Get the position of a regex match for word `is` in a string `String`",
    "cmd": "re.search('\\\\bis\\\\b', String).start()",
    "question_id": "2674391-16",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.search",
      "python.library.re#re.Match.start"
    ],
    "clean_cmd": "re.search(, String).start()",
    "canonical_cmd": "re.search('\\\\bis\\\\b', VAR_STR).start()"
  },
  {
    "nl": "Get the position of a regex match `is` in a string `String`",
    "cmd": "re.search('is', String).start()",
    "question_id": "2674391-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.search",
      "python.library.re#re.Match.start"
    ],
    "clean_cmd": "re.search(, String).start()",
    "canonical_cmd": "re.search('VAR_STR', VAR_STR).start()"
  },
  {
    "nl": "check if 3 is inside list `[1, 2, 3]`",
    "cmd": "3 in [1, 2, 3]",
    "question_id": "9542738-42",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "  [, , ]",
    "canonical_cmd": "3 in [VAR_STR]"
  },
  {
    "nl": "get items from list `a` that don't appear in list `b`",
    "cmd": "[y for y in a if y not in b]",
    "question_id": "15474933-42",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[y  y  a  y   b]",
    "canonical_cmd": "[y for y in VAR_STR if y not in VAR_STR]"
  },
  {
    "nl": "identify duplicated rows in columns 'PplNum' and 'RoomNum' with additional column in dataframe `df`",
    "cmd": "df.groupby(['PplNum', 'RoomNum']).cumcount() + 1",
    "question_id": "37497559-37",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.core.groupby.groupby.cumcount"
    ],
    "clean_cmd": "df.groupby([, ]).cumcount() + ",
    "canonical_cmd": "VAR_STR.groupby(['VAR_STR', 'VAR_STR']).cumcount() + 1"
  },
  {
    "nl": "Jinja join elements of array `tags` with space string ' '",
    "cmd": "{{tags | join(' ')}}",
    "question_id": "13002848-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "{{tags | join()}}",
    "canonical_cmd": "{{VAR_STR | join(' ')}}"
  },
  {
    "nl": "django urlsafe base64 decode string `uenc` with decryption",
    "cmd": "base64.urlsafe_b64decode(uenc.encode('ascii'))",
    "question_id": "2229827-41",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.base64#base64.urlsafe_b64decode",
      "python.library.base64#base64.encode"
    ],
    "clean_cmd": "base64.urlsafe_b64decode(uenc.encode())",
    "canonical_cmd": "base64.urlsafe_b64decode(VAR_STR.encode('ascii'))"
  },
  {
    "nl": "copy list `old_list` as `new_list`",
    "cmd": "new_list = old_list[:]",
    "question_id": "2612802-18",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "new_list = old_list[:]",
    "canonical_cmd": "VAR_STR = VAR_STR[:]"
  },
  {
    "nl": "copy list `old_list` as `new_list`",
    "cmd": "new_list = list(old_list)",
    "question_id": "2612802-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list"
    ],
    "clean_cmd": "new_list = list(old_list)",
    "canonical_cmd": "VAR_STR = list(VAR_STR)"
  },
  {
    "nl": "copy list `old_list` as `new_list`",
    "cmd": "new_list = copy.copy(old_list)",
    "question_id": "2612802-99",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "new_list = copy.copy(old_list)",
    "canonical_cmd": "VAR_STR = copy.copy(VAR_STR)"
  },
  {
    "nl": "deep copy list `old_list` as `new_list`",
    "cmd": "new_list = copy.deepcopy(old_list)",
    "question_id": "2612802-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.copy#copy.deepcopy"
    ],
    "clean_cmd": "new_list = copy.deepcopy(old_list)",
    "canonical_cmd": "VAR_STR = copy.deepcopy(VAR_STR)"
  },
  {
    "nl": "make a copy of list `old_list`",
    "cmd": "[i for i in old_list]",
    "question_id": "2612802-26",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[i  i  old_list]",
    "canonical_cmd": "[i for i in VAR_STR]"
  },
  {
    "nl": "generate unique equal hash for equal dictionaries `a` and `b`",
    "cmd": "hash(pformat(a)) == hash(pformat(b))",
    "question_id": "16735786-56",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#hash",
      "python.library.pprint#pprint.pformat"
    ],
    "clean_cmd": "hash(pformat(a)) == hash(pformat(b))",
    "canonical_cmd": "hash(pformat(VAR_STR)) == hash(pformat(VAR_STR))"
  },
  {
    "nl": "split string `s` by letter 's'",
    "cmd": "s.split('s')",
    "question_id": "13128565-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "s.split()",
    "canonical_cmd": "VAR_STR.split('VAR_STR')"
  },
  {
    "nl": "Get a list of strings `split_text` with fixed chunk size `n` from a string `the_list`",
    "cmd": "split_list = [the_list[i:i + n] for i in range(0, len(the_list), n)]",
    "question_id": "6372228-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range"
    ],
    "clean_cmd": "split_list = [the_list[i:i + n]  i  range(, len(the_list), n)]",
    "canonical_cmd": "split_list = [VAR_STR[i:i + VAR_STR] for i in range(0, len(VAR_STR), VAR_STR)]"
  },
  {
    "nl": "sum of squares values in a list `l`",
    "cmd": "sum(i * i for i in l)",
    "question_id": "26894227-7",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(i * i  i  l)",
    "canonical_cmd": "sum(i * i for i in VAR_STR)"
  },
  {
    "nl": "calculate the sum of the squares of each value in list `l`",
    "cmd": "sum(map(lambda x: x * x, l))",
    "question_id": "26894227-0",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(map( x: x * x, l))",
    "canonical_cmd": "sum(map(lambda x: x * x, VAR_STR))"
  },
  {
    "nl": "get a list each value `i` in the implicit tuple `range(3)`",
    "cmd": "list(i for i in range(3))",
    "question_id": "41127441-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range",
      "python.library.functions#list"
    ],
    "clean_cmd": "list(i  i  range())",
    "canonical_cmd": "list(VAR_STR for VAR_STR in range(3))"
  },
  {
    "nl": "revers correlating bits of integer `n`",
    "cmd": "int('{:08b}'.format(n)[::-1], 2)",
    "question_id": "12681945-62",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.functions#format"
    ],
    "clean_cmd": "int(.format(n)[::-], )",
    "canonical_cmd": "int('{:08b}'.format(VAR_STR)[::-1], 2)"
  },
  {
    "nl": "Filter Django objects by `author` with ids `1` and `2`",
    "cmd": "Book.objects.filter(author__id=1).filter(author__id=2)",
    "question_id": "5301996-48",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#filter"
    ],
    "clean_cmd": "Book.objects.filter(author__id=).filter(author__id=)",
    "canonical_cmd": "Book.objects.filter(author__id=1).filter(author__id=2)"
  },
  {
    "nl": "Write a regex statement to match 'lol' to 'lolllll'.",
    "cmd": "re.sub('l+', 'l', 'lollll')",
    "question_id": "3895874-90",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , )",
    "canonical_cmd": "re.sub('l+', 'l', 'lollll')"
  },
  {
    "nl": "printing numbers rounding up to third decimal place",
    "cmd": "print('%.3f' % 3.1415)",
    "question_id": "3241594-99",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print( % 3.1415)",
    "canonical_cmd": "print('%.3f' % 3.1415)"
  },
  {
    "nl": "Plot using the color code `#112233` in matplotlib pyplot",
    "cmd": "pyplot.plot(x, y, color='#112233')",
    "question_id": "6027690-60",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib._as_gen.matplotlib.pyplot.plot"
    ],
    "clean_cmd": "pyplot.plot(x, y, color=)",
    "canonical_cmd": "pyplot.plot(x, y, color='VAR_STR')"
  },
  {
    "nl": "remove 20 symbols in front of '.' in string 'unique12345678901234567890.mkv'",
    "cmd": "re.sub('.{20}(.mkv)', '\\\\1', 'unique12345678901234567890.mkv')",
    "question_id": "4358701-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , )",
    "canonical_cmd": "re.sub('.{20}(.mkv)', '\\\\1', 'VAR_STR')"
  },
  {
    "nl": "check if any element of list `substring_list` are in string `string`",
    "cmd": "any(substring in string for substring in substring_list)",
    "question_id": "8122079-23",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#any"
    ],
    "clean_cmd": "any(substring  string  substring  substring_list)",
    "canonical_cmd": "any(substring in VAR_STR for substring in VAR_STR)"
  },
  {
    "nl": "convert list `myintegers` into a unicode string",
    "cmd": "\"\"\"\"\"\".join(chr(i) for i in myintegers)",
    "question_id": "3855093-58",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#chr",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(chr(i)  i  myintegers)",
    "canonical_cmd": "\"\"\"\"\"\".join(chr(i) for i in VAR_STR)"
  },
  {
    "nl": "get os name",
    "cmd": "import platform\nplatform.system()",
    "question_id": "1854-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "import platformplatform.system()",
    "canonical_cmd": "import platform\nplatform.system()"
  },
  {
    "nl": "get os version",
    "cmd": "import platform\nplatform.release()",
    "question_id": "1854-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#memoryview.release"
    ],
    "clean_cmd": "import platformplatform.release()",
    "canonical_cmd": "import platform\nplatform.release()"
  },
  {
    "nl": "get the name of the OS",
    "cmd": "print(os.name)",
    "question_id": "1854-51",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print(os.name)",
    "canonical_cmd": "print(os.name)"
  },
  {
    "nl": "Make function `WRITEFUNCTION` output nothing in curl `p`",
    "cmd": "p.setopt(pycurl.WRITEFUNCTION, lambda x: None)",
    "question_id": "7668141-64",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "p.setopt(pycurl.WRITEFUNCTION,  x: None)",
    "canonical_cmd": "VAR_STR.setopt(pycurl.VAR_STR, lambda x: None)"
  },
  {
    "nl": "split a string `s` at line breaks `\\r\\n`",
    "cmd": "[map(int, x.split('\\t')) for x in s.rstrip().split('\\r\\n')]",
    "question_id": "21205074-1",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "[map(int, x.split())  x  s.rstrip().split()]",
    "canonical_cmd": "[map(int, x.split('\\t')) for x in VAR_STR.rstrip().split('VAR_STR')]"
  },
  {
    "nl": "binarize the values in columns of list `order` in a pandas data frame",
    "cmd": "pd.concat([df, pd.get_dummies(df, '', '').astype(int)], axis=1)[order]",
    "question_id": "38704545-19",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.get_dummies",
      "pandas.reference.api.pandas.concat",
      "pandas.reference.api.pandas.index.astype"
    ],
    "clean_cmd": "pd.concat([df, pd.get_dummies(df, , ).astype(int)], axis=)[order]",
    "canonical_cmd": "pd.concat([df, pd.get_dummies(df, '', '').astype(int)], axis=1)[VAR_STR]"
  },
  {
    "nl": "Parse string '21/11/06 16:30' according to format '%d/%m/%y %H:%M'",
    "cmd": "datetime.strptime('21/11/06 16:30', '%d/%m/%y %H:%M')",
    "question_id": "4363072-10",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime"
    ],
    "clean_cmd": "datetime.strptime(, )",
    "canonical_cmd": "datetime.strptime('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "Get the value with the maximum length in each column in array `foo`",
    "cmd": "[max(len(str(x)) for x in line) for line in zip(*foo)]",
    "question_id": "6018916-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#len",
      "python.library.functions#max",
      "python.library.stdtypes#str"
    ],
    "clean_cmd": "[max(len(str(x))  x  line)  line  zip(*foo)]",
    "canonical_cmd": "[max(len(str(x)) for x in line) for line in zip(*VAR_STR)]"
  },
  {
    "nl": "finding words in string `s` after keyword 'name'",
    "cmd": "re.search('name (.*)', s)",
    "question_id": "6633678-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.search"
    ],
    "clean_cmd": "re.search(, s)",
    "canonical_cmd": "re.search('name (.*)', VAR_STR)"
  },
  {
    "nl": "set data in column 'value' of dataframe `df` equal to first element of each list",
    "cmd": "df['value'] = df['value'].str[0]",
    "question_id": "38147447-67",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df[] = df[].str[]",
    "canonical_cmd": "VAR_STR['VAR_STR'] = VAR_STR['VAR_STR'].str[0]"
  },
  {
    "nl": "get element at index 0 of each list in column 'value' of dataframe `df`",
    "cmd": "df['value'] = df['value'].str.get(0)",
    "question_id": "38147447-11",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.series.str.get"
    ],
    "clean_cmd": "df[] = df[].str.get()",
    "canonical_cmd": "VAR_STR['VAR_STR'] = VAR_STR['VAR_STR'].str.get(0)"
  },
  {
    "nl": "remove square bracket '[]' from pandas dataframe `df` column 'value'",
    "cmd": "df['value'] = df['value'].str.strip('[]')",
    "question_id": "38147447-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": "df[] = df[].str.strip()",
    "canonical_cmd": "VAR_STR['VAR_STR'] = VAR_STR['VAR_STR'].str.strip('VAR_STR')"
  },
  {
    "nl": "Get a list from two strings `12345` and `ab` with values as each character concatenated",
    "cmd": "[(x + y) for x in '12345' for y in 'ab']",
    "question_id": "18649884-25",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[(x + y)  x    y  ]",
    "canonical_cmd": "[(x + y) for x in 'VAR_STR' for y in 'VAR_STR']"
  },
  {
    "nl": "sort keys of dictionary 'd' based on their values",
    "cmd": "sorted(d, key=lambda k: d[k][1])",
    "question_id": "4690094-53",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(d, key= k: d[k][])",
    "canonical_cmd": "sorted(VAR_STR, key=lambda k: VAR_STR[k][1])"
  },
  {
    "nl": "sort list `student_tuples` by second element of each tuple in ascending and third element of each tuple in descending",
    "cmd": "print(sorted(student_tuples, key=lambda t: (-t[2], t[0])))",
    "question_id": "16537636-23",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "print(sorted(student_tuples, key= t: (-t[], t[])))",
    "canonical_cmd": "print(sorted(VAR_STR, key=lambda t: (-t[2], t[0])))"
  },
  {
    "nl": "convert a list of lists `list_of_lists` into a list of strings keeping empty sub-lists as empty string ''",
    "cmd": "[''.join(l) for l in list_of_lists]",
    "question_id": "18022241-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "[.join(l)  l  list_of_lists]",
    "canonical_cmd": "['VAR_STR'.join(l) for l in VAR_STR]"
  },
  {
    "nl": "sort objects in `Articles` in descending order of counts of `likes`",
    "cmd": "Article.objects.annotate(like_count=Count('likes')).order_by('-like_count')",
    "question_id": "35097130-6",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.count",
      "matplotlib._as_gen.matplotlib.pyplot.annotate"
    ],
    "clean_cmd": "Article.objects.annotate(like_count=Count()).order_by()",
    "canonical_cmd": "Article.objects.annotate(like_count=Count('VAR_STR')).order_by('-like_count')"
  },
  {
    "nl": "divide each element in list `myList` by integer `myInt`",
    "cmd": "myList[:] = [(x / myInt) for x in myList]",
    "question_id": "8244915-81",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "myList[:] = [(x / myInt)  x  myList]",
    "canonical_cmd": "VAR_STR[:] = [(x / VAR_STR) for x in VAR_STR]"
  },
  {
    "nl": "format string with dict `{'5': 'you'}` with integer keys",
    "cmd": "'hello there %(5)s' % {'5': 'you'}",
    "question_id": "20677660-40",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " % {: }",
    "canonical_cmd": "'hello there %(5)s' % {VAR_STR}"
  },
  {
    "nl": "Check if all elements in list `lst` are tupples of long and int",
    "cmd": "all(isinstance(x, int) for x in lst)",
    "question_id": "13252333-55",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#isinstance",
      "python.library.functions#all"
    ],
    "clean_cmd": "all(isinstance(x, int)  x  lst)",
    "canonical_cmd": "all(isinstance(x, int) for x in VAR_STR)"
  },
  {
    "nl": "check if all elements in a list 'lst' are the same type 'int'",
    "cmd": "all(isinstance(x, int) for x in lst)",
    "question_id": "13252333-39",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#isinstance",
      "python.library.functions#all"
    ],
    "clean_cmd": "all(isinstance(x, int)  x  lst)",
    "canonical_cmd": "all(isinstance(x, VAR_STR) for x in VAR_STR)"
  },
  {
    "nl": "sort a dictionary `y` by value then by key",
    "cmd": "sorted(list(y.items()), key=lambda x: (x[1], x[0]), reverse=True)",
    "question_id": "7742752-61",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(list(y.items()), key= x: (x[], x[]), reverse=True)",
    "canonical_cmd": "sorted(list(VAR_STR.items()), key=lambda x: (x[1], x[0]), reverse=True)"
  },
  {
    "nl": "get mean of columns `2, 5, 6, 7, 8` for all rows in dataframe `df`",
    "cmd": "df.iloc[:, ([2, 5, 6, 7, 8])].mean(axis=1)",
    "question_id": "36454494-31",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.mean",
      "pandas.reference.api.pandas.dataframe.loc"
    ],
    "clean_cmd": "df.iloc[:, ([, , , , ])].mean(axis=)",
    "canonical_cmd": "VAR_STR.iloc[:, ([2, 5, 6, 7, 8])].mean(axis=1)"
  },
  {
    "nl": "Create a key `key` if it does not exist in dict `dic` and append element `value` to value.",
    "cmd": "dic.setdefault(key, []).append(value)",
    "question_id": "12905999-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.setdefault",
      "numpy.reference.generated.numpy.append"
    ],
    "clean_cmd": "dic.setdefault(key, []).append(value)",
    "canonical_cmd": "VAR_STR.setdefault(VAR_STR, []).append(VAR_STR)"
  },
  {
    "nl": "Convert each list in list `main_list` into a tuple",
    "cmd": "map(list, zip(*main_list))",
    "question_id": "31676133-11",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#map"
    ],
    "clean_cmd": "map(list, zip(*main_list))",
    "canonical_cmd": "map(list, zip(*VAR_STR))"
  },
  {
    "nl": "print list of items `myList`",
    "cmd": "print('\\n'.join(str(p) for p in myList))",
    "question_id": "15769246-18",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "print(.join(str(p)  p  myList))",
    "canonical_cmd": "print('\\n'.join(str(p) for p in VAR_STR))"
  },
  {
    "nl": "stack two dataframes next to each other in pandas",
    "cmd": "pd.concat([GOOG, AAPL], keys=['GOOG', 'AAPL'], axis=1)",
    "question_id": "13079852-67",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.concat"
    ],
    "clean_cmd": "pd.concat([GOOG, AAPL], keys=[, ], axis=)",
    "canonical_cmd": "pd.concat([GOOG, AAPL], keys=['GOOG', 'AAPL'], axis=1)"
  },
  {
    "nl": "add dictionary `{'class': {'section': 5}}` to key 'Test' of dictionary `dic`",
    "cmd": "dic['Test'].update({'class': {'section': 5}})",
    "question_id": "37855490-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.update"
    ],
    "clean_cmd": "dic[].update({: {: }})",
    "canonical_cmd": "VAR_STR['VAR_STR'].update({VAR_STR})"
  },
  {
    "nl": "replace everything that is not an alphabet or a digit with '' in 's'.",
    "cmd": "re.sub('[\\\\W_]+', '', s)",
    "question_id": "6323296-29",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , s)",
    "canonical_cmd": "re.sub('[\\\\W_]+', 'VAR_STR', VAR_STR)"
  },
  {
    "nl": "set size of `figure` to landscape A4 i.e. `11.69, 8.27` inches",
    "cmd": "figure(figsize=(11.69, 8.27))",
    "question_id": "15571267-95",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.figure_api#matplotlib.figure.Figure"
    ],
    "clean_cmd": "figure(figsize=(11.69, 8.27))",
    "canonical_cmd": "VAR_STR(figsize=(11.69, 8.27))"
  },
  {
    "nl": "django jinja slice list `mylist` by '3:8'",
    "cmd": "{{(mylist | slice): '3:8'}}",
    "question_id": "23422542-68",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "{{(mylist | slice): }}",
    "canonical_cmd": "{{(VAR_STR | slice): 'VAR_STR'}}"
  },
  {
    "nl": "replace character 'a' with character 'e' and character 's' with character '3' in file `contents`",
    "cmd": "newcontents = contents.replace('a', 'e').replace('s', '3')",
    "question_id": "10562778-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "newcontents = contents.replace(, ).replace(, )",
    "canonical_cmd": "newcontents = VAR_STR.replace('VAR_STR', 'VAR_STR').replace('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "unpack keys and values of a dictionary `d` into two lists",
    "cmd": "keys, values = zip(*list(d.items()))",
    "question_id": "6612769-92",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "keys, values = zip(*list(d.items()))",
    "canonical_cmd": "keys, values = zip(*list(VAR_STR.items()))"
  },
  {
    "nl": "Sort a list 'lst' in descending order.",
    "cmd": "sorted(lst, reverse=True)",
    "question_id": "4644025-48",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(lst, reverse=True)",
    "canonical_cmd": "sorted(VAR_STR, reverse=True)"
  },
  {
    "nl": "Get the dot product of matrix `[1,0,0,1,0,0]` and matrix `[[0,1],[1,1],[1,0],[1,0],[1,1],[0,1]]`",
    "cmd": "np.dot([1, 0, 0, 1, 0, 0], [[0, 1], [1, 1], [1, 0], [1, 0], [1, 1], [0, 1]])",
    "question_id": "28253102-16",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.dot"
    ],
    "clean_cmd": "np.dot([, , , , , ], [[, ], [, ], [, ], [, ], [, ], [, ]])",
    "canonical_cmd": "np.dot([1, 0, 0, 1, 0, 0], [[0, 1], [1, 1], [1, 0], [1, 0], [1, 1], [0, 1]])"
  },
  {
    "nl": "get a random item from list `choices`",
    "cmd": "random_choice = random.choice(choices)",
    "question_id": "30651487-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.random#random.choice"
    ],
    "clean_cmd": "random_choice = random.choice(choices)",
    "canonical_cmd": "random_choice = random.choice(VAR_STR)"
  },
  {
    "nl": "apply jinja2 filters `forceescape` and `linebreaks` on variable `my_variable`",
    "cmd": "{{my_variable | forceescape | linebreaks}}",
    "question_id": "4901483-87",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "{{my_variable | forceescape | linebreaks}}",
    "canonical_cmd": "{{VAR_STR | VAR_STR | VAR_STR}}"
  },
  {
    "nl": "parse string `s` to int when string contains a number",
    "cmd": "int(''.join(c for c in s if c.isdigit()))",
    "question_id": "17888152-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#str.isdigit",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "int(.join(c  c  s  c.isdigit()))",
    "canonical_cmd": "int(''.join(c for c in VAR_STR if c.isdigit()))"
  },
  {
    "nl": "check if any elements in one list `list1` are in another list `list2`",
    "cmd": "len(set(list1).intersection(list2)) > 0",
    "question_id": "16138015-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.stdtypes#set",
      "python.library.stdtypes#frozenset.intersection"
    ],
    "clean_cmd": "len(set(list1).intersection(list2)) &gt; ",
    "canonical_cmd": "len(set(VAR_STR).intersection(VAR_STR)) > 0"
  },
  {
    "nl": "strip and split each line `line` on white spaces",
    "cmd": "line.strip().split(' ')",
    "question_id": "11354544-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "line.strip().split()",
    "canonical_cmd": "VAR_STR.strip().split(' ')"
  },
  {
    "nl": "Insert records in bulk from \"table1\" of \"master\" DB to \"table1\" of  sqlite3 `cursor` object",
    "cmd": "cursor.execute('INSERT OR REPLACE INTO master.table1 SELECT * FROM table1')",
    "question_id": "8215686-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.sqlite3#sqlite3.Cursor.execute"
    ],
    "clean_cmd": "cursor.execute()",
    "canonical_cmd": "VAR_STR.execute('INSERT OR REPLACE INTO master.table1 SELECT * FROM table1')"
  },
  {
    "nl": "check if string `str` is palindrome",
    "cmd": "str(n) == str(n)[::-1]",
    "question_id": "17331290-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str"
    ],
    "clean_cmd": "str(n) == str(n)[::-]",
    "canonical_cmd": "VAR_STR(n) == VAR_STR(n)[::-1]"
  },
  {
    "nl": "Remove duplicates elements from list `sequences` and sort it in ascending order",
    "cmd": "sorted(set(itertools.chain.from_iterable(sequences)))",
    "question_id": "7458689-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.chain.from_iterable",
      "python.library.functions#sorted",
      "python.library.stdtypes#set"
    ],
    "clean_cmd": "sorted(set(itertools.chain.from_iterable(sequences)))",
    "canonical_cmd": "sorted(set(itertools.chain.from_iterable(VAR_STR)))"
  },
  {
    "nl": "decode utf-8 code `x` into a raw unicode literal",
    "cmd": "print(str(x).decode('raw_unicode_escape'))",
    "question_id": "8901996-28",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str",
      "pandas.reference.api.pandas.series.str.decode"
    ],
    "clean_cmd": "print(str(x).decode())",
    "canonical_cmd": "print(str(VAR_STR).decode('raw_unicode_escape'))"
  },
  {
    "nl": "Compare if each value in list `a` is less than respective index value in list `b`",
    "cmd": "all(i < j for i, j in zip(a, b))",
    "question_id": "28767642-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#all"
    ],
    "clean_cmd": "all(i &lt; j  i, j  zip(a, b))",
    "canonical_cmd": "all(i < j for i, j in zip(VAR_STR, VAR_STR))"
  },
  {
    "nl": "create a list containing keys of dictionary `d` and sort it alphabetically",
    "cmd": "sorted(d, key=d.get)",
    "question_id": "575819-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(d, key=d.get)",
    "canonical_cmd": "sorted(VAR_STR, key=VAR_STR.get)"
  },
  {
    "nl": "extend dictionary `a` with key/value pairs of dictionary `b`",
    "cmd": "a.update(b)",
    "question_id": "577234-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.update"
    ],
    "clean_cmd": "a.update(b)",
    "canonical_cmd": "VAR_STR.update(VAR_STR)"
  },
  {
    "nl": "Delete character \"M\" from a string `s` using python",
    "cmd": "s = s.replace('M', '')",
    "question_id": "3559559-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "s = s.replace(, )",
    "canonical_cmd": "VAR_STR = VAR_STR.replace('VAR_STR', '')"
  },
  {
    "nl": "How to delete a character from a string using python?",
    "cmd": "newstr = oldstr.replace('M', '')",
    "question_id": "3559559-70",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "newstr = oldstr.replace(, )",
    "canonical_cmd": "newstr = oldstr.replace('M', '')"
  },
  {
    "nl": "save xlsxwriter file in 'app/smth1/smth2/Expenses01.xlsx' path and assign to variable `workbook`",
    "cmd": "workbook = xlsxwriter.Workbook('app/smth1/smth2/Expenses01.xlsx')",
    "question_id": "22904654-17",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "workbook = xlsxwriter.Workbook()",
    "canonical_cmd": "VAR_STR = xlsxwriter.Workbook('VAR_STR')"
  },
  {
    "nl": "save xlsxwriter file to 'C:/Users/Steven/Documents/demo.xlsx' path",
    "cmd": "workbook = xlsxwriter.Workbook('C:/Users/Steven/Documents/demo.xlsx')",
    "question_id": "22904654-42",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "workbook = xlsxwriter.Workbook()",
    "canonical_cmd": "workbook = xlsxwriter.Workbook('VAR_STR')"
  },
  {
    "nl": "creating a 5x6 matrix filled with `None` and save it as `x`",
    "cmd": "x = [[None for _ in range(5)] for _ in range(6)]",
    "question_id": "4230000-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "x = [[None  _  range()]  _  range()]",
    "canonical_cmd": "VAR_STR = [[None for _ in range(5)] for _ in range(6)]"
  },
  {
    "nl": "insert ' ' between every three digits before '.' and replace ',' with '.' in 12345678.46",
    "cmd": "format(12345678.46, ',').replace(',', ' ').replace('.', ',')",
    "question_id": "17484631-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format",
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "format(12345678.46, ).replace(, ).replace(, )",
    "canonical_cmd": "format(12345678.46, 'VAR_STR').replace('VAR_STR', ' ').replace('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "upload uploaded file from path '/upload' to Google cloud storage 'my_bucket' bucket",
    "cmd": "upload_url = blobstore.create_upload_url('/upload', gs_bucket_name='my_bucket')",
    "question_id": "23823206-75",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "upload_url = blobstore.create_upload_url(, gs_bucket_name=)",
    "canonical_cmd": "upload_url = blobstore.create_upload_url('VAR_STR', gs_bucket_name='VAR_STR')"
  },
  {
    "nl": "Group the values from django model `Article` with group by value `pub_date` and annotate by `title`",
    "cmd": "Article.objects.values('pub_date').annotate(article_count=Count('title'))",
    "question_id": "1908741-46",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.values",
      "python.library.stdtypes#str.count"
    ],
    "clean_cmd": "Article.objects.values().annotate(article_count=Count())",
    "canonical_cmd": "VAR_STR.objects.values('VAR_STR').annotate(article_count=Count('VAR_STR'))"
  },
  {
    "nl": "Retrieve list of values from dictionary 'd'",
    "cmd": "list(d.values())",
    "question_id": "16228248-100",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "list(d.values())",
    "canonical_cmd": "list(VAR_STR.values())"
  },
  {
    "nl": "Convert a string into a list",
    "cmd": "list('hello')",
    "question_id": "7522533-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list"
    ],
    "clean_cmd": "list()",
    "canonical_cmd": "list('hello')"
  },
  {
    "nl": "check if type of variable `s` is a string",
    "cmd": "isinstance(s, str)",
    "question_id": "4843173-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#isinstance"
    ],
    "clean_cmd": "isinstance(s, str)",
    "canonical_cmd": "isinstance(VAR_STR, str)"
  },
  {
    "nl": "check if type of a variable `s` is string",
    "cmd": "isinstance(s, str)",
    "question_id": "4843173-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#isinstance"
    ],
    "clean_cmd": "isinstance(s, str)",
    "canonical_cmd": "isinstance(VAR_STR, str)"
  },
  {
    "nl": "check python version",
    "cmd": "sys.version",
    "question_id": "1093322-99",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "sys.version",
    "canonical_cmd": "sys.version"
  },
  {
    "nl": "check python version",
    "cmd": "sys.version_info",
    "question_id": "1093322-38",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "sys.version_info",
    "canonical_cmd": "sys.version_info"
  },
  {
    "nl": "select all rows from pandas DataFrame 'df' where the value in column 'A' is greater than 1 or less than -1 in column 'B'.",
    "cmd": "df[(df['A'] > 1) | (df['B'] < -1)]",
    "question_id": "8916302-70",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df[(df[] > ) | (df[] < -)]",
    "canonical_cmd": "VAR_STR[(VAR_STR['VAR_STR'] > 1) | (VAR_STR['VAR_STR'] < -1)]"
  },
  {
    "nl": "check if all values in the columns of a numpy matrix `a` are same",
    "cmd": "np.all(a == a[(0), :], axis=0)",
    "question_id": "14859458-75",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.all"
    ],
    "clean_cmd": "np.all(a == a[(), :], axis=)",
    "canonical_cmd": "np.all(VAR_STR == VAR_STR[(0), :], axis=0)"
  },
  {
    "nl": "replace `0` with `2` in the list `[0, 1, 0, 3]`",
    "cmd": "[(a if a else 2) for a in [0, 1, 0, 3]]",
    "question_id": "2951701-85",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[(a  a  )  a  [, , , ]]",
    "canonical_cmd": "[(a if a else 2) for a in [VAR_STR]]"
  },
  {
    "nl": "Save array at index 0, index 1 and index 8 of array `np` to tmp file `tmp`",
    "cmd": "np.savez(tmp, *[getarray[0], getarray[1], getarray[8]])",
    "question_id": "22712292-60",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.savez"
    ],
    "clean_cmd": "np.savez(tmp, *[getarray[], getarray[], getarray[]])",
    "canonical_cmd": "VAR_STR.savez(VAR_STR, *[getarray[0], getarray[1], getarray[8]])"
  },
  {
    "nl": "Getting the last element of list `some_list`",
    "cmd": "some_list[(-1)]",
    "question_id": "930397-17",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "some_list[(-)]",
    "canonical_cmd": "VAR_STR[-1]"
  },
  {
    "nl": "Getting the second to last element of list `some_list`",
    "cmd": "some_list[(-2)]",
    "question_id": "930397-2",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "some_list[(-)]",
    "canonical_cmd": "VAR_STR[-2]"
  },
  {
    "nl": "gets the `n` th-to-last element in list `some_list`",
    "cmd": "some_list[(- n)]",
    "question_id": "930397-12",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "some_list[(- n)]",
    "canonical_cmd": "VAR_STR[-VAR_STR]"
  },
  {
    "nl": "get the last element in list `alist`",
    "cmd": "alist[(-1)]",
    "question_id": "930397-75",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "alist[(-)]",
    "canonical_cmd": "VAR_STR[-1]"
  },
  {
    "nl": "get the last element in list `astr`",
    "cmd": "astr[(-1)]",
    "question_id": "930397-92",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "astr[(-)]",
    "canonical_cmd": "VAR_STR[-1]"
  },
  {
    "nl": "Produce a string that is suitable as Unicode literal from string 'M\\\\N{AMPERSAND}M\\\\N{APOSTROPHE}s'",
    "cmd": "'M\\\\N{AMPERSAND}M\\\\N{APOSTROPHE}s'.encode().decode('unicode-escape')",
    "question_id": "30747705-14",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": ".encode().decode()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".encode().decode('unicode-escape')"
  },
  {
    "nl": "Parse a unicode string `M\\\\N{AMPERSAND}M\\\\N{APOSTROPHE}s`",
    "cmd": "'M\\\\N{AMPERSAND}M\\\\N{APOSTROPHE}s'.decode('unicode-escape')",
    "question_id": "30747705-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": ".decode()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".decode('unicode-escape')"
  },
  {
    "nl": "sum values in list of dictionaries `example_list` with key 'gold'",
    "cmd": "sum(item['gold'] for item in example_list)",
    "question_id": "11692613-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(item[]  item  example_list)",
    "canonical_cmd": "sum(item['VAR_STR'] for item in VAR_STR)"
  },
  {
    "nl": "get a sum of all values from key `gold` in a list of dictionary `example_list`",
    "cmd": "sum([item['gold'] for item in example_list])",
    "question_id": "11692613-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum([item[]  item  example_list])",
    "canonical_cmd": "sum([item['VAR_STR'] for item in VAR_STR])"
  },
  {
    "nl": "Get all the values in key `gold` summed from a list of dictionary `myLIst`",
    "cmd": "sum(item['gold'] for item in myLIst)",
    "question_id": "11692613-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(item[]  item  myLIst)",
    "canonical_cmd": "sum(item['VAR_STR'] for item in VAR_STR)"
  },
  {
    "nl": "replace '-' in pandas dataframe `df` with `np.nan`",
    "cmd": "df.replace('-', np.nan)",
    "question_id": "17097236-34",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.replace"
    ],
    "clean_cmd": "df.replace(, np.nan)",
    "canonical_cmd": "VAR_STR.replace('VAR_STR', np.nan)"
  },
  {
    "nl": "disable logging while running unit tests in python django",
    "cmd": "logging.disable(logging.CRITICAL)",
    "question_id": "5255657-81",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.logging#logging.disable"
    ],
    "clean_cmd": "logging.disable(logging.CRITICAL)",
    "canonical_cmd": "logging.disable(logging.CRITICAL)"
  },
  {
    "nl": "normalize the dataframe `df` along the rows",
    "cmd": "np.sqrt(np.square(df).sum(axis=1))",
    "question_id": "18524112-81",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.square",
      "numpy.reference.generated.numpy.sqrt",
      "python.library.functions#sum"
    ],
    "clean_cmd": "np.sqrt(np.square(df).sum(axis=))",
    "canonical_cmd": "np.sqrt(np.square(VAR_STR).sum(axis=1))"
  },
  {
    "nl": "Sort a string `s` in lexicographic order",
    "cmd": "sorted(s, key=str.upper)",
    "question_id": "7371935-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(s, key=str.upper)",
    "canonical_cmd": "sorted(VAR_STR, key=str.upper)"
  },
  {
    "nl": "sort string `s` in lexicographic order",
    "cmd": "sorted(sorted(s), key=str.upper)",
    "question_id": "7371935-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(sorted(s), key=str.upper)",
    "canonical_cmd": "sorted(sorted(VAR_STR), key=str.upper)"
  },
  {
    "nl": "get a sorted list of the characters of string `s` in lexicographic order, with lowercase letters first",
    "cmd": "sorted(s, key=str.lower)",
    "question_id": "7371935-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(s, key=str.lower)",
    "canonical_cmd": "sorted(VAR_STR, key=str.lower)"
  },
  {
    "nl": "insert a new field 'geolocCountry' on an existing document  'b' using pymongo",
    "cmd": "db.Doc.update({'_id': b['_id']}, {'$set': {'geolocCountry': myGeolocCountry}})",
    "question_id": "15666169-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.turtle#turtle.update"
    ],
    "clean_cmd": "db.Doc.update({: b[]}, {: {: myGeolocCountry}})",
    "canonical_cmd": "db.Doc.update({'_id': VAR_STR['_id']}, {'$set': {'VAR_STR': myGeolocCountry}})"
  },
  {
    "nl": "parse a string 'http://example.org/#comments' to extract hashtags into an array",
    "cmd": "re.findall('#(\\\\w+)', 'http://example.org/#comments')",
    "question_id": "2527892-27",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall('#(\\\\w+)', 'VAR_STR')"
  },
  {
    "nl": "assign the index of the last occurrence of `x` in list `s` to the variable `last`",
    "cmd": "last = len(s) - s[::-1].index(x) - 1",
    "question_id": "34438901-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.stdtypes#str.index"
    ],
    "clean_cmd": "last = len(s) - s[::-].index(x) - ",
    "canonical_cmd": "VAR_STR = len(VAR_STR) - VAR_STR[::-1].index(VAR_STR) - 1"
  },
  {
    "nl": "check if dataframe `df` has any NaN values",
    "cmd": "df.isnull().values.any()",
    "question_id": "29530232-39",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.isnull",
      "pandas.reference.api.pandas.dataframe.any"
    ],
    "clean_cmd": "df.isnull().values.any()",
    "canonical_cmd": "VAR_STR.isnull().values.any()"
  },
  {
    "nl": "sum all elements of two-dimensions list `[[1, 2, 3, 4], [2, 4, 5, 6]]]`",
    "cmd": "sum([sum(x) for x in [[1, 2, 3, 4], [2, 4, 5, 6]]])",
    "question_id": "9497290-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum([sum(x)  x  [[, , , ], [, , , ]]])",
    "canonical_cmd": "sum([sum(x) for x in [VAR_STR])"
  },
  {
    "nl": "Print a dictionary `{'user': {'name': 'Markus'}}` with string formatting",
    "cmd": "\"\"\"Hello {user[name]}\"\"\".format(**{'user': {'name': 'Markus'}})",
    "question_id": "29035168-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format(**{: {: }})",
    "canonical_cmd": "\"\"\"Hello {user[name]}\"\"\".format(**{VAR_STR})"
  },
  {
    "nl": "zip lists `[1, 2], [3, 4], [5, 6]` in a list",
    "cmd": "zip(*[[1, 2], [3, 4], [5, 6]])",
    "question_id": "4112265-53",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "zip(*[[, ], [, ], [, ]])",
    "canonical_cmd": "zip(*[[VAR_STR]])"
  },
  {
    "nl": "zip lists in a list [[1, 2], [3, 4], [5, 6]]",
    "cmd": "zip(*[[1, 2], [3, 4], [5, 6]])",
    "question_id": "4112265-19",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "zip(*[[, ], [, ], [, ]])",
    "canonical_cmd": "zip(*[[1, 2], [3, 4], [5, 6]])"
  },
  {
    "nl": "Add 100 to each element of column \"x\" in dataframe `a`",
    "cmd": "a['x'].apply(lambda x, y: x + y, args=(100,))",
    "question_id": "21188504-9",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.apply"
    ],
    "clean_cmd": "a[].apply( x, y: x + y, args=(,))",
    "canonical_cmd": "VAR_STR['VAR_STR'].apply(lambda VAR_STR, y: VAR_STR + y, args=(100,))"
  },
  {
    "nl": "get a list of elements resulting from splitting user input by commas and stripping white space from each resulting string `s`",
    "cmd": "[s.strip() for s in input().split(',')]",
    "question_id": "1397827-4",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#input",
      "python.library.stdtypes#str.strip",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "[s.strip()  s  input().split()]",
    "canonical_cmd": "[VAR_STR.strip() for VAR_STR in input().split(',')]"
  },
  {
    "nl": "encode `u'X\\xc3\\xbcY\\xc3\\x9f'` as unicode and decode with utf-8",
    "cmd": "'X\\xc3\\xbcY\\xc3\\x9f'.encode('raw_unicode_escape').decode('utf-8')",
    "question_id": "4267019-49",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": ".encode().decode()",
    "canonical_cmd": "\"\"\"X\u00c3\u00bcY\u00c3\u009f\"\"\".encode('raw_unicode_escape').decode('utf-8')"
  },
  {
    "nl": "Jinja parse datetime object `car.date_of_manufacture` to use format pattern `datetime`",
    "cmd": "{{car.date_of_manufacture | datetime}}",
    "question_id": "4830535-51",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "{{car.date_of_manufacture | datetime}}",
    "canonical_cmd": "{{car.date_of_manufacture | VAR_STR}}"
  },
  {
    "nl": "Get the date object `date_of_manufacture` of object `car` in string  format '%Y-%m-%d'",
    "cmd": "{{car.date_of_manufacture.strftime('%Y-%m-%d')}}",
    "question_id": "4830535-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.strftime"
    ],
    "clean_cmd": "{{car.date_of_manufacture.strftime()}}",
    "canonical_cmd": "{{VAR_STR.VAR_STR.strftime('VAR_STR')}}"
  },
  {
    "nl": "convert a DateTime string back to a DateTime object of format '%Y-%m-%d %H:%M:%S.%f'",
    "cmd": "datetime.strptime('2010-11-13 10:33:54.227806', '%Y-%m-%d %H:%M:%S.%f')",
    "question_id": "4170655-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime"
    ],
    "clean_cmd": "datetime.strptime(, )",
    "canonical_cmd": "datetime.strptime('2010-11-13 10:33:54.227806', 'VAR_STR')"
  },
  {
    "nl": "Pandas: How can I use the apply() function for a single column?",
    "cmd": "df['a'] = df['a'].apply(lambda x: x + 1)",
    "question_id": "34962104-52",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.series.apply"
    ],
    "clean_cmd": "df[] = df[].apply( x: x + )",
    "canonical_cmd": "df['a'] = df['a'].apply(lambda x: x + 1)"
  },
  {
    "nl": "Get a list `myList` from 1 to 10",
    "cmd": "myList = [i for i in range(10)]",
    "question_id": "11479392-70",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "myList = [i  i  range()]",
    "canonical_cmd": "VAR_STR = [i for i in range(10)]"
  },
  {
    "nl": "find the mean of elements in list `l`",
    "cmd": "sum(l) / float(len(l))",
    "question_id": "9039961-94",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#float",
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(l) / float(len(l))",
    "canonical_cmd": "sum(VAR_STR) / float(len(VAR_STR))"
  },
  {
    "nl": "split string 'happy_hats_for_cats' using string '_for_'",
    "cmd": "re.split('_for_', 'happy_hats_for_cats')",
    "question_id": "34410358-72",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, )",
    "canonical_cmd": "re.split('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "Split string 'sad_pandas_and_happy_cats_for_people' based on string 'and', 'or' or 'for'",
    "cmd": "re.split('_(?:for|or|and)_', 'sad_pandas_and_happy_cats_for_people')",
    "question_id": "34410358-16",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, )",
    "canonical_cmd": "re.split('_(?:for|or|and)_', 'VAR_STR')"
  },
  {
    "nl": "Split a string `l` by multiple words `for` or `or` or `and`",
    "cmd": "[re.split('_(?:f?or|and)_', s) for s in l]",
    "question_id": "34410358-18",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "[re.split(, s)  s  l]",
    "canonical_cmd": "[re.split('_(?:f?or|and)_', s) for s in VAR_STR]"
  },
  {
    "nl": "get the path of Python executable under windows",
    "cmd": "os.path.dirname(sys.executable)",
    "question_id": "647515-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.dirname"
    ],
    "clean_cmd": "os.path.dirname(sys.executable)",
    "canonical_cmd": "os.path.dirname(sys.executable)"
  },
  {
    "nl": "how to format a list of arguments `my_args` into a string",
    "cmd": "'Hello %s' % ', '.join(my_args)",
    "question_id": "18391059-36",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": " % .join(my_args)",
    "canonical_cmd": "'Hello %s' % ', '.join(VAR_STR)"
  },
  {
    "nl": "loop through 0 to 10 with step 2",
    "cmd": "for i in range(0, 10, 2):\n    pass",
    "question_id": "2990121-19",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": " i  range(, , ):",
    "canonical_cmd": "for i in range(0, 10, 2):\n    pass"
  },
  {
    "nl": "loop through `mylist` with step 2",
    "cmd": "for i in mylist[::2]:\n    pass",
    "question_id": "2990121-50",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " i  mylist[::]:",
    "canonical_cmd": "for i in VAR_STR[::2]:\n    pass"
  },
  {
    "nl": "search and split string 'aaa bbb ccc ddd eee fff' by delimiter '(ddd)'",
    "cmd": "re.split('(ddd)', 'aaa bbb ccc ddd eee fff', 1)",
    "question_id": "8970524-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, , )",
    "canonical_cmd": "re.split('VAR_STR', 'VAR_STR', 1)"
  },
  {
    "nl": "regex search and split string 'aaa bbb ccc ddd eee fff' by delimiter '(d(d)d)'",
    "cmd": "re.split('(d(d)d)', 'aaa bbb ccc ddd eee fff', 1)",
    "question_id": "8970524-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, , )",
    "canonical_cmd": "re.split('VAR_STR', 'VAR_STR', 1)"
  },
  {
    "nl": "unescape special characters without splitting data in array of strings `['I ', u'<', '3s U ', u'&', ' you luvz me']`",
    "cmd": "\"\"\"\"\"\".join(['I ', '<', '3s U ', '&', ' you luvz me'])",
    "question_id": "20876077-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join([, , , , ])",
    "canonical_cmd": "\"\"\"\"\"\".join(['I ', '<', '3s U ', '&', ' you luvz me'])"
  },
  {
    "nl": "output first 100 characters in a string `my_string`",
    "cmd": "print(my_string[0:100])",
    "question_id": "3486384-86",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print(my_string[:])",
    "canonical_cmd": "print(VAR_STR[0:100])"
  },
  {
    "nl": "print backslash",
    "cmd": "print('\\\\')",
    "question_id": "19095796-93",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print()",
    "canonical_cmd": "print('\\\\')"
  },
  {
    "nl": "convert a dataframe `df`'s column `ID` into datetime, after removing the first and last 3 letters",
    "cmd": "pd.to_datetime(df.ID.str[1:-3])",
    "question_id": "42100344-66",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.to_datetime"
    ],
    "clean_cmd": "pd.to_datetime(df.ID.str[:-])",
    "canonical_cmd": "pd.to_datetime(VAR_STR.VAR_STR.str[1:-3])"
  },
  {
    "nl": "create 3 by 3 matrix of random numbers",
    "cmd": "numpy.random.random((3, 3))",
    "question_id": "15451958-12",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "numpy.random.random((, ))",
    "canonical_cmd": "numpy.random.random((3, 3))"
  },
  {
    "nl": "create a list with permutations of string 'abcd'",
    "cmd": "list(powerset('abcd'))",
    "question_id": "1482308-10",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list"
    ],
    "clean_cmd": "list(powerset())",
    "canonical_cmd": "list(powerset('VAR_STR'))"
  },
  {
    "nl": "prepend string 'hello' to all items in list 'a'",
    "cmd": "['hello{0}'.format(i) for i in a]",
    "question_id": "13331419-85",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": "[.format(i)  i  a]",
    "canonical_cmd": "['hello{0}'.format(i) for i in VAR_STR]"
  },
  {
    "nl": "get the opposite diagonal of a numpy array `array`",
    "cmd": "np.diag(np.rot90(array))",
    "question_id": "16114333-7",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.rot90",
      "numpy.reference.generated.numpy.diag"
    ],
    "clean_cmd": "np.diag(np.rot90(array))",
    "canonical_cmd": "np.diag(np.rot90(VAR_STR))"
  },
  {
    "nl": "get the average of the list of values for each key in dictionary `d`",
    "cmd": "[(i, sum(j) / len(j)) for i, j in list(d.items())]",
    "question_id": "29565452-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#sum",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "[(i, sum(j) / len(j))  i, j  list(d.items())]",
    "canonical_cmd": "[(i, sum(j) / len(j)) for i, j in list(VAR_STR.items())]"
  },
  {
    "nl": "get domain/host name from request object in Django",
    "cmd": "request.META['HTTP_HOST']",
    "question_id": "29945684-49",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "request.META[]",
    "canonical_cmd": "request.META['HTTP_HOST']"
  },
  {
    "nl": "sort list `bar` by each element's attribute `attrb1` and attribute `attrb2` in reverse order",
    "cmd": "bar.sort(key=lambda x: (x.attrb1, x.attrb2), reverse=True)",
    "question_id": "4768151-85",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "bar.sort(key= x: (x.attrb1, x.attrb2), reverse=True)",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: (x.VAR_STR, x.VAR_STR), reverse=True)"
  },
  {
    "nl": "Represent DateTime object '10/05/2012' with format '%d/%m/%Y' into format '%Y-%m-%d'",
    "cmd": "datetime.datetime.strptime('10/05/2012', '%d/%m/%Y').strftime('%Y-%m-%d')",
    "question_id": "10541640-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime",
      "python.library.datetime#datetime.datetime.strftime"
    ],
    "clean_cmd": "datetime.datetime.strptime(, ).strftime()",
    "canonical_cmd": "datetime.datetime.strptime('VAR_STR', 'VAR_STR').strftime('VAR_STR')"
  },
  {
    "nl": "find a tag `option` whose `value` attribute is `state` in selenium",
    "cmd": "driver.find_element_by_xpath(\"//option[@value='\" + state + \"']\").click()",
    "question_id": "32874539-7",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.find_element_by_xpath( + state + ).click()",
    "canonical_cmd": "driver.find_element_by_xpath(\"//option[@value='\" + VAR_STR + \"']\").click()"
  },
  {
    "nl": "write a regex pattern to match even number of letter `A`",
    "cmd": "re.compile('^([^A]*)AA([^A]|AA)*$')",
    "question_id": "2045175-76",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.compile"
    ],
    "clean_cmd": "re.compile()",
    "canonical_cmd": "re.compile('^([^A]*)AA([^A]|AA)*$')"
  },
  {
    "nl": "group rows of pandas dataframe `df` with same 'id'",
    "cmd": "df.groupby('id').agg(lambda x: x.tolist())",
    "question_id": "34776651-9",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.dataframe.agg",
      "pandas.reference.api.pandas.series.tolist"
    ],
    "clean_cmd": "df.groupby().agg( x: x.tolist())",
    "canonical_cmd": "VAR_STR.groupby('VAR_STR').agg(lambda x: x.tolist())"
  },
  {
    "nl": "list all files of a directory `mypath`",
    "cmd": "onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]",
    "question_id": "3207219-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.listdir",
      "python.library.tarfile#tarfile.TarInfo.isfile",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "onlyfiles = [f  f  listdir(mypath)  isfile(join(mypath, f))]",
    "canonical_cmd": "onlyfiles = [f for f in listdir(VAR_STR) if isfile(join(VAR_STR, f))]"
  },
  {
    "nl": "list all files of a directory `mypath`",
    "cmd": "f = []\nfor (dirpath, dirnames, filenames) in walk(mypath):\n    f.extend(filenames)\n    break",
    "question_id": "3207219-76",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.walk",
      "python.library.collections#collections.deque.extend"
    ],
    "clean_cmd": "f = [] (dirpath, dirnames, filenames)  walk(mypath):f.extend(filenames)",
    "canonical_cmd": "f = []\nfor dirpath, dirnames, filenames in walk(VAR_STR):\n    f.extend(filenames)\n    break"
  },
  {
    "nl": "list all \".txt\" files of a directory \"/home/adam/\"",
    "cmd": "print(glob.glob('/home/adam/*.txt'))",
    "question_id": "3207219-42",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print(glob.glob())",
    "canonical_cmd": "print(glob.glob('/home/adam/*.txt'))"
  },
  {
    "nl": "list all files of a directory \"somedirectory\"",
    "cmd": "os.listdir('somedirectory')",
    "question_id": "3207219-15",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.listdir"
    ],
    "clean_cmd": "os.listdir()",
    "canonical_cmd": "os.listdir('VAR_STR')"
  },
  {
    "nl": "call parent class `Instructor` of child class constructor",
    "cmd": "super(Instructor, self).__init__(name, year)",
    "question_id": "12557612-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#super",
      "python.library.logging#logging.Handler.__init__"
    ],
    "clean_cmd": "super(Instructor, self).__init__(name, year)",
    "canonical_cmd": "super(VAR_STR, self).__init__(name, year)"
  },
  {
    "nl": "Get all the texts without tags from beautiful soup object `soup`",
    "cmd": "\"\"\"\"\"\".join(soup.findAll(text=True))",
    "question_id": "2416823-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(soup.findAll(text=True))",
    "canonical_cmd": "\"\"\"\"\"\".join(VAR_STR.findAll(text=True))"
  },
  {
    "nl": "format a string `num` using string formatting",
    "cmd": "\"\"\"{0:.3g}\"\"\".format(num)",
    "question_id": "2389846-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format(num)",
    "canonical_cmd": "\"\"\"{0:.3g}\"\"\".format(VAR_STR)"
  },
  {
    "nl": "change string `s` to upper case",
    "cmd": "s.upper()",
    "question_id": "9257094-100",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.upper"
    ],
    "clean_cmd": "s.upper()",
    "canonical_cmd": "VAR_STR.upper()"
  },
  {
    "nl": "extract the first four rows of the column `ID` from a pandas dataframe `df`",
    "cmd": "df.groupby('ID').head(4)",
    "question_id": "40987319-40",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.dataframe.head"
    ],
    "clean_cmd": "df.groupby().head()",
    "canonical_cmd": "VAR_STR.groupby('VAR_STR').head(4)"
  },
  {
    "nl": "Convert escaped utf string to utf string in `your string`",
    "cmd": "print('your string'.decode('string_escape'))",
    "question_id": "42548362-27",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "print(.decode())",
    "canonical_cmd": "print('VAR_STR'.decode('string_escape'))"
  },
  {
    "nl": "sort list `['14:10:01', '03:12:08']`",
    "cmd": "sorted(['14:10:01', '03:12:08'])",
    "question_id": "17713873-84",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted([, ])",
    "canonical_cmd": "sorted([VAR_STR])"
  },
  {
    "nl": "count the number of True values associated with key 'success' in dictionary `d`",
    "cmd": "sum(1 if d['success'] else 0 for d in s)",
    "question_id": "35269374-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(  d[]    d  s)",
    "canonical_cmd": "sum(1 if VAR_STR['VAR_STR'] else 0 for VAR_STR in s)"
  },
  {
    "nl": "get the sum of values associated with the key \u2018success\u2019 for a list of dictionaries `s`",
    "cmd": "sum(d['success'] for d in s)",
    "question_id": "35269374-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(d[]  d  s)",
    "canonical_cmd": "sum(d['success'] for d in VAR_STR)"
  },
  {
    "nl": "replace unicode character '\\u2022' in string 'str' with '*'",
    "cmd": "str.decode('utf-8').replace('\\u2022', '*').encode('utf-8')",
    "question_id": "13093727-42",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode",
      "python.library.stdtypes#str.replace",
      "pandas.reference.api.pandas.series.str.decode"
    ],
    "clean_cmd": "str.decode().replace(, ).encode()",
    "canonical_cmd": "VAR_STR.decode('utf-8').replace('VAR_STR', 'VAR_STR').encode('utf-8')"
  },
  {
    "nl": "replace unicode characters ''\\u2022' in string 'str' with '*'",
    "cmd": "str.decode('utf-8').replace('\\u2022', '*')",
    "question_id": "13093727-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace",
      "pandas.reference.api.pandas.series.str.decode"
    ],
    "clean_cmd": "str.decode().replace(, )",
    "canonical_cmd": "str.decode('utf-8').replace('\u2022', '*')"
  },
  {
    "nl": "sum a list of numbers `list_of_nums`",
    "cmd": "sum(list_of_nums)",
    "question_id": "4362586-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(list_of_nums)",
    "canonical_cmd": "sum(VAR_STR)"
  },
  {
    "nl": "Spawn a process to run python script `myscript.py` in C++",
    "cmd": "system('python myscript.py')",
    "question_id": "41246071-1",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "system()",
    "canonical_cmd": "system('python myscript.py')"
  },
  {
    "nl": "create file 'x' if file 'x' does not exist",
    "cmd": "fd = os.open('x', os.O_WRONLY | os.O_CREAT | os.O_EXCL)",
    "question_id": "1348026-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.open"
    ],
    "clean_cmd": "fd = os.open(, os.O_WRONLY | os.O_CREAT | os.O_EXCL)",
    "canonical_cmd": "fd = os.open('VAR_STR', os.O_WRONLY | os.O_CREAT | os.O_EXCL)"
  },
  {
    "nl": "split a string `s` into integers",
    "cmd": "l = (int(x) for x in s.split())",
    "question_id": "6429638-16",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "l = (int(x)  x  s.split())",
    "canonical_cmd": "l = (int(x) for x in VAR_STR.split())"
  },
  {
    "nl": "split a string `42 0` by white spaces.",
    "cmd": "\"\"\"42 0\"\"\".split()",
    "question_id": "6429638-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": ".split()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".split()"
  },
  {
    "nl": "How to split a string into integers in Python?",
    "cmd": "map(int, '42 0'.split())",
    "question_id": "6429638-62",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "map(int, .split())",
    "canonical_cmd": "map(int, '42 0'.split())"
  },
  {
    "nl": "getting a list of all subdirectories in the directory `directory`",
    "cmd": "os.walk(directory)",
    "question_id": "973473-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.walk"
    ],
    "clean_cmd": "os.walk(directory)",
    "canonical_cmd": "os.walk(VAR_STR)"
  },
  {
    "nl": "get a list of all subdirectories in the directory `directory`",
    "cmd": "[x[0] for x in os.walk(directory)]",
    "question_id": "973473-48",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.walk"
    ],
    "clean_cmd": "[x[]  x  os.walk(directory)]",
    "canonical_cmd": "[x[0] for x in os.walk(VAR_STR)]"
  },
  {
    "nl": "combine two sequences into a dictionary",
    "cmd": "dict(zip(keys, values))",
    "question_id": "579856-6",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict(zip(keys, values))",
    "canonical_cmd": "dict(zip(keys, values))"
  },
  {
    "nl": "update the dictionary `mydic` with dynamic keys `i` and values with key 'name' from dictionary `o`",
    "cmd": "mydic.update({i: o['name']})",
    "question_id": "13860026-30",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.update"
    ],
    "clean_cmd": "mydic.update({i: o[]})",
    "canonical_cmd": "VAR_STR.update({VAR_STR: VAR_STR['VAR_STR']})"
  },
  {
    "nl": "get a list of variables from module 'adfix.py' in current module.",
    "cmd": "print([item for item in dir(adfix) if not item.startswith('__')])",
    "question_id": "9759820-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#dir",
      "python.library.stdtypes#str.startswith"
    ],
    "clean_cmd": "print([item  item  dir(adfix)   item.startswith()])",
    "canonical_cmd": "print([item for item in dir(adfix) if not item.startswith('__')])"
  },
  {
    "nl": "update dictionary `b`, overwriting values where keys are identical, with contents of dictionary `d`",
    "cmd": "b.update(d)",
    "question_id": "12717716-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.update"
    ],
    "clean_cmd": "b.update(d)",
    "canonical_cmd": "VAR_STR.update(VAR_STR)"
  },
  {
    "nl": "make a row-by-row copy `y` of array `x`",
    "cmd": "y = [row[:] for row in x]",
    "question_id": "6532881-46",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "y = [row[:]  row  x]",
    "canonical_cmd": "VAR_STR = [row[:] for row in VAR_STR]"
  },
  {
    "nl": "read excel file `file_name` using pandas",
    "cmd": "dfs = pd.read_excel(file_name, sheetname=None)",
    "question_id": "16888888-32",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.read_excel"
    ],
    "clean_cmd": "dfs = pd.read_excel(file_name, sheetname=None)",
    "canonical_cmd": "dfs = pd.read_excel(VAR_STR, sheetname=None)"
  },
  {
    "nl": "Find all words containing letters between A and Z in string `formula`",
    "cmd": "re.findall('\\\\b[A-Z]', formula)",
    "question_id": "13840883-15",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, formula)",
    "canonical_cmd": "re.findall('\\\\b[A-Z]', VAR_STR)"
  },
  {
    "nl": "How can I launch an instance of an application using Python?",
    "cmd": "os.system('start excel.exe <path/to/file>')",
    "question_id": "247724-56",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system('start excel.exe <path/to/file>')"
  },
  {
    "nl": "create a flat dictionary by summing values associated with similar keys in each dictionary of list `dictlist`",
    "cmd": "dict((key, sum(d[key] for d in dictList)) for key in dictList[0])",
    "question_id": "974678-36",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.functions#sum"
    ],
    "clean_cmd": "dict((key, sum(d[key]  d  dictList))  key  dictList[])",
    "canonical_cmd": "dict((key, sum(d[key] for d in dictList)) for key in dictList[0])"
  },
  {
    "nl": "Remove all items from a dictionary `d` where the values are less than `1`",
    "cmd": "d = dict((k, v) for k, v in d.items() if v > 0)",
    "question_id": "8425046-90",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "d = dict((k, v)  k, v  d.items()  v &gt; )",
    "canonical_cmd": "VAR_STR = dict((k, v) for k, v in VAR_STR.items() if v > 0)"
  },
  {
    "nl": "Filter dictionary `d` to have items with value greater than 0",
    "cmd": "d = {k: v for k, v in list(d.items()) if v > 0}",
    "question_id": "8425046-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "d = {k: v  k, v  list(d.items())  v &gt; }",
    "canonical_cmd": "VAR_STR = {k: v for k, v in list(VAR_STR.items()) if v > 0}"
  },
  {
    "nl": "split string 'fooxyzbar' based on case-insensitive matching using string 'XYZ'",
    "cmd": "re.compile('XYZ', re.IGNORECASE).split('fooxyzbar')",
    "question_id": "8993904-11",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.compile",
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.compile(, re.IGNORECASE).split()",
    "canonical_cmd": "re.compile('VAR_STR', re.IGNORECASE).split('VAR_STR')"
  },
  {
    "nl": "BeautifulSoup find tag 'div' with styling 'width=300px;' in HTML string `soup`",
    "cmd": "soup.findAll('div', style='width=300px;')",
    "question_id": "3945750-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "soup.findAll(, style=)",
    "canonical_cmd": "VAR_STR.findAll('VAR_STR', style='VAR_STR')"
  },
  {
    "nl": "send a signal `signal.SIGUSR1` to the current process",
    "cmd": "os.kill(os.getpid(), signal.SIGUSR1)",
    "question_id": "15080500-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.getpid",
      "python.library.os#os.kill"
    ],
    "clean_cmd": "os.kill(os.getpid(), signal.SIGUSR1)",
    "canonical_cmd": "os.kill(os.getpid(), signal.SIGUSR1)"
  },
  {
    "nl": "check if all elements in list `myList` are identical",
    "cmd": "all(x == myList[0] for x in myList)",
    "question_id": "3844801-14",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#all"
    ],
    "clean_cmd": "all(x == myList[]  x  myList)",
    "canonical_cmd": "all(x == VAR_STR[0] for x in VAR_STR)"
  },
  {
    "nl": "format number of spaces between strings `Python`, `:` and `Very Good` to be `20`",
    "cmd": "print('%*s : %*s' % (20, 'Python', 20, 'Very Good'))",
    "question_id": "4302166-0",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print( % (, , , ))",
    "canonical_cmd": "print('%*s : %*s' % (20, 'VAR_STR', 20, 'VAR_STR'))"
  },
  {
    "nl": "How to convert a string from CP-1251 to UTF-8?",
    "cmd": "d.decode('cp1251').encode('utf8')",
    "question_id": "7555335-11",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "d.decode().encode()",
    "canonical_cmd": "d.decode('cp1251').encode('utf8')"
  },
  {
    "nl": "get rid of None values in dictionary `kwargs`",
    "cmd": "res = {k: v for k, v in list(kwargs.items()) if v is not None}",
    "question_id": "2544710-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "res = {k: v  k, v  list(kwargs.items())  v   None}",
    "canonical_cmd": "res = {k: v for k, v in list(VAR_STR.items()) if v is not None}"
  },
  {
    "nl": "get rid of None values in dictionary `kwargs`",
    "cmd": "res = dict((k, v) for k, v in kwargs.items() if v is not None)",
    "question_id": "2544710-4",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "res = dict((k, v)  k, v  kwargs.items()  v   None)",
    "canonical_cmd": "res = dict((k, v) for k, v in VAR_STR.items() if v is not None)"
  },
  {
    "nl": "concatenate a list of strings `['a', 'b', 'c']`",
    "cmd": "\"\"\"\"\"\".join(['a', 'b', 'c'])",
    "question_id": "6726636-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join([, , ])",
    "canonical_cmd": "\"\"\"\"\"\".join([VAR_STR])"
  },
  {
    "nl": "sending http headers to `client`",
    "cmd": "client.send('HTTP/1.0 200 OK\\r\\n')",
    "question_id": "8315209-12",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.http.client#http.client.HTTPConnection.send"
    ],
    "clean_cmd": "client.send()",
    "canonical_cmd": "VAR_STR.send('HTTP/1.0 200 OK\\r\\n')"
  },
  {
    "nl": "split a multi-line string `inputString` into separate strings",
    "cmd": "inputString.split('\\n')",
    "question_id": "172439-45",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "inputString.split()",
    "canonical_cmd": "VAR_STR.split('\\n')"
  },
  {
    "nl": "Split a multi-line string ` a \\n b \\r\\n c ` by new line character `\\n`",
    "cmd": "' a \\n b \\r\\n c '.split('\\n')",
    "question_id": "172439-30",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": ".split()",
    "canonical_cmd": "' a \\n b \\r\\n c '.split('VAR_STR')"
  },
  {
    "nl": "concatenate elements of list `b` by a colon \":\"",
    "cmd": "\"\"\":\"\"\".join(str(x) for x in b)",
    "question_id": "13954222-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(str(x)  x  b)",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".join(str(x) for x in VAR_STR)"
  },
  {
    "nl": "Calculate sum over all rows of 2D numpy array",
    "cmd": "a.sum(axis=1)",
    "question_id": "13567345-100",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "a.sum(axis=)",
    "canonical_cmd": "a.sum(axis=1)"
  },
  {
    "nl": "concatenate items of list `l` with a space ' '",
    "cmd": "print(' '.join(map(str, l)))",
    "question_id": "13550423-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "print(.join(map(str, l)))",
    "canonical_cmd": "print(' '.join(map(str, VAR_STR)))"
  },
  {
    "nl": "run script 'hello.py' with argument 'htmlfilename.htm' on terminal using python executable",
    "cmd": "subprocess.call(['python.exe', 'hello.py', 'htmlfilename.htm'])",
    "question_id": "25651990-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call([, , ])",
    "canonical_cmd": "subprocess.call(['python.exe', 'VAR_STR', 'VAR_STR'])"
  },
  {
    "nl": "How can I parse a time string containing milliseconds in it with python?",
    "cmd": "time.strptime('30/03/09 16:31:32.123', '%d/%m/%y %H:%M:%S.%f')",
    "question_id": "698223-79",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.strptime"
    ],
    "clean_cmd": "time.strptime(, )",
    "canonical_cmd": "time.strptime('30/03/09 16:31:32.123', '%d/%m/%y %H:%M:%S.%f')"
  },
  {
    "nl": "convert a string `my_string` with dot and comma into a float number `my_float`",
    "cmd": "my_float = float(my_string.replace(',', ''))",
    "question_id": "6633523-61",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#float",
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "my_float = float(my_string.replace(, ))",
    "canonical_cmd": "VAR_STR = float(VAR_STR.replace(',', ''))"
  },
  {
    "nl": "convert a string `123,456.908` with dot and comma into a floating number",
    "cmd": "float('123,456.908'.replace(',', ''))",
    "question_id": "6633523-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#float",
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "float(.replace(, ))",
    "canonical_cmd": "float('VAR_STR'.replace(',', ''))"
  },
  {
    "nl": "set pythonpath in python script.",
    "cmd": "sys.path.append('/path/to/whatever')",
    "question_id": "3108285-59",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.append"
    ],
    "clean_cmd": "sys.path.append()",
    "canonical_cmd": "sys.path.append('/path/to/whatever')"
  },
  {
    "nl": "split string 'Words, words, words.' using a regex '(\\\\W+)'",
    "cmd": "re.split('(\\\\W+)', 'Words, words, words.')",
    "question_id": "2195340-19",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, )",
    "canonical_cmd": "re.split('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "open a file `Output.txt` in append mode",
    "cmd": "file = open('Output.txt', 'a')",
    "question_id": "17977584-11",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "file = open(, )",
    "canonical_cmd": "file = open('VAR_STR', 'a')"
  },
  {
    "nl": "argparse add argument with flag '--version' and version action of '%(prog)s 2.0' to parser `parser`",
    "cmd": "parser.add_argument('--version', action='version', version='%(prog)s 2.0')",
    "question_id": "15405636-62",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.argparse#argparse.ArgumentParser.add_argument"
    ],
    "clean_cmd": "parser.add_argument(, action=, version=)",
    "canonical_cmd": "VAR_STR.add_argument('VAR_STR', action='version', version='VAR_STR')"
  },
  {
    "nl": "remove key 'c' from dictionary `d`",
    "cmd": "{i: d[i] for i in d if i != 'c'}",
    "question_id": "17665809-31",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "{i: d[i]  i  d  i != }",
    "canonical_cmd": "{i: VAR_STR[i] for i in VAR_STR if i != 'VAR_STR'}"
  },
  {
    "nl": "Create new DataFrame object by merging columns \"key\" of  dataframes `split_df` and `csv_df` and rename the columns from dataframes `split_df` and `csv_df` with suffix `_left` and `_right` respectively",
    "cmd": "pd.merge(split_df, csv_df, on=['key'], suffixes=('_left', '_right'))",
    "question_id": "41861705-8",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.merge"
    ],
    "clean_cmd": "pd.merge(split_df, csv_df, on=[], suffixes=(, ))",
    "canonical_cmd": "pd.merge(VAR_STR, VAR_STR, on=['VAR_STR'], suffixes=('VAR_STR', 'VAR_STR'))"
  },
  {
    "nl": "Split a string `s` by space with `4` splits",
    "cmd": "s.split(' ', 4)",
    "question_id": "10697757-85",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "s.split(, )",
    "canonical_cmd": "VAR_STR.split(' ', 4)"
  },
  {
    "nl": "read keyboard-input",
    "cmd": "input('Enter your input:')",
    "question_id": "5404068-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#input"
    ],
    "clean_cmd": "input()",
    "canonical_cmd": "input('Enter your input:')"
  },
  {
    "nl": "enable debug mode on Flask application `app`",
    "cmd": "app.run(debug=True)",
    "question_id": "16344756-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.pdb#pdb.run"
    ],
    "clean_cmd": "app.run(debug=True)",
    "canonical_cmd": "VAR_STR.run(debug=True)"
  },
  {
    "nl": "python save list `mylist` to file object 'save.txt'",
    "cmd": "pickle.dump(mylist, open('save.txt', 'wb'))",
    "question_id": "40133826-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.pickle#pickle.dump",
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "pickle.dump(mylist, open(, ))",
    "canonical_cmd": "pickle.dump(VAR_STR, open('VAR_STR', 'wb'))"
  },
  {
    "nl": "Create 3d array of zeroes of size `(3,3,3)`",
    "cmd": "numpy.zeros((3, 3, 3))",
    "question_id": "2173087-86",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.zeros"
    ],
    "clean_cmd": "numpy.zeros((, , ))",
    "canonical_cmd": "numpy.zeros((3, 3, 3))"
  },
  {
    "nl": "cut off the last word of a sentence `content`",
    "cmd": "\"\"\" \"\"\".join(content.split(' ')[:-1])",
    "question_id": "6266727-11",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": ".join(content.split()[:-])",
    "canonical_cmd": "\"\"\" \"\"\".join(VAR_STR.split(' ')[:-1])"
  },
  {
    "nl": "convert scalar `x` to array",
    "cmd": "x = np.asarray(x).reshape(1, -1)[(0), :]",
    "question_id": "30385151-65",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.asarray",
      "numpy.reference.generated.numpy.reshape"
    ],
    "clean_cmd": "x = np.asarray(x).reshape(, -)[(), :]",
    "canonical_cmd": "VAR_STR = np.asarray(VAR_STR).reshape(1, -1)[(0), :]"
  },
  {
    "nl": "sum all elements of nested list `L`",
    "cmd": "sum(sum(i) if isinstance(i, list) else i for i in L)",
    "question_id": "15856127-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum",
      "python.library.functions#isinstance"
    ],
    "clean_cmd": "sum(sum(i)  isinstance(i, list)  i  i  L)",
    "canonical_cmd": "sum(sum(i) if isinstance(i, list) else i for i in VAR_STR)"
  },
  {
    "nl": "Multiple each value by `2` for all keys in a dictionary `my_dict`",
    "cmd": "my_dict.update((x, y * 2) for x, y in list(my_dict.items()))",
    "question_id": "5010536-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "my_dict.update((x, y * )  x, y  list(my_dict.items()))",
    "canonical_cmd": "VAR_STR.update((x, y * 2) for x, y in list(VAR_STR.items()))"
  },
  {
    "nl": "running bash script 'sleep.sh'",
    "cmd": "subprocess.call('sleep.sh', shell=True)",
    "question_id": "13745648-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call(, shell=True)",
    "canonical_cmd": "subprocess.call('VAR_STR', shell=True)"
  },
  {
    "nl": "Join elements of list `l` with a comma `,`",
    "cmd": "\"\"\",\"\"\".join(l)",
    "question_id": "44778-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(l)",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".join(VAR_STR)"
  },
  {
    "nl": "make a comma-separated string from a list `myList`",
    "cmd": "myList = ','.join(map(str, myList))",
    "question_id": "44778-82",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "myList = .join(map(str, myList))",
    "canonical_cmd": "VAR_STR = ','.join(map(str, VAR_STR))"
  },
  {
    "nl": "remove substring 'bag,' from a string 'lamp, bag, mirror'",
    "cmd": "print('lamp, bag, mirror'.replace('bag,', ''))",
    "question_id": "18454570-61",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "print(.replace(, ))",
    "canonical_cmd": "print('VAR_STR'.replace('VAR_STR', ''))"
  },
  {
    "nl": "Reverse the order of words, delimited by `.`, in string `s`",
    "cmd": "\"\"\".\"\"\".join(s.split('.')[::-1])",
    "question_id": "4357787-45",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": ".join(s.split()[::-])",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".join(VAR_STR.split('VAR_STR')[::-1])"
  },
  {
    "nl": "convert epoch time represented as milliseconds `s` to string using format '%Y-%m-%d %H:%M:%S.%f'",
    "cmd": "datetime.datetime.fromtimestamp(s).strftime('%Y-%m-%d %H:%M:%S.%f')",
    "question_id": "21787496-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.fromtimestamp",
      "python.library.datetime#datetime.datetime.strftime"
    ],
    "clean_cmd": "datetime.datetime.fromtimestamp(s).strftime()",
    "canonical_cmd": "datetime.datetime.fromtimestamp(VAR_STR).strftime('VAR_STR')"
  },
  {
    "nl": "parse milliseconds epoch time '1236472051807' to format '%Y-%m-%d %H:%M:%S'",
    "cmd": "time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(1236472051807 / 1000.0))",
    "question_id": "21787496-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.gmtime",
      "python.library.time#time.strftime"
    ],
    "clean_cmd": "time.strftime(, time.gmtime( / 1000.0))",
    "canonical_cmd": "time.strftime('VAR_STR', time.gmtime(1236472051807 / 1000.0))"
  },
  {
    "nl": "sum elements at index `column` of each list in list `data`",
    "cmd": "print(sum(row[column] for row in data))",
    "question_id": "15352457-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "print(sum(row[column]  row  data))",
    "canonical_cmd": "print(sum(row[VAR_STR] for row in VAR_STR))"
  },
  {
    "nl": "sum columns of a list `array`",
    "cmd": "[sum(row[i] for row in array) for i in range(len(array[0]))]",
    "question_id": "15352457-58",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range",
      "python.library.functions#sum"
    ],
    "clean_cmd": "[sum(row[i]  row  array)  i  range(len(array[]))]",
    "canonical_cmd": "[sum(row[i] for row in VAR_STR) for i in range(len(VAR_STR[0]))]"
  },
  {
    "nl": "combine list of dictionaries `dicts` with the same keys in each list to a single dictionary",
    "cmd": "dict((k, [d[k] for d in dicts]) for k in dicts[0])",
    "question_id": "11533274-85",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict((k, [d[k]  d  dicts])  k  dicts[])",
    "canonical_cmd": "dict((k, [d[k] for d in VAR_STR]) for k in VAR_STR[0])"
  },
  {
    "nl": "Merge a nested dictionary `dicts` into a flat dictionary by concatenating nested values with the same key `k`",
    "cmd": "{k: [d[k] for d in dicts] for k in dicts[0]}",
    "question_id": "11533274-2",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "{k: [d[k]  d  dicts]  k  dicts[]}",
    "canonical_cmd": "{VAR_STR: [d[VAR_STR] for d in VAR_STR] for VAR_STR in VAR_STR[0]}"
  },
  {
    "nl": "How do I get the url parameter in a Flask view",
    "cmd": "request.args['myParam']",
    "question_id": "14026704-65",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "request.args[]",
    "canonical_cmd": "request.args['myParam']"
  },
  {
    "nl": "Insert directory 'apps' into directory `__file__`",
    "cmd": "sys.path.insert(1, os.path.join(os.path.dirname(__file__), 'apps'))",
    "question_id": "2354166-0",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.dirname",
      "python.library.os.path#os.path.join"
    ],
    "clean_cmd": "sys.path.insert(, os.path.join(os.path.dirname(__file__), ))",
    "canonical_cmd": "sys.path.insert(1, os.path.join(os.path.dirname(VAR_STR), 'VAR_STR'))"
  },
  {
    "nl": "modify sys.path for python module `subdir`",
    "cmd": "sys.path.append(os.path.join(os.path.dirname(__file__), 'subdir'))",
    "question_id": "2354166-42",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.dirname",
      "python.library.os.path#os.path.join"
    ],
    "clean_cmd": "sys.path.append(os.path.join(os.path.dirname(__file__), ))",
    "canonical_cmd": "sys.path.append(os.path.join(os.path.dirname(__file__), 'VAR_STR'))"
  },
  {
    "nl": "Insert a 'None' value into a SQLite3 table.",
    "cmd": "db.execute(\"INSERT INTO present VALUES('test2', ?, 10)\", (None,))",
    "question_id": "20211942-18",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.msilib#msilib.View.Execute"
    ],
    "clean_cmd": "db.execute(, (None,))",
    "canonical_cmd": "db.execute(\"INSERT INTO present VALUES('test2', ?, 10)\", (None,))"
  },
  {
    "nl": "flatten list `list_of_menuitems`",
    "cmd": "[image for menuitem in list_of_menuitems for image in menuitem]",
    "question_id": "406121-90",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[image  menuitem  list_of_menuitems  image  menuitem]",
    "canonical_cmd": "[image for menuitem in VAR_STR for image in menuitem]"
  },
  {
    "nl": "append elements of a set `b` to a list `a`",
    "cmd": "a.extend(b)",
    "question_id": "4741537-60",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.collections#collections.deque.extend"
    ],
    "clean_cmd": "a.extend(b)",
    "canonical_cmd": "VAR_STR.extend(VAR_STR)"
  },
  {
    "nl": "Append elements of a set to a list in Python",
    "cmd": "a.extend(list(b))",
    "question_id": "4741537-84",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.collections#collections.deque.extend"
    ],
    "clean_cmd": "a.extend(list(b))",
    "canonical_cmd": "a.extend(list(b))"
  },
  {
    "nl": "upload file using FTP",
    "cmd": "ftp.storlines('STOR ' + filename, open(filename, 'r'))",
    "question_id": "17438096-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.ftplib#ftplib.FTP.storlines"
    ],
    "clean_cmd": "ftp.storlines( + filename, open(filename, ))",
    "canonical_cmd": "ftp.storlines('STOR ' + filename, open(filename, 'r'))"
  },
  {
    "nl": "add one to the hidden web element with id 'XYZ' with selenium python script",
    "cmd": "browser.execute_script(\"document.getElementById('XYZ').value+='1'\")",
    "question_id": "15049182-63",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "browser.execute_script()",
    "canonical_cmd": "browser.execute_script(\"document.getElementById('XYZ').value+='1'\")"
  },
  {
    "nl": "create array containing the maximum value of respective elements of array `[2, 3, 4]` and array `[1, 5, 2]`",
    "cmd": "np.maximum([2, 3, 4], [1, 5, 2])",
    "question_id": "28742436-20",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.maximum"
    ],
    "clean_cmd": "np.maximum([, , ], [, , ])",
    "canonical_cmd": "np.maximum([VAR_STR], [VAR_STR])"
  },
  {
    "nl": "print a list `l` and move first 3 elements to the end of the list",
    "cmd": "print(l[3:] + l[:3])",
    "question_id": "34280147-9",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print(l[:] + l[:])",
    "canonical_cmd": "print(VAR_STR[3:] + VAR_STR[:3])"
  },
  {
    "nl": "loop over files in directory '.'",
    "cmd": "for fn in os.listdir('.'):\n    if os.path.isfile(fn):\n        pass",
    "question_id": "11801309-71",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.isfile",
      "python.library.os#os.listdir"
    ],
    "clean_cmd": " fn  os.listdir(): os.path.isfile(fn):",
    "canonical_cmd": "for fn in os.listdir('VAR_STR'):\n    if os.path.isfile(fn):\n        pass"
  },
  {
    "nl": "loop over files in directory `source`",
    "cmd": "for (root, dirs, filenames) in os.walk(source):\n    for f in filenames:\n        pass",
    "question_id": "11801309-27",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.walk"
    ],
    "clean_cmd": " (root, dirs, filenames)  os.walk(source): f  filenames:",
    "canonical_cmd": "for root, dirs, filenames in os.walk(VAR_STR):\n    for f in filenames:\n        pass"
  },
  {
    "nl": "Google App Engine execute GQL query 'SELECT * FROM Schedule WHERE station = $1' with parameter `foo.key()`",
    "cmd": "db.GqlQuery('SELECT * FROM Schedule WHERE station = $1', foo.key())",
    "question_id": "852055-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.zoneinfo#zoneinfo.ZoneInfo.key"
    ],
    "clean_cmd": "db.GqlQuery(, foo.key())",
    "canonical_cmd": "db.GqlQuery('VAR_STR', foo.key())"
  },
  {
    "nl": "filter rows in pandas starting with alphabet 'f' using regular expression.",
    "cmd": "df.b.str.contains('^f')",
    "question_id": "15325182-98",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.series.str.contains"
    ],
    "clean_cmd": "df.b.str.contains()",
    "canonical_cmd": "df.b.str.contains('^f')"
  },
  {
    "nl": "pandas: delete rows in dataframe `df` based on multiple columns values",
    "cmd": "df.set_index(list('BC')).drop(tuples, errors='ignore').reset_index()",
    "question_id": "38535931-67",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.set_index",
      "python.library.functions#list",
      "pandas.reference.api.pandas.dataframe.reset_index",
      "pandas.reference.api.pandas.dataframe.drop"
    ],
    "clean_cmd": "df.set_index(list()).drop(tuples, errors=).reset_index()",
    "canonical_cmd": "VAR_STR.set_index(list('BC')).drop(tuples, errors='ignore').reset_index()"
  },
  {
    "nl": "format the variables `self.goals` and `self.penalties` using string formatting",
    "cmd": "\"\"\"({:d} goals, ${:d})\"\"\".format(self.goals, self.penalties)",
    "question_id": "13945749-18",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format(self.goals, self.penalties)",
    "canonical_cmd": "\"\"\"({:d} goals, ${:d})\"\"\".format(self.goals, self.penalties)"
  },
  {
    "nl": "format string \"({} goals, ${})\" with variables `goals` and `penalties`",
    "cmd": "\"\"\"({} goals, ${})\"\"\".format(self.goals, self.penalties)",
    "question_id": "13945749-85",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format(self.goals, self.penalties)",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".format(self.VAR_STR, self.VAR_STR)"
  },
  {
    "nl": "format string \"({0.goals} goals, ${0.penalties})\"",
    "cmd": "\"\"\"({0.goals} goals, ${0.penalties})\"\"\".format(self)",
    "question_id": "13945749-39",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": ".format(self)",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".format(self)"
  },
  {
    "nl": "convert list of lists `L` to list of integers",
    "cmd": "[int(''.join(str(d) for d in x)) for x in L]",
    "question_id": "18524642-58",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#str",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "[int(.join(str(d)  d  x))  x  L]",
    "canonical_cmd": "[int(''.join(str(d) for d in x)) for x in VAR_STR]"
  },
  {
    "nl": "combine elements of each list in list `L` into digits of a single integer",
    "cmd": "[''.join(str(d) for d in x) for x in L]",
    "question_id": "18524642-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "[.join(str(d)  d  x)  x  L]",
    "canonical_cmd": "[''.join(str(d) for d in x) for x in VAR_STR]"
  },
  {
    "nl": "convert a list of lists `L` to list of integers",
    "cmd": "L = [int(''.join([str(y) for y in x])) for x in L]",
    "question_id": "18524642-53",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#str",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "L = [int(.join([str(y)  y  x]))  x  L]",
    "canonical_cmd": "VAR_STR = [int(''.join([str(y) for y in x])) for x in VAR_STR]"
  },
  {
    "nl": "write the elements of list `lines` concatenated by special character '\\n' to file `myfile`",
    "cmd": "myfile.write('\\n'.join(lines))",
    "question_id": "7138686-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join",
      "python.library.os#os.write"
    ],
    "clean_cmd": "myfile.write(.join(lines))",
    "canonical_cmd": "VAR_STR.write('VAR_STR'.join(VAR_STR))"
  },
  {
    "nl": "removing an element from a list based on a predicate 'X' or 'N'",
    "cmd": "[x for x in ['AAT', 'XAC', 'ANT', 'TTA'] if 'X' not in x and 'N' not in x]",
    "question_id": "1866343-84",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[x  x  [, , , ]     x     x]",
    "canonical_cmd": "[x for x in ['AAT', 'XAC', 'ANT', 'TTA'] if 'VAR_STR' not in x and 'VAR_STR' not in\n    x]"
  },
  {
    "nl": "Remove duplicate words from a string `text` using regex",
    "cmd": "text = re.sub('\\\\b(\\\\w+)( \\\\1\\\\b)+', '\\\\1', text)",
    "question_id": "17238587-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "text = re.sub(, , text)",
    "canonical_cmd": "VAR_STR = re.sub('\\\\b(\\\\w+)( \\\\1\\\\b)+', '\\\\1', VAR_STR)"
  },
  {
    "nl": "search for string that matches regular expression pattern '(?<!Distillr)\\\\\\\\AcroTray\\\\.exe' in string 'C:\\\\SomeDir\\\\AcroTray.exe'",
    "cmd": "re.search('(?<!Distillr)\\\\\\\\AcroTray\\\\.exe', 'C:\\\\SomeDir\\\\AcroTray.exe')",
    "question_id": "15534223-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.search"
    ],
    "clean_cmd": "re.search(, )",
    "canonical_cmd": "re.search('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "split string 'QH QD JC KD JS' into a list on white spaces",
    "cmd": "\"\"\"QH QD JC KD JS\"\"\".split()",
    "question_id": "5453026-19",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": ".split()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".split()"
  },
  {
    "nl": "search for occurrences of regex pattern '>.*<' in xml string `line`",
    "cmd": "print(re.search('>.*<', line).group(0))",
    "question_id": "18168684-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.search",
      "python.library.re#re.Match.group"
    ],
    "clean_cmd": "print(re.search(, line).group())",
    "canonical_cmd": "print(re.search('VAR_STR', VAR_STR).group(0))"
  },
  {
    "nl": "erase all the contents of a file `filename`",
    "cmd": "open(filename, 'w').close()",
    "question_id": "4914277-19",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "open(filename, ).close()",
    "canonical_cmd": "open(VAR_STR, 'w').close()"
  },
  {
    "nl": "convert a string into datetime using the format '%Y-%m-%d %H:%M:%S.%f'",
    "cmd": "datetime.datetime.strptime(string_date, '%Y-%m-%d %H:%M:%S.%f')",
    "question_id": "19068269-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.datetime.strptime"
    ],
    "clean_cmd": "datetime.datetime.strptime(string_date, )",
    "canonical_cmd": "datetime.datetime.strptime(string_date, 'VAR_STR')"
  },
  {
    "nl": "find the index of a list with the first element equal to '332' within the list of lists `thelist`",
    "cmd": "[index for index, item in enumerate(thelist) if item[0] == '332']",
    "question_id": "20683167-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate"
    ],
    "clean_cmd": "[index  index, item  enumerate(thelist)  item[] == ]",
    "canonical_cmd": "[index for index, item in enumerate(VAR_STR) if item[0] == 'VAR_STR']"
  },
  {
    "nl": "subscript text 'H20' with '2' as subscripted in matplotlib labels for arrays 'x' and 'y'.",
    "cmd": "plt.plot(x, y, label='H\\u2082O')",
    "question_id": "17138464-49",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.plot"
    ],
    "clean_cmd": "plt.plot(x, y, label=)",
    "canonical_cmd": "plt.plot(VAR_STR, VAR_STR, label='H\u2082O')"
  },
  {
    "nl": "subscript text 'H20' with '2' as subscripted in matplotlib labels for arrays 'x' and 'y'.",
    "cmd": "plt.plot(x, y, label='$H_2O$')",
    "question_id": "17138464-97",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.plot"
    ],
    "clean_cmd": "plt.plot(x, y, label=)",
    "canonical_cmd": "plt.plot(VAR_STR, VAR_STR, label='$H_2O$')"
  },
  {
    "nl": "loop over a list `mylist` if sublists length equals 3",
    "cmd": "[x for x in mylist if len(x) == 3]",
    "question_id": "9138112-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len"
    ],
    "clean_cmd": "[x  x  mylist  len(x) == ]",
    "canonical_cmd": "[x for x in VAR_STR if len(x) == 3]"
  },
  {
    "nl": "initialize a list `lst` of 100 objects Object()",
    "cmd": "lst = [Object() for _ in range(100)]",
    "question_id": "1807026-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range",
      "python.library.functions#object"
    ],
    "clean_cmd": "lst = [Object()  _  range()]",
    "canonical_cmd": "VAR_STR = [Object() for _ in range(100)]"
  },
  {
    "nl": "create list `lst` containing 100 instances of object `Object`",
    "cmd": "lst = [Object() for i in range(100)]",
    "question_id": "1807026-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range",
      "python.library.functions#object"
    ],
    "clean_cmd": "lst = [Object()  i  range()]",
    "canonical_cmd": "VAR_STR = [VAR_STR() for i in range(100)]"
  },
  {
    "nl": "get the content of child tag with`href` attribute whose parent has css `someclass`",
    "cmd": "self.driver.find_element_by_css_selector('.someclass a').get_attribute('href')",
    "question_id": "19664253-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.test#test.support.get_attribute"
    ],
    "clean_cmd": "self.driver.find_element_by_css_selector().get_attribute()",
    "canonical_cmd": "self.driver.find_element_by_css_selector('.someclass a').get_attribute('VAR_STR')"
  },
  {
    "nl": "joining data from dataframe `df1` with data from dataframe `df2` based on matching values of column 'Date_Time' in both dataframes",
    "cmd": "df1.merge(df2, on='Date_Time')",
    "question_id": "13793321-59",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.merge"
    ],
    "clean_cmd": "df1.merge(df2, on=)",
    "canonical_cmd": "VAR_STR.merge(VAR_STR, on='VAR_STR')"
  },
  {
    "nl": "use `%s` operator to print variable values `str1` inside a string",
    "cmd": "'first string is: %s, second one is: %s' % (str1, 'geo.tif')",
    "question_id": "3367288-60",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " % (str1, )",
    "canonical_cmd": "'first string is: %s, second one is: %s' % (VAR_STR, 'geo.tif')"
  },
  {
    "nl": "Split a string by a delimiter in python",
    "cmd": "[x.strip() for x in '2.MATCHES $$TEXT$$ STRING'.split('$$TEXT$$')]",
    "question_id": "3475251-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.strip",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "[x.strip()  x  .split()]",
    "canonical_cmd": "[x.strip() for x in '2.MATCHES $$TEXT$$ STRING'.split('$$TEXT$$')]"
  },
  {
    "nl": "check if directory `directory ` exists and create it if necessary",
    "cmd": "if (not os.path.exists(directory)):\n    os.makedirs(directory)",
    "question_id": "273192-100",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.exists",
      "python.library.os#os.makedirs"
    ],
    "clean_cmd": " ( os.path.exists(directory)):os.makedirs(directory)",
    "canonical_cmd": "if not os.path.exists(VAR_STR):\n    os.makedirs(VAR_STR)"
  },
  {
    "nl": "check if a directory `path` exists and create it if necessary",
    "cmd": "try:\n    os.makedirs(path)\nexcept OSError:\n    if (not os.path.isdir(path)):\n        raise",
    "question_id": "273192-60",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.isdir",
      "python.library.os#os.makedirs"
    ],
    "clean_cmd": ":os.makedirs(path) OSError: ( os.path.isdir(path)):",
    "canonical_cmd": "try:\n    os.makedirs(VAR_STR)\nexcept OSError:\n    if not os.VAR_STR.isdir(VAR_STR):\n        raise"
  },
  {
    "nl": "check if a directory `path` exists and create it if necessary",
    "cmd": "distutils.dir_util.mkpath(path)",
    "question_id": "273192-76",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "distutils.dir_util.mkpath(path)",
    "canonical_cmd": "distutils.dir_util.mkpath(VAR_STR)"
  },
  {
    "nl": "check if a directory `path` exists and create it if necessary",
    "cmd": "try:\n    os.makedirs(path)\nexcept OSError as exception:\n    if (exception.errno != errno.EEXIST):\n        raise",
    "question_id": "273192-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.makedirs"
    ],
    "clean_cmd": ":os.makedirs(path) OSError  exception: (exception.errno != errno.EEXIST):",
    "canonical_cmd": "try:\n    os.makedirs(VAR_STR)\nexcept OSError as exception:\n    if exception.errno != errno.EEXIST:\n        raise"
  },
  {
    "nl": "Replace a separate word 'H3' by 'H1' in a string 'text'",
    "cmd": "re.sub('\\\\bH3\\\\b', 'H1', text)",
    "question_id": "18785032-56",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , text)",
    "canonical_cmd": "re.sub('\\\\bH3\\\\b', 'VAR_STR', VAR_STR)"
  },
  {
    "nl": "substitute ASCII letters in string 'aas30dsa20' with empty string ''",
    "cmd": "re.sub('\\\\D', '', 'aas30dsa20')",
    "question_id": "1450897-93",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , )",
    "canonical_cmd": "re.sub('\\\\D', 'VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "get digits only from a string `aas30dsa20` using lambda function",
    "cmd": "\"\"\"\"\"\".join([x for x in 'aas30dsa20' if x.isdigit()])",
    "question_id": "1450897-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.isdigit",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join([x  x    x.isdigit()])",
    "canonical_cmd": "\"\"\"\"\"\".join([x for x in 'VAR_STR' if x.isdigit()])"
  },
  {
    "nl": "get a dictionary `records` of key-value pairs in PyMongo cursor `cursor`",
    "cmd": "records = dict((record['_id'], record) for record in cursor)",
    "question_id": "4928274-56",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "records = dict((record[], record)  record  cursor)",
    "canonical_cmd": "VAR_STR = dict((record['_id'], record) for record in VAR_STR)"
  },
  {
    "nl": "Create new matrix object  by concatenating data from matrix A and matrix B",
    "cmd": "np.concatenate((A, B))",
    "question_id": "20180210-37",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.concatenate"
    ],
    "clean_cmd": "np.concatenate((A, B))",
    "canonical_cmd": "np.concatenate((A, B))"
  },
  {
    "nl": "concat two matrices `A` and `B` in numpy",
    "cmd": "np.vstack((A, B))",
    "question_id": "20180210-35",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.vstack"
    ],
    "clean_cmd": "np.vstack((A, B))",
    "canonical_cmd": "np.vstack((VAR_STR, VAR_STR))"
  },
  {
    "nl": "find the key associated with the largest value in dictionary `x` whilst key is non-zero value",
    "cmd": "max(k for k, v in x.items() if v != 0)",
    "question_id": "1555968-67",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#max",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "max(k  k, v  x.items()  v != )",
    "canonical_cmd": "max(k for k, v in VAR_STR.items() if v != 0)"
  },
  {
    "nl": "get the largest key whose not associated with value of 0 in dictionary `x`",
    "cmd": "(k for k, v in x.items() if v != 0)",
    "question_id": "1555968-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "(k  k, v  x.items()  v != )",
    "canonical_cmd": "(k for k, v in VAR_STR.items() if v != 0)"
  },
  {
    "nl": "get the largest key in a dictionary `x` with non-zero value",
    "cmd": "max(k for k, v in x.items() if v != 0)",
    "question_id": "1555968-62",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#max",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "max(k  k, v  x.items()  v != )",
    "canonical_cmd": "max(k for k, v in VAR_STR.items() if v != 0)"
  },
  {
    "nl": "Put the curser at beginning of the file",
    "cmd": "file.seek(0)",
    "question_id": "17021863-48",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.io#io.IOBase.seek"
    ],
    "clean_cmd": "file.seek()",
    "canonical_cmd": "file.seek(0)"
  },
  {
    "nl": "remove key 'ele' from dictionary `d`",
    "cmd": "del d['ele']",
    "question_id": "4175686-32",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": " d[]",
    "canonical_cmd": "del VAR_STR['VAR_STR']"
  },
  {
    "nl": "Update datetime field in `MyModel` to be the existing `timestamp` plus 100 years",
    "cmd": "MyModel.objects.update(timestamp=F('timestamp') + timedelta(days=36524.25))",
    "question_id": "5871168-82",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.timedelta",
      "python.library.turtle#turtle.update"
    ],
    "clean_cmd": "MyModel.objects.update(timestamp=F() + timedelta(days=36524.25))",
    "canonical_cmd": "VAR_STR.objects.update(VAR_STR=F('VAR_STR') + timedelta(days=36524.25))"
  },
  {
    "nl": "merge list `['it']` and list `['was']` and list `['annoying']` into one list",
    "cmd": "['it'] + ['was'] + ['annoying']",
    "question_id": "11574195-24",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[] + [] + []",
    "canonical_cmd": "[VAR_STR] + [VAR_STR] + [VAR_STR]"
  },
  {
    "nl": "increment a value with leading zeroes in a number `x`",
    "cmd": "str(int(x) + 1).zfill(len(x))",
    "question_id": "587647-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#int",
      "python.library.stdtypes#str",
      "python.library.stdtypes#str.zfill"
    ],
    "clean_cmd": "str(int(x) + ).zfill(len(x))",
    "canonical_cmd": "str(int(VAR_STR) + 1).zfill(len(VAR_STR))"
  },
  {
    "nl": "check if a pandas dataframe `df`'s index is sorted",
    "cmd": "all(df.index[:-1] <= df.index[1:])",
    "question_id": "17315881-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#all"
    ],
    "clean_cmd": "all(df.index[:-] &lt;= df.index[:])",
    "canonical_cmd": "all(VAR_STR.index[:-1] <= VAR_STR.index[1:])"
  },
  {
    "nl": "insert data from a string `testfield` to sqlite db `c`",
    "cmd": "c.execute(\"INSERT INTO test VALUES (?, 'bar')\", (testfield,))",
    "question_id": "14695134-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.msilib#msilib.View.Execute"
    ],
    "clean_cmd": "c.execute(, (testfield,))",
    "canonical_cmd": "VAR_STR.execute(\"INSERT INTO test VALUES (?, 'bar')\", (VAR_STR,))"
  },
  {
    "nl": "decode string \"\\\\x89\\\\n\" into a normal string",
    "cmd": "\"\"\"\\\\x89\\\\n\"\"\".decode('string_escape')",
    "question_id": "24242433-92",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": ".decode()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".decode('string_escape')"
  },
  {
    "nl": "convert a raw string `raw_string` into a normal string",
    "cmd": "raw_string.decode('string_escape')",
    "question_id": "24242433-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "raw_string.decode()",
    "canonical_cmd": "VAR_STR.decode('string_escape')"
  },
  {
    "nl": "convert a raw string `raw_byte_string` into a normal string",
    "cmd": "raw_byte_string.decode('unicode_escape')",
    "question_id": "24242433-10",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": "raw_byte_string.decode()",
    "canonical_cmd": "VAR_STR.decode('unicode_escape')"
  },
  {
    "nl": "split a string `s` with into all strings of repeated characters",
    "cmd": "[m.group(0) for m in re.finditer('(\\\\d)\\\\1*', s)]",
    "question_id": "22882922-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.finditer",
      "python.library.re#re.Match.group"
    ],
    "clean_cmd": "[m.group()  m  re.finditer(, s)]",
    "canonical_cmd": "[m.group(0) for m in re.finditer('(\\\\d)\\\\1*', VAR_STR)]"
  },
  {
    "nl": "scatter a plot with x, y position of `np.random.randn(100)` and face color equal to none",
    "cmd": "plt.scatter(np.random.randn(100), np.random.randn(100), facecolors='none')",
    "question_id": "4143502-27",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.random.generated.numpy.random.randn"
    ],
    "clean_cmd": "plt.scatter(np.random.randn(), np.random.randn(), facecolors=)",
    "canonical_cmd": "plt.scatter(np.random.randn(100), np.random.randn(100), facecolors='none')"
  },
  {
    "nl": "do a scatter plot with empty circles",
    "cmd": "plt.plot(np.random.randn(100), np.random.randn(100), 'o', mfc='none')",
    "question_id": "4143502-6",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.random.generated.numpy.random.randn"
    ],
    "clean_cmd": "plt.plot(np.random.randn(), np.random.randn(), , mfc=)",
    "canonical_cmd": "plt.plot(np.random.randn(100), np.random.randn(100), 'o', mfc='none')"
  },
  {
    "nl": "filter rows containing key word `ball` in column `ids`",
    "cmd": "df[df['ids'].str.contains('ball')]",
    "question_id": "27975069-4",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.series.str.contains"
    ],
    "clean_cmd": "df[df[].str.contains()]",
    "canonical_cmd": "df[df['VAR_STR'].str.contains('VAR_STR')]"
  },
  {
    "nl": "convert index at level 0 into a column in dataframe `df`",
    "cmd": "df.reset_index(level=0, inplace=True)",
    "question_id": "20461165-28",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.reset_index"
    ],
    "clean_cmd": "df.reset_index(level=, inplace=True)",
    "canonical_cmd": "VAR_STR.reset_index(level=0, inplace=True)"
  },
  {
    "nl": "Add indexes in a data frame `df` to a column `index1`",
    "cmd": "df['index1'] = df.index",
    "question_id": "20461165-64",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df[] = df.index",
    "canonical_cmd": "VAR_STR['VAR_STR'] = VAR_STR.index"
  },
  {
    "nl": "convert pandas index in a dataframe to columns",
    "cmd": "df.reset_index(level=['tick', 'obs'])",
    "question_id": "20461165-11",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.reset_index"
    ],
    "clean_cmd": "df.reset_index(level=[, ])",
    "canonical_cmd": "df.reset_index(level=['tick', 'obs'])"
  },
  {
    "nl": "Get reverse of list items from list 'b' using extended slicing",
    "cmd": "[x[::-1] for x in b]",
    "question_id": "4685571-89",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[x[::-]  x  b]",
    "canonical_cmd": "[x[::-1] for x in VAR_STR]"
  },
  {
    "nl": "convert list `list_of_ints` into a comma separated string",
    "cmd": "\"\"\",\"\"\".join([str(i) for i in list_of_ints])",
    "question_id": "438684-58",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join([str(i)  i  list_of_ints])",
    "canonical_cmd": "\"\"\",\"\"\".join([str(i) for i in VAR_STR])"
  },
  {
    "nl": "Send a post request with raw data `DATA` and basic authentication with `username` and `password`",
    "cmd": "requests.post(url, data=DATA, headers=HEADERS_DICT, auth=(username, password))",
    "question_id": "8519922-8",
    "cmd_name": "conala",
    "oracle_man": [
      "pygame.ref.fastevent#pygame.fastevent.post"
    ],
    "clean_cmd": "requests.post(url, data=DATA, headers=HEADERS_DICT, auth=(username, password))",
    "canonical_cmd": "requests.post(url, data=VAR_STR, headers=HEADERS_DICT, auth=(VAR_STR, VAR_STR))"
  },
  {
    "nl": "Iterate ove list `[1, 2, 3]` using list comprehension",
    "cmd": "print([item for item in [1, 2, 3]])",
    "question_id": "22365172-32",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print([item  item  [, , ]])",
    "canonical_cmd": "print([item for item in [VAR_STR]])"
  },
  {
    "nl": "extract all the values with keys 'x' and 'y' from a list of dictionaries `d` to list of tuples",
    "cmd": "[(x['x'], x['y']) for x in d]",
    "question_id": "12300912-9",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[(x[], x[])  x  d]",
    "canonical_cmd": "[(VAR_STR['VAR_STR'], VAR_STR['VAR_STR']) for VAR_STR in VAR_STR]"
  },
  {
    "nl": "get the filename without the extension from file 'hemanth.txt'",
    "cmd": "print(os.path.splitext(os.path.basename('hemanth.txt'))[0])",
    "question_id": "678236-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.basename",
      "python.library.os.path#os.path.splitext"
    ],
    "clean_cmd": "print(os.path.splitext(os.path.basename())[])",
    "canonical_cmd": "print(os.path.splitext(os.path.basename('VAR_STR'))[0])"
  },
  {
    "nl": "create a dictionary by adding each two adjacent elements in tuple `x` as key/value pair to it",
    "cmd": "dict(x[i:i + 2] for i in range(0, len(x), 2))",
    "question_id": "2597166-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range",
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict(x[i:i + ]  i  range(, len(x), ))",
    "canonical_cmd": "dict(VAR_STR[i:i + 2] for i in range(0, len(VAR_STR), 2))"
  },
  {
    "nl": "create a list containing flattened list `[['A', 'B', 'C'], ['D', 'E', 'F'], ['G', 'H', 'I']]`",
    "cmd": "values = sum([['A', 'B', 'C'], ['D', 'E', 'F'], ['G', 'H', 'I']], [])",
    "question_id": "7895449-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "values = sum([[, , ], [, , ], [, , ]], [])",
    "canonical_cmd": "values = sum([VAR_STR], [])"
  },
  {
    "nl": "select rows in a dataframe `df` column 'closing_price' between two values 99 and 101",
    "cmd": "df = df[(df['closing_price'] >= 99) & (df['closing_price'] <= 101)]",
    "question_id": "31617845-12",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df = df[(df[] &gt;= ) &amp; (df[] &lt;= )]",
    "canonical_cmd": "VAR_STR = VAR_STR[(VAR_STR['VAR_STR'] >= 99) & (VAR_STR['VAR_STR'] <= 101)]"
  },
  {
    "nl": "replace all occurences of newlines `\\n` with `<br>` in dataframe `df`",
    "cmd": "df.replace({'\\n': '<br>'}, regex=True)",
    "question_id": "25698710-50",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.replace"
    ],
    "clean_cmd": "df.replace({: }, regex=True)",
    "canonical_cmd": "VAR_STR.replace({'VAR_STR': 'VAR_STR'}, regex=True)"
  },
  {
    "nl": "replace all occurrences of a string `\\n` by string `<br>` in a pandas data frame `df`",
    "cmd": "df.replace({'\\n': '<br>'}, regex=True)",
    "question_id": "25698710-24",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.replace"
    ],
    "clean_cmd": "df.replace({: }, regex=True)",
    "canonical_cmd": "VAR_STR.replace({'VAR_STR': 'VAR_STR'}, regex=True)"
  },
  {
    "nl": "create a list containing each two adjacent letters in string `word` as its elements",
    "cmd": "[(x + y) for x, y in zip(word, word[1:])]",
    "question_id": "41923858-90",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "[(x + y)  x, y  zip(word, word[:])]",
    "canonical_cmd": "[(x + y) for x, y in zip(VAR_STR, VAR_STR[1:])]"
  },
  {
    "nl": "Get a list of pairs from a string `word` using lambda function",
    "cmd": "list(map(lambda x, y: x + y, word[:-1], word[1:]))",
    "question_id": "41923858-58",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.functions#list"
    ],
    "clean_cmd": "list(map( x, y: x + y, word[:-], word[:]))",
    "canonical_cmd": "list(map(lambda x, y: x + y, VAR_STR[:-1], VAR_STR[1:]))"
  },
  {
    "nl": "extract a url from a string `myString`",
    "cmd": "print(re.findall('(https?://[^\\\\s]+)', myString))",
    "question_id": "9760588-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "print(re.findall(, myString))",
    "canonical_cmd": "print(re.findall('(https?://[^\\\\s]+)', VAR_STR))"
  },
  {
    "nl": "extract a url from a string `myString`",
    "cmd": "print(re.search('(?P<url>https?://[^\\\\s]+)', myString).group('url'))",
    "question_id": "9760588-30",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.search",
      "python.library.re#re.Match.group"
    ],
    "clean_cmd": "print(re.search(, myString).group())",
    "canonical_cmd": "print(re.search('(?P<url>https?://[^\\\\s]+)', VAR_STR).group('url'))"
  },
  {
    "nl": "remove all special characters, punctuation and spaces from a string `mystring` using regex",
    "cmd": "re.sub('[^A-Za-z0-9]+', '', mystring)",
    "question_id": "5843518-46",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , mystring)",
    "canonical_cmd": "re.sub('[^A-Za-z0-9]+', '', VAR_STR)"
  },
  {
    "nl": "create a DatetimeIndex containing 13 periods of the second friday of each month starting from date '2016-01-01'",
    "cmd": "pd.date_range('2016-01-01', freq='WOM-2FRI', periods=13)",
    "question_id": "36674519-26",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.date_range"
    ],
    "clean_cmd": "pd.date_range(, freq=, periods=)",
    "canonical_cmd": "pd.date_range('VAR_STR', freq='WOM-2FRI', periods=13)"
  },
  {
    "nl": "Create multidimensional array `matrix` with 3 rows and 2 columns in python",
    "cmd": "matrix = [[a, b], [c, d], [e, f]]",
    "question_id": "508657-65",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "matrix = [[a, b], [c, d], [e, f]]",
    "canonical_cmd": "VAR_STR = [[a, b], [c, d], [e, f]]"
  },
  {
    "nl": "replace spaces with underscore",
    "cmd": "mystring.replace(' ', '_')",
    "question_id": "1007481-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "mystring.replace(, )",
    "canonical_cmd": "mystring.replace(' ', '_')"
  },
  {
    "nl": "get an absolute file path of file 'mydir/myfile.txt'",
    "cmd": "os.path.abspath('mydir/myfile.txt')",
    "question_id": "51520-53",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.abspath"
    ],
    "clean_cmd": "os.path.abspath()",
    "canonical_cmd": "os.path.abspath('VAR_STR')"
  },
  {
    "nl": "split string `my_string` on white spaces",
    "cmd": "\"\"\" \"\"\".join(my_string.split())",
    "question_id": "1249786-22",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": ".join(my_string.split())",
    "canonical_cmd": "\"\"\" \"\"\".join(VAR_STR.split())"
  },
  {
    "nl": "get filename without extension from file `filename`",
    "cmd": "os.path.splitext(filename)[0]",
    "question_id": "4444923-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.splitext"
    ],
    "clean_cmd": "os.path.splitext(filename)[]",
    "canonical_cmd": "os.path.splitext(VAR_STR)[0]"
  },
  {
    "nl": "get a list containing the sum of each element `i` in list `l` plus the previous elements",
    "cmd": "[sum(l[:i]) for i, _ in enumerate(l)]",
    "question_id": "13728486-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate",
      "python.library.functions#sum"
    ],
    "clean_cmd": "[sum(l[:i])  i, _  enumerate(l)]",
    "canonical_cmd": "[sum(VAR_STR[:VAR_STR]) for VAR_STR, _ in enumerate(VAR_STR)]"
  },
  {
    "nl": "split a string `Docs/src/Scripts/temp` by `/` keeping `/` in the result",
    "cmd": "\"\"\"Docs/src/Scripts/temp\"\"\".replace('/', '/\\x00/').split('\\x00')",
    "question_id": "9743134-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": ".replace(, ).split()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".replace('VAR_STR', '/\\x00/').split('\\x00')"
  },
  {
    "nl": "copy all values in a column 'B' to a new column 'D' in a pandas data frame 'df'",
    "cmd": "df['D'] = df['B']",
    "question_id": "32675861-45",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df[] = df[]",
    "canonical_cmd": "VAR_STR['VAR_STR'] = VAR_STR['VAR_STR']"
  },
  {
    "nl": "find a value within nested json 'data' where the key inside another key 'B' is unknown.",
    "cmd": "list(data['A']['B'].values())[0]['maindata'][0]['Info']",
    "question_id": "14227561-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "list(data[][].values())[][][][]",
    "canonical_cmd": "list(VAR_STR['A']['VAR_STR'].values())[0]['maindata'][0]['Info']"
  },
  {
    "nl": "check characters of string `string` are true predication of function `predicate`",
    "cmd": "all(predicate(x) for x in string)",
    "question_id": "14858916-19",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#all"
    ],
    "clean_cmd": "all(predicate(x)  x  string)",
    "canonical_cmd": "all(VAR_STR(x) for x in VAR_STR)"
  },
  {
    "nl": "convert string `user_input` into a list of integers `user_list`",
    "cmd": "user_list = [int(number) for number in user_input.split(',')]",
    "question_id": "6378889-70",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "user_list = [int(number)  number  user_input.split()]",
    "canonical_cmd": "VAR_STR = [int(number) for number in VAR_STR.split(',')]"
  },
  {
    "nl": "Get a list of integers by splitting  a string `user` with comma",
    "cmd": "[int(s) for s in user.split(',')]",
    "question_id": "6378889-60",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "[int(s)  s  user.split()]",
    "canonical_cmd": "[int(s) for s in VAR_STR.split(',')]"
  },
  {
    "nl": "Sorting a Python list by two criteria",
    "cmd": "sorted(list, key=lambda x: (x[0], -x[1]))",
    "question_id": "5212870-42",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(list, key= x: (x[], -x[]))",
    "canonical_cmd": "sorted(list, key=lambda x: (x[0], -x[1]))"
  },
  {
    "nl": "sort a list of objects `ut`, based on a function `cmpfun` in descending order",
    "cmd": "ut.sort(key=cmpfun, reverse=True)",
    "question_id": "403421-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "ut.sort(key=cmpfun, reverse=True)",
    "canonical_cmd": "VAR_STR.sort(key=VAR_STR, reverse=True)"
  },
  {
    "nl": "reverse list `ut` based on the `count` attribute of each object",
    "cmd": "ut.sort(key=lambda x: x.count, reverse=True)",
    "question_id": "403421-16",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "ut.sort(key= x: x.count, reverse=True)",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: x.VAR_STR, reverse=True)"
  },
  {
    "nl": "sort a list of objects `ut` in reverse order by their `count` property",
    "cmd": "ut.sort(key=lambda x: x.count, reverse=True)",
    "question_id": "403421-6",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "ut.sort(key= x: x.count, reverse=True)",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: x.VAR_STR, reverse=True)"
  },
  {
    "nl": "click a href button 'Send' with selenium",
    "cmd": "driver.find_element_by_partial_link_text('Send').click()",
    "question_id": "19601086-79",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.find_element_by_partial_link_text().click()",
    "canonical_cmd": "driver.find_element_by_partial_link_text('VAR_STR').click()"
  },
  {
    "nl": "click a href button having text `Send InMail` with selenium",
    "cmd": "driver.findElement(By.linkText('Send InMail')).click()",
    "question_id": "19601086-21",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.findElement(By.linkText()).click()",
    "canonical_cmd": "driver.findElement(By.linkText('VAR_STR')).click()"
  },
  {
    "nl": "click a href button with text 'Send InMail' with selenium",
    "cmd": "driver.find_element_by_link_text('Send InMail').click()",
    "question_id": "19601086-18",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.find_element_by_link_text().click()",
    "canonical_cmd": "driver.find_element_by_link_text('VAR_STR').click()"
  },
  {
    "nl": "cast an int `i` to a string and concat to string 'ME'",
    "cmd": "'ME' + str(i)",
    "question_id": "3944876-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str"
    ],
    "clean_cmd": " + str(i)",
    "canonical_cmd": "'VAR_STR' + str(VAR_STR)"
  },
  {
    "nl": "Sorting data in DataFrame Pandas",
    "cmd": "df.sort_values(['System_num', 'Dis'])",
    "question_id": "40903174-8",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.sort_values"
    ],
    "clean_cmd": "df.sort_values([, ])",
    "canonical_cmd": "df.sort_values(['System_num', 'Dis'])"
  },
  {
    "nl": "sort a list `l` by length of value in tuple",
    "cmd": "l.sort(key=lambda t: len(t[1]), reverse=True)",
    "question_id": "19729928-41",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "l.sort(key= t: len(t[]), reverse=True)",
    "canonical_cmd": "VAR_STR.sort(key=lambda t: len(t[1]), reverse=True)"
  },
  {
    "nl": "split string `s` by words that ends with 'd'",
    "cmd": "re.findall('\\\\b(\\\\w+)d\\\\b', s)",
    "question_id": "31371879-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, s)",
    "canonical_cmd": "re.findall('\\\\b(\\\\w+)d\\\\b', VAR_STR)"
  },
  {
    "nl": "convert elements of each tuple in list `l` into a string  separated by character `@`",
    "cmd": "\"\"\" \"\"\".join([('%d@%d' % t) for t in l])",
    "question_id": "4284648-18",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join([( % t)  t  l])",
    "canonical_cmd": "\"\"\" \"\"\".join([('%d@%d' % t) for t in VAR_STR])"
  },
  {
    "nl": "convert each tuple in list `l` to a string with '@' separating the tuples' elements",
    "cmd": "\"\"\" \"\"\".join([('%d@%d' % (t[0], t[1])) for t in l])",
    "question_id": "4284648-27",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join([( % (t[], t[]))  t  l])",
    "canonical_cmd": "\"\"\" \"\"\".join([('%d@%d' % (t[0], t[1])) for t in VAR_STR])"
  },
  {
    "nl": "get the html from the current web page of a Selenium driver",
    "cmd": "driver.execute_script('return document.documentElement.outerHTML;')",
    "question_id": "26809954-5",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "driver.execute_script()",
    "canonical_cmd": "driver.execute_script('return document.documentElement.outerHTML;')"
  },
  {
    "nl": "Get all matches with regex pattern `\\\\d+[xX]` in list of string `teststr`",
    "cmd": "[i for i in teststr if re.search('\\\\d+[xX]', i)]",
    "question_id": "29696641-55",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.search"
    ],
    "clean_cmd": "[i  i  teststr  re.search(, i)]",
    "canonical_cmd": "[i for i in VAR_STR if re.search('VAR_STR', i)]"
  },
  {
    "nl": "select values from column 'A' for which corresponding values in column 'B' will be greater than 50, and in column 'C' - equal 900 in dataframe `df`",
    "cmd": "df['A'][(df['B'] > 50) & (df['C'] == 900)]",
    "question_id": "15315452-50",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df[][(df[] &gt; ) &amp; (df[] == )]",
    "canonical_cmd": "VAR_STR['VAR_STR'][(VAR_STR['VAR_STR'] > 50) & (VAR_STR['VAR_STR'] == 900)]"
  },
  {
    "nl": "Sort dictionary `o` in ascending order based on its keys and items",
    "cmd": "sorted(o.items())",
    "question_id": "4642501-94",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(o.items())",
    "canonical_cmd": "sorted(VAR_STR.items())"
  },
  {
    "nl": "get sorted list of keys of dict `d`",
    "cmd": "sorted(d)",
    "question_id": "4642501-41",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(d)",
    "canonical_cmd": "sorted(VAR_STR)"
  },
  {
    "nl": "How to sort dictionaries by keys in Python",
    "cmd": "sorted(d.items())",
    "question_id": "4642501-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(d.items())",
    "canonical_cmd": "sorted(d.items())"
  },
  {
    "nl": "convert string \"1\" into integer",
    "cmd": "int('1')",
    "question_id": "642154-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "int()",
    "canonical_cmd": "int('VAR_STR')"
  },
  {
    "nl": "function to convert strings into integers",
    "cmd": "int()",
    "question_id": "642154-48",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int"
    ],
    "clean_cmd": "int()",
    "canonical_cmd": "int()"
  },
  {
    "nl": "convert items in `T1` to integers",
    "cmd": "T2 = [map(int, x) for x in T1]",
    "question_id": "642154-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map"
    ],
    "clean_cmd": "T2 = [map(int, x)  x  T1]",
    "canonical_cmd": "T2 = [map(int, x) for x in VAR_STR]"
  },
  {
    "nl": "call a shell script `./test.sh` using subprocess",
    "cmd": "subprocess.call(['./test.sh'])",
    "question_id": "3777301-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call([])",
    "canonical_cmd": "subprocess.call(['VAR_STR'])"
  },
  {
    "nl": "call a shell script `notepad` using subprocess",
    "cmd": "subprocess.call(['notepad'])",
    "question_id": "3777301-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call([])",
    "canonical_cmd": "subprocess.call(['VAR_STR'])"
  },
  {
    "nl": "combine lists `l1` and `l2`  by alternating their elements",
    "cmd": "[val for pair in zip(l1, l2) for val in pair]",
    "question_id": "7946798-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "[val  pair  zip(l1, l2)  val  pair]",
    "canonical_cmd": "[val for pair in zip(VAR_STR, VAR_STR) for val in pair]"
  },
  {
    "nl": "parse tab-delimited CSV file 'text.txt' into a list",
    "cmd": "lol = list(csv.reader(open('text.txt', 'rb'), delimiter='\\t'))",
    "question_id": "7856296-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.csv#csv.reader",
      "python.library.functions#list",
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "lol = list(csv.reader(open(, ), delimiter=))",
    "canonical_cmd": "lol = list(csv.reader(open('VAR_STR', 'rb'), delimiter='\\t'))"
  },
  {
    "nl": "group a list of dicts `LD` into one dict by key",
    "cmd": "print(dict(zip(LD[0], zip(*[list(d.values()) for d in LD]))))",
    "question_id": "5558418-19",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.stdtypes#dict",
      "python.library.functions#list",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "print(dict(zip(LD[], zip(*[list(d.values())  d  LD]))))",
    "canonical_cmd": "print(dict(zip(VAR_STR[0], zip(*[list(d.values()) for d in VAR_STR]))))"
  },
  {
    "nl": "How do I sum the first value in each tuple in a list of tuples in Python?",
    "cmd": "sum([pair[0] for pair in list_of_pairs])",
    "question_id": "638048-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum([pair[]  pair  list_of_pairs])",
    "canonical_cmd": "sum([pair[0] for pair in list_of_pairs])"
  },
  {
    "nl": "convert unicode string u\"{'code1':1,'code2':1}\" into dictionary",
    "cmd": "d = ast.literal_eval(\"{'code1':1,'code2':1}\")",
    "question_id": "14950260-11",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.ast#ast.literal_eval"
    ],
    "clean_cmd": "d = ast.literal_eval()",
    "canonical_cmd": "d = ast.literal_eval('VAR_STR')"
  },
  {
    "nl": "find all words in a string `mystring` that start with the `$` sign",
    "cmd": "[word for word in mystring.split() if word.startswith('$')]",
    "question_id": "11416772-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.startswith",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "[word  word  mystring.split()  word.startswith()]",
    "canonical_cmd": "[word for word in VAR_STR.split() if word.startswith('VAR_STR')]"
  },
  {
    "nl": "remove any url within string `text`",
    "cmd": "text = re.sub('^https?:\\\\/\\\\/.*[\\\\r\\\\n]*', '', text, flags=re.MULTILINE)",
    "question_id": "11331982-33",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "text = re.sub(, , text, flags=re.MULTILINE)",
    "canonical_cmd": "VAR_STR = re.sub('^https?:\\\\/\\\\/.*[\\\\r\\\\n]*', '', VAR_STR, flags=re.MULTILINE)"
  },
  {
    "nl": "running r script '/pathto/MyrScript.r' from python",
    "cmd": "subprocess.call(['/usr/bin/Rscript', '--vanilla', '/pathto/MyrScript.r'])",
    "question_id": "19894365-51",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call([, , ])",
    "canonical_cmd": "subprocess.call(['/usr/bin/Rscript', '--vanilla', 'VAR_STR'])"
  },
  {
    "nl": "run r script '/usr/bin/Rscript --vanilla /pathto/MyrScript.r'",
    "cmd": "subprocess.call('/usr/bin/Rscript --vanilla /pathto/MyrScript.r', shell=True)",
    "question_id": "19894365-72",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.call"
    ],
    "clean_cmd": "subprocess.call(, shell=True)",
    "canonical_cmd": "subprocess.call('VAR_STR', shell=True)"
  },
  {
    "nl": "replacing nan in the dataframe `df` with row average",
    "cmd": "df.fillna(df.mean(axis=1), axis=1)",
    "question_id": "33058590-67",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.fillna",
      "pandas.reference.api.pandas.dataframe.mean"
    ],
    "clean_cmd": "df.fillna(df.mean(axis=), axis=)",
    "canonical_cmd": "VAR_STR.fillna(VAR_STR.mean(axis=1), axis=1)"
  },
  {
    "nl": "Convert unix timestamp '1347517370' to formatted string '%Y-%m-%d %H:%M:%S'",
    "cmd": "time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(1347517370))",
    "question_id": "12400256-14",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.localtime",
      "python.library.time#time.strftime"
    ],
    "clean_cmd": "time.strftime(, time.localtime())",
    "canonical_cmd": "time.strftime('VAR_STR', time.localtime(1347517370))"
  },
  {
    "nl": "Call a base class's class method `do` from derived class `Derived`",
    "cmd": "super(Derived, cls).do(a)",
    "question_id": "1269217-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#super"
    ],
    "clean_cmd": "super(Derived, cls).do(a)",
    "canonical_cmd": "super(VAR_STR, cls).VAR_STR(a)"
  },
  {
    "nl": "separate words delimited by one or more spaces into a list",
    "cmd": "re.split(' +', 'hello world sample text')",
    "question_id": "4383082-35",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, )",
    "canonical_cmd": "re.split(' +', 'hello world sample text')"
  },
  {
    "nl": "length of longest element in list `words`",
    "cmd": "len(max(words, key=len))",
    "question_id": "14637696-27",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#max"
    ],
    "clean_cmd": "len(max(words, key=len))",
    "canonical_cmd": "len(max(VAR_STR, key=len))"
  },
  {
    "nl": "get the value associated with unicode key 'from_user' of first dictionary in list `result`",
    "cmd": "result[0]['from_user']",
    "question_id": "3933478-38",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "result[][]",
    "canonical_cmd": "VAR_STR[0]['VAR_STR']"
  },
  {
    "nl": "Retrieve each line from a file 'File.txt' as a list",
    "cmd": "[line.split() for line in open('File.txt')]",
    "question_id": "39112645-91",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "[line.split()  line  open()]",
    "canonical_cmd": "[line.split() for line in open('VAR_STR')]"
  },
  {
    "nl": "swap keys with values in a dictionary `a`",
    "cmd": "res = dict((v, k) for k, v in a.items())",
    "question_id": "1031851-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "res = dict((v, k)  k, v  a.items())",
    "canonical_cmd": "res = dict((v, k) for k, v in VAR_STR.items())"
  },
  {
    "nl": "Open a file `path/to/FILE_NAME.ext` in write mode",
    "cmd": "new_file = open('path/to/FILE_NAME.ext', 'w')",
    "question_id": "8577137-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "new_file = open(, )",
    "canonical_cmd": "new_file = open('VAR_STR', 'w')"
  },
  {
    "nl": "How to count distinct values in a column of a pandas group by object?",
    "cmd": "df.groupby(['col1', 'col2'])['col3'].nunique().reset_index()",
    "question_id": "17926273-6",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.dataframe.reset_index",
      "pandas.reference.api.pandas.dataframe.nunique"
    ],
    "clean_cmd": "df.groupby([, ])[].nunique().reset_index()",
    "canonical_cmd": "df.groupby(['col1', 'col2'])['col3'].nunique().reset_index()"
  },
  {
    "nl": "Check if any key in the dictionary `dict1` starts with the string `EMP$$`",
    "cmd": "any(key.startswith('EMP$$') for key in dict1)",
    "question_id": "3735814-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#any",
      "python.library.stdtypes#str.startswith"
    ],
    "clean_cmd": "any(key.startswith()  key  dict1)",
    "canonical_cmd": "any(key.startswith('VAR_STR') for key in VAR_STR)"
  },
  {
    "nl": "create list of values from dictionary `dict1` that have a key that starts with 'EMP$$'",
    "cmd": "[value for key, value in list(dict1.items()) if key.startswith('EMP$$')]",
    "question_id": "3735814-56",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#str.startswith",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "[value  key, value  list(dict1.items())  key.startswith()]",
    "canonical_cmd": "[value for key, value in list(VAR_STR.items()) if key.startswith('VAR_STR')]"
  },
  {
    "nl": "print elements of list `list` seperated by tabs `\\t`",
    "cmd": "print('\\t'.join(map(str, list)))",
    "question_id": "4048964-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": "print(.join(map(str, list)))",
    "canonical_cmd": "print('VAR_STR'.join(map(str, VAR_STR)))"
  },
  {
    "nl": "print unicode string '\\xd0\\xbf\\xd1\\x80\\xd0\\xb8' with utf-8",
    "cmd": "print('\\xd0\\xbf\\xd1\\x80\\xd0\\xb8'.encode('raw_unicode_escape'))",
    "question_id": "3182716-62",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "print(.encode())",
    "canonical_cmd": "print('VAR_STR'.encode('raw_unicode_escape'))"
  },
  {
    "nl": "Encode a latin character in string `Sopet\\xc3\\xb3n` properly",
    "cmd": "'Sopet\\xc3\\xb3n'.encode('latin-1').decode('utf-8')",
    "question_id": "3182716-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode",
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": ".encode().decode()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".encode('latin-1').decode('utf-8')"
  },
  {
    "nl": "regex, find \"n\"s only in the middle of string `s`",
    "cmd": "re.findall('n(?<=[^n]n)n+(?=[^n])(?i)', s)",
    "question_id": "35622945-39",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, s)",
    "canonical_cmd": "re.findall('n(?<=[^n]n)n+(?=[^n])(?i)', VAR_STR)"
  },
  {
    "nl": "display the float `1/3*100` as a percentage",
    "cmd": "print('{0:.0f}%'.format(1.0 / 3 * 100))",
    "question_id": "5306756-0",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": "print(.format(1.0 /  * ))",
    "canonical_cmd": "print('{0:.0f}%'.format(1.0 / 3 * 100))"
  },
  {
    "nl": "sort a list of dictionary `mylist` by the key `title`",
    "cmd": "mylist.sort(key=lambda x: x['title'])",
    "question_id": "2878084-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "mylist.sort(key= x: x[])",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: x['VAR_STR'])"
  },
  {
    "nl": "sort a list `l` of dicts by dict value 'title'",
    "cmd": "l.sort(key=lambda x: x['title'])",
    "question_id": "2878084-61",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "l.sort(key= x: x[])",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: x['VAR_STR'])"
  },
  {
    "nl": "sort a list of dictionaries by the value of keys 'title', 'title_url', 'id' in ascending order.",
    "cmd": "l.sort(key=lambda x: (x['title'], x['title_url'], x['id']))",
    "question_id": "2878084-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "l.sort(key= x: (x[], x[], x[]))",
    "canonical_cmd": "l.sort(key=lambda x: (x['VAR_STR'], x['VAR_STR'], x['VAR_STR']))"
  },
  {
    "nl": "write records in dataframe `df` to table 'test' in schema 'a_schema'",
    "cmd": "df.to_sql('test', engine, schema='a_schema')",
    "question_id": "24189150-87",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.to_sql"
    ],
    "clean_cmd": "df.to_sql(, engine, schema=)",
    "canonical_cmd": "VAR_STR.to_sql('VAR_STR', engine, schema='VAR_STR')"
  },
  {
    "nl": "Extract brackets from string `s`",
    "cmd": "brackets = re.sub('[^(){}[\\\\]]', '', s)",
    "question_id": "30766151-61",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "brackets = re.sub(, , s)",
    "canonical_cmd": "brackets = re.sub('[^(){}[\\\\]]', '', VAR_STR)"
  },
  {
    "nl": "remove duplicate elements from list 'L'",
    "cmd": "list(dict((x[0], x) for x in L).values())",
    "question_id": "1143379-23",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.functions#list",
      "python.library.stdtypes#dict.values"
    ],
    "clean_cmd": "list(dict((x[], x)  x  L).values())",
    "canonical_cmd": "list(dict((x[0], x) for x in VAR_STR).values())"
  },
  {
    "nl": "read a file `file` without newlines",
    "cmd": "[line.rstrip('\\n') for line in file]",
    "question_id": "12330522-51",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rstrip"
    ],
    "clean_cmd": "[line.rstrip()  line  file]",
    "canonical_cmd": "[line.rstrip('\\n') for line in VAR_STR]"
  },
  {
    "nl": "get the position of item 1 in `testlist`",
    "cmd": "[i for (i, x) in enumerate(testlist) if (x == 1)]",
    "question_id": "364621-80",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate"
    ],
    "clean_cmd": "[i  (i, x)  enumerate(testlist)  (x == )]",
    "canonical_cmd": "[i for i, x in enumerate(VAR_STR) if x == 1]"
  },
  {
    "nl": "get the position of item 1 in `testlist`",
    "cmd": "[i for (i, x) in enumerate(testlist) if (x == 1)]",
    "question_id": "364621-61",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate"
    ],
    "clean_cmd": "[i  (i, x)  enumerate(testlist)  (x == )]",
    "canonical_cmd": "[i for i, x in enumerate(VAR_STR) if x == 1]"
  },
  {
    "nl": "get the position of item 1 in `testlist`",
    "cmd": "for i in [i for (i, x) in enumerate(testlist) if (x == 1)]:\n    pass",
    "question_id": "364621-20",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate"
    ],
    "clean_cmd": " i  [i  (i, x)  enumerate(testlist)  (x == )]:",
    "canonical_cmd": "for i in [i for i, x in enumerate(VAR_STR) if x == 1]:\n    pass"
  },
  {
    "nl": "get the position of item 1 in `testlist`",
    "cmd": "for i in (i for (i, x) in enumerate(testlist) if (x == 1)):\n    pass",
    "question_id": "364621-36",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate"
    ],
    "clean_cmd": " i  (i  (i, x)  enumerate(testlist)  (x == )):",
    "canonical_cmd": "for i in (i for i, x in enumerate(VAR_STR) if x == 1):\n    pass"
  },
  {
    "nl": "get the position of item 1 in `testlist`",
    "cmd": "gen = (i for (i, x) in enumerate(testlist) if (x == 1))\nfor i in gen:\n    pass",
    "question_id": "364621-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate"
    ],
    "clean_cmd": "gen = (i  (i, x)  enumerate(testlist)  (x == )) i  gen:",
    "canonical_cmd": "gen = (i for i, x in enumerate(VAR_STR) if x == 1)\nfor i in gen:\n    pass"
  },
  {
    "nl": "get the position of item `element` in list `testlist`",
    "cmd": "print(testlist.index(element))",
    "question_id": "364621-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.index"
    ],
    "clean_cmd": "print(testlist.index(element))",
    "canonical_cmd": "print(VAR_STR.index(VAR_STR))"
  },
  {
    "nl": "get the position of item `element` in list `testlist`",
    "cmd": "try:\n    print(testlist.index(element))\nexcept ValueError:\n    pass",
    "question_id": "364621-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.index"
    ],
    "clean_cmd": ":print(testlist.index(element)) ValueError:",
    "canonical_cmd": "try:\n    print(VAR_STR.index(VAR_STR))\nexcept ValueError:\n    pass"
  },
  {
    "nl": "find the first element of the tuple with the maximum second element in a list of tuples `lis`",
    "cmd": "max(lis, key=lambda item: item[1])[0]",
    "question_id": "13145368-76",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#max"
    ],
    "clean_cmd": "max(lis, key= item: item[])[]",
    "canonical_cmd": "max(VAR_STR, key=lambda item: item[1])[0]"
  },
  {
    "nl": "get the item at index 0 from the tuple that has maximum value at index 1 in list `lis`",
    "cmd": "max(lis, key=itemgetter(1))[0]",
    "question_id": "13145368-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#max",
      "python.library.operator#operator.itemgetter"
    ],
    "clean_cmd": "max(lis, key=itemgetter())[]",
    "canonical_cmd": "max(VAR_STR, key=itemgetter(1))[0]"
  },
  {
    "nl": "Make a delay of 1 second",
    "cmd": "time.sleep(1)",
    "question_id": "2689189-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.time#time.sleep"
    ],
    "clean_cmd": "time.sleep()",
    "canonical_cmd": "time.sleep(1)"
  },
  {
    "nl": "convert list of tuples `L` to a string",
    "cmd": "\"\"\", \"\"\".join('(' + ', '.join(i) + ')' for i in L)",
    "question_id": "12485244-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join( + .join(i) +   i  L)",
    "canonical_cmd": "\"\"\", \"\"\".join('(' + ', '.join(i) + ')' for i in VAR_STR)"
  },
  {
    "nl": "Django set default value of field `b` equal to '0000000'",
    "cmd": "b = models.CharField(max_length=7, default='0000000', editable=False)",
    "question_id": "755857-66",
    "cmd_name": "conala",
    "oracle_man": [
      "django.ref.forms.fields#django.forms.CharField"
    ],
    "clean_cmd": "b = models.CharField(max_length=, default=, editable=False)",
    "canonical_cmd": "VAR_STR = models.CharField(max_length=7, default='VAR_STR', editable=False)"
  },
  {
    "nl": "convert a list  into a generator object",
    "cmd": "(n for n in [1, 2, 3, 5])",
    "question_id": "16041405-85",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "(n  n  [, , , ])",
    "canonical_cmd": "(n for n in [1, 2, 3, 5])"
  },
  {
    "nl": "remove elements from list `oldlist` that have an index number mentioned in list `removelist`",
    "cmd": "newlist = [v for i, v in enumerate(oldlist) if i not in removelist]",
    "question_id": "18837607-60",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate"
    ],
    "clean_cmd": "newlist = [v  i, v  enumerate(oldlist)  i   removelist]",
    "canonical_cmd": "newlist = [v for i, v in enumerate(VAR_STR) if i not in VAR_STR]"
  },
  {
    "nl": "Open a file `yourfile.txt` in write mode",
    "cmd": "f = open('yourfile.txt', 'w')",
    "question_id": "4710067-84",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "f = open(, )",
    "canonical_cmd": "f = open('VAR_STR', 'w')"
  },
  {
    "nl": "convert tuple of tuples `(('aa',), ('bb',), ('cc',))` to tuple",
    "cmd": "from functools import reduce\nreduce(lambda a, b: a + b, (('aa',), ('bb',), ('cc',)))",
    "question_id": "8171751-92",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "from functools import reducereduce( a, b: a + b, ((,), (,), (,)))",
    "canonical_cmd": "from functools import reduce\nreduce(lambda a, b: a + b, (VAR_STR))"
  },
  {
    "nl": "convert tuple of tuples `(('aa',), ('bb',), ('cc',))` to list in one line",
    "cmd": "map(lambda a: a[0], (('aa',), ('bb',), ('cc',)))",
    "question_id": "8171751-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map"
    ],
    "clean_cmd": "map( a: a[], ((,), (,), (,)))",
    "canonical_cmd": "map(lambda a: a[0], (VAR_STR))"
  },
  {
    "nl": "Python Pandas: How to replace a characters in a column of a dataframe?",
    "cmd": "df['range'].replace(',', '-', inplace=True)",
    "question_id": "28986489-84",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "df[].replace(, , inplace=True)",
    "canonical_cmd": "df['range'].replace(',', '-', inplace=True)"
  },
  {
    "nl": "unzip the list `[('a', 1), ('b', 2), ('c', 3), ('d', 4)]`",
    "cmd": "zip(*[('a', 1), ('b', 2), ('c', 3), ('d', 4)])",
    "question_id": "19339-13",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "zip(*[(, ), (, ), (, ), (, )])",
    "canonical_cmd": "zip(*[VAR_STR])"
  },
  {
    "nl": "unzip the list `[('a', 1), ('b', 2), ('c', 3), ('d', 4)]`",
    "cmd": "zip(*[('a', 1), ('b', 2), ('c', 3), ('d', 4)])",
    "question_id": "19339-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "zip(*[(, ), (, ), (, ), (, )])",
    "canonical_cmd": "zip(*[VAR_STR])"
  },
  {
    "nl": "unzip list `original`",
    "cmd": "result = ([a for (a, b) in original], [b for (a, b) in original])",
    "question_id": "19339-46",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "result = ([a  (a, b)  original], [b  (a, b)  original])",
    "canonical_cmd": "result = [a for a, b in VAR_STR], [b for a, b in VAR_STR]"
  },
  {
    "nl": "unzip list `original` and return a generator",
    "cmd": "result = ((a for (a, b) in original), (b for (a, b) in original))",
    "question_id": "19339-60",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "result = ((a  (a, b)  original), (b  (a, b)  original))",
    "canonical_cmd": "result = (a for a, b in VAR_STR), (b for a, b in VAR_STR)"
  },
  {
    "nl": "unzip list `[('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', )]`",
    "cmd": "zip(*[('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e',)])",
    "question_id": "19339-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip"
    ],
    "clean_cmd": "zip(*[(, ), (, ), (, ), (, ), (,)])",
    "canonical_cmd": "zip(*[('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e',)])"
  },
  {
    "nl": "unzip list `[('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', )]` and fill empty results with None",
    "cmd": "map(None, *[('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e',)])",
    "question_id": "19339-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map"
    ],
    "clean_cmd": "map(None, *[(, ), (, ), (, ), (, ), (,)])",
    "canonical_cmd": "map(None, *[('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e',)])"
  },
  {
    "nl": "Add key \"mynewkey\" to dictionary `d` with value \"mynewvalue\"",
    "cmd": "d['mynewkey'] = 'mynewvalue'",
    "question_id": "1024847-42",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "d[] = ",
    "canonical_cmd": "VAR_STR['VAR_STR'] = 'VAR_STR'"
  },
  {
    "nl": "Add key 'a' to dictionary `data` with value 1",
    "cmd": "data.update({'a': 1, })",
    "question_id": "1024847-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.update"
    ],
    "clean_cmd": "data.update({: , })",
    "canonical_cmd": "VAR_STR.update({'VAR_STR': 1})"
  },
  {
    "nl": "Add key 'a' to dictionary `data` with value 1",
    "cmd": "data.update(dict(a=1))",
    "question_id": "1024847-96",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.stdtypes#dict.update"
    ],
    "clean_cmd": "data.update(dict(a=))",
    "canonical_cmd": "VAR_STR.update(dict(VAR_STR=1))"
  },
  {
    "nl": "Add key 'a' to dictionary `data` with value 1",
    "cmd": "data.update(a=1)",
    "question_id": "1024847-96",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict",
      "python.library.stdtypes#dict.update"
    ],
    "clean_cmd": "data.update(a=)",
    "canonical_cmd": "VAR_STR.update(VAR_STR=1)"
  },
  {
    "nl": "find maximal value in matrix `matrix`",
    "cmd": "max([max(i) for i in matrix])",
    "question_id": "35837346-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#max"
    ],
    "clean_cmd": "max([max(i)  i  matrix])",
    "canonical_cmd": "max([max(i) for i in VAR_STR])"
  },
  {
    "nl": "Round number `answer` to 2 precision after the decimal point",
    "cmd": "answer = str(round(answer, 2))",
    "question_id": "20457038-38",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#round",
      "python.library.stdtypes#str"
    ],
    "clean_cmd": "answer = str(round(answer, ))",
    "canonical_cmd": "VAR_STR = str(round(VAR_STR, 2))"
  },
  {
    "nl": "extract ip address from an html string",
    "cmd": "ip = re.findall('[0-9]+(?:\\\\.[0-9]+){3}', s)",
    "question_id": "2890896-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "ip = re.findall(, s)",
    "canonical_cmd": "ip = re.findall('[0-9]+(?:\\\\.[0-9]+){3}', s)"
  },
  {
    "nl": "append each line in file `myfile` into a list",
    "cmd": "[x for x in myfile.splitlines() if x != '']",
    "question_id": "2545397-74",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.splitlines"
    ],
    "clean_cmd": "[x  x  myfile.splitlines()  x != ]",
    "canonical_cmd": "[x for x in VAR_STR.splitlines() if x != '']"
  },
  {
    "nl": "Get a list of integers `lst` from a file `filename.txt`",
    "cmd": "lst = map(int, open('filename.txt').readlines())",
    "question_id": "2545397-42",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#map",
      "python.library.urllib.request#open",
      "python.library.io#io.IOBase.readlines"
    ],
    "clean_cmd": "lst = map(int, open().readlines())",
    "canonical_cmd": "VAR_STR = map(int, open('VAR_STR').readlines())"
  },
  {
    "nl": "Python split a string using regex",
    "cmd": "re.findall('(.+?):(.+?)\\\\b ?', text)",
    "question_id": "16330838-72",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, text)",
    "canonical_cmd": "re.findall('(.+?):(.+?)\\\\b ?', text)"
  },
  {
    "nl": "generate all 2-element subsets of tuple `(1, 2, 3)`",
    "cmd": "list(itertools.combinations((1, 2, 3), 2))",
    "question_id": "7378180-24",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.combinations",
      "python.library.functions#list"
    ],
    "clean_cmd": "list(itertools.combinations((, , ), ))",
    "canonical_cmd": "list(itertools.combinations((VAR_STR), 2))"
  },
  {
    "nl": "Get a new list `list2` by removing empty lists from a list of lists `list1`",
    "cmd": "list2 = [x for x in list1 if x != []]",
    "question_id": "4842956-95",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "list2 = [x  x  list1  x != []]",
    "canonical_cmd": "VAR_STR = [x for x in VAR_STR if x != []]"
  },
  {
    "nl": "Create `list2` to contain the lists from list `list1` excluding the empty lists from `list1`",
    "cmd": "list2 = [x for x in list1 if x]",
    "question_id": "4842956-35",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "list2 = [x  x  list1  x]",
    "canonical_cmd": "VAR_STR = [x for x in VAR_STR if x]"
  },
  {
    "nl": "get all text that is not enclosed within square brackets in string `example_str`",
    "cmd": "re.findall('(.*?)\\\\[.*?\\\\]', example_str)",
    "question_id": "17284947-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, example_str)",
    "canonical_cmd": "re.findall('(.*?)\\\\[.*?\\\\]', VAR_STR)"
  },
  {
    "nl": "Use a regex to get all text in a string `example_str` that is not surrounded by square brackets",
    "cmd": "re.findall('(.*?)(?:\\\\[.*?\\\\]|$)', example_str)",
    "question_id": "17284947-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, example_str)",
    "canonical_cmd": "re.findall('(.*?)(?:\\\\[.*?\\\\]|$)', VAR_STR)"
  },
  {
    "nl": "get whatever is between parentheses as a single match, and any char outside as an individual match in string '(zyx)bc'",
    "cmd": "re.findall('\\\\(.+?\\\\)|\\\\w', '(zyx)bc')",
    "question_id": "14182339-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall('\\\\(.+?\\\\)|\\\\w', 'VAR_STR')"
  },
  {
    "nl": "match regex '\\\\((.*?)\\\\)|(\\\\w)' with string '(zyx)bc'",
    "cmd": "re.findall('\\\\((.*?)\\\\)|(\\\\w)', '(zyx)bc')",
    "question_id": "14182339-26",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "match multiple regex patterns with the alternation operator `|` in a string `(zyx)bc`",
    "cmd": "re.findall('\\\\(.*?\\\\)|\\\\w', '(zyx)bc')",
    "question_id": "14182339-43",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, )",
    "canonical_cmd": "re.findall('\\\\(.*?\\\\)|\\\\w', 'VAR_STR')"
  },
  {
    "nl": "format each string in list `elements` into pattern '%{0}%'",
    "cmd": "elements = ['%{0}%'.format(element) for element in elements]",
    "question_id": "7126916-65",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": "elements = [.format(element)  element  elements]",
    "canonical_cmd": "VAR_STR = ['VAR_STR'.format(element) for element in VAR_STR]"
  },
  {
    "nl": "get list of values from dictionary 'mydict' w.r.t. list of keys 'mykeys'",
    "cmd": "[mydict[x] for x in mykeys]",
    "question_id": "18453566-41",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[mydict[x]  x  mykeys]",
    "canonical_cmd": "[VAR_STR[x] for x in VAR_STR]"
  },
  {
    "nl": "convert list `[('Name', 'Joe'), ('Age', 22)]` into a dictionary",
    "cmd": "dict([('Name', 'Joe'), ('Age', 22)])",
    "question_id": "12692135-58",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict"
    ],
    "clean_cmd": "dict([(, ), (, )])",
    "canonical_cmd": "dict([VAR_STR])"
  },
  {
    "nl": "average each two columns of array `data`",
    "cmd": "data.reshape(-1, j).mean(axis=1).reshape(data.shape[0], -1)",
    "question_id": "14401047-34",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.reshape",
      "python.library.statistics#statistics.mean"
    ],
    "clean_cmd": "data.reshape(-, j).mean(axis=).reshape(data.shape[], -)",
    "canonical_cmd": "VAR_STR.reshape(-1, j).mean(axis=1).reshape(VAR_STR.shape[0], -1)"
  },
  {
    "nl": "double backslash escape all double quotes in string `s`",
    "cmd": "print(s.encode('unicode-escape').replace('\"', '\\\\\"'))",
    "question_id": "18886596-10",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode",
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": "print(s.encode().replace(, ))",
    "canonical_cmd": "print(VAR_STR.encode('unicode-escape').replace('\"', '\\\\\"'))"
  },
  {
    "nl": "split a string into a list of words and whitespace",
    "cmd": "re.split('(\\\\W+)', s)",
    "question_id": "5932059-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, s)",
    "canonical_cmd": "re.split('(\\\\W+)', s)"
  },
  {
    "nl": "plotting stacked barplots on a panda data frame",
    "cmd": "df.plot(kind='barh', stacked=True)",
    "question_id": "9938130-67",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.plot"
    ],
    "clean_cmd": "df.plot(kind=, stacked=True)",
    "canonical_cmd": "df.plot(kind='barh', stacked=True)"
  },
  {
    "nl": "reverse the keys and values in a dictionary `myDictionary`",
    "cmd": "{i[1]: i[0] for i in list(myDictionary.items())}",
    "question_id": "35945473-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "{i[]: i[]  i  list(myDictionary.items())}",
    "canonical_cmd": "{i[1]: i[0] for i in list(VAR_STR.items())}"
  },
  {
    "nl": "check if object `obj` is a string",
    "cmd": "isinstance(obj, str)",
    "question_id": "1303243-70",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#isinstance"
    ],
    "clean_cmd": "isinstance(obj, str)",
    "canonical_cmd": "isinstance(VAR_STR, str)"
  },
  {
    "nl": "check if object `o` is a string",
    "cmd": "isinstance(o, str)",
    "question_id": "1303243-15",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#isinstance"
    ],
    "clean_cmd": "isinstance(o, str)",
    "canonical_cmd": "isinstance(VAR_STR, str)"
  },
  {
    "nl": "check if object `o` is a string",
    "cmd": "(type(o) is str)",
    "question_id": "1303243-31",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#type"
    ],
    "clean_cmd": "(type(o)  str)",
    "canonical_cmd": "type(VAR_STR) is str"
  },
  {
    "nl": "check if object `o` is a string",
    "cmd": "isinstance(o, str)",
    "question_id": "1303243-32",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#isinstance"
    ],
    "clean_cmd": "isinstance(o, str)",
    "canonical_cmd": "isinstance(VAR_STR, str)"
  },
  {
    "nl": "check if `obj_to_test` is a string",
    "cmd": "isinstance(obj_to_test, str)",
    "question_id": "1303243-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#isinstance"
    ],
    "clean_cmd": "isinstance(obj_to_test, str)",
    "canonical_cmd": "isinstance(VAR_STR, str)"
  },
  {
    "nl": "append list `list1` to `list2`",
    "cmd": "list2.extend(list1)",
    "question_id": "8177079-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.collections#collections.deque.extend"
    ],
    "clean_cmd": "list2.extend(list1)",
    "canonical_cmd": "VAR_STR.extend(VAR_STR)"
  },
  {
    "nl": "append list `mylog` to `list1`",
    "cmd": "list1.extend(mylog)",
    "question_id": "8177079-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.collections#collections.deque.extend"
    ],
    "clean_cmd": "list1.extend(mylog)",
    "canonical_cmd": "VAR_STR.extend(VAR_STR)"
  },
  {
    "nl": "append list `a` to `c`",
    "cmd": "c.extend(a)",
    "question_id": "8177079-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.collections#collections.deque.extend"
    ],
    "clean_cmd": "c.extend(a)",
    "canonical_cmd": "VAR_STR.extend(VAR_STR)"
  },
  {
    "nl": "append items in list `mylog` to `list1`",
    "cmd": "for line in mylog:\n    list1.append(line)",
    "question_id": "8177079-4",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.append"
    ],
    "clean_cmd": " line  mylog:list1.append(line)",
    "canonical_cmd": "for line in VAR_STR:\n    VAR_STR.append(line)"
  },
  {
    "nl": "append a tuple of elements from list `a` with indexes '[0][0] [0][2]' to list `b`",
    "cmd": "b.append((a[0][0], a[0][2]))",
    "question_id": "4126227-85",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.append"
    ],
    "clean_cmd": "b.append((a[][], a[][]))",
    "canonical_cmd": "VAR_STR.append((VAR_STR[0][0], VAR_STR[0][2]))"
  },
  {
    "nl": "Initialize `SECRET_KEY` in flask config with `Your_secret_string`",
    "cmd": "app.config['SECRET_KEY'] = 'Your_secret_string'",
    "question_id": "34902378-5",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "app.config[] = ",
    "canonical_cmd": "app.config['VAR_STR'] = 'VAR_STR'"
  },
  {
    "nl": "find the index of an element 'MSFT' in a list `stocks_list`",
    "cmd": "[x for x in range(len(stocks_list)) if stocks_list[x] == 'MSFT']",
    "question_id": "1762484-85",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len",
      "python.library.functions#range"
    ],
    "clean_cmd": "[x  x  range(len(stocks_list))  stocks_list[x] == ]",
    "canonical_cmd": "[x for x in range(len(VAR_STR)) if VAR_STR[x] == 'VAR_STR']"
  },
  {
    "nl": "remove symbols from a string `s`",
    "cmd": "re.sub('[^\\\\w]', ' ', s)",
    "question_id": "875968-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , s)",
    "canonical_cmd": "re.sub('[^\\\\w]', ' ', VAR_STR)"
  },
  {
    "nl": "Get the current directory of a script",
    "cmd": "os.path.basename(os.path.dirname(os.path.realpath(__file__)))",
    "question_id": "31258561-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.basename",
      "python.library.os.path#os.path.dirname",
      "python.library.os.path#os.path.realpath"
    ],
    "clean_cmd": "os.path.basename(os.path.dirname(os.path.realpath(__file__)))",
    "canonical_cmd": "os.path.basename(os.path.dirname(os.path.realpath(__file__)))"
  },
  {
    "nl": "Find octal characters matches from a string `str` using regex",
    "cmd": "print(re.findall(\"'\\\\\\\\[0-7]{1,3}'\", str))",
    "question_id": "34750084-45",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "print(re.findall(, str))",
    "canonical_cmd": "print(re.findall(\"'\\\\\\\\[0-7]{1,3}'\", VAR_STR))"
  },
  {
    "nl": "split string `input` based on occurrences of regex pattern '[ ](?=[A-Z]+\\\\b)'",
    "cmd": "re.split('[ ](?=[A-Z]+\\\\b)', input)",
    "question_id": "13209288-55",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, input)",
    "canonical_cmd": "re.split('VAR_STR', VAR_STR)"
  },
  {
    "nl": "Split string `input` at every space followed by an upper-case letter",
    "cmd": "re.split('[ ](?=[A-Z])', input)",
    "question_id": "13209288-65",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.split"
    ],
    "clean_cmd": "re.split(, input)",
    "canonical_cmd": "re.split('[ ](?=[A-Z])', VAR_STR)"
  },
  {
    "nl": "send multipart encoded file `files` to url `url` with headers `headers` and metadata `data`",
    "cmd": "r = requests.post(url, files=files, headers=headers, data=data)",
    "question_id": "24642040-8",
    "cmd_name": "conala",
    "oracle_man": [
      "pygame.ref.fastevent#pygame.fastevent.post"
    ],
    "clean_cmd": "r = requests.post(url, files=files, headers=headers, data=data)",
    "canonical_cmd": "r = requests.post(VAR_STR, VAR_STR=VAR_STR, VAR_STR=VAR_STR, VAR_STR=VAR_STR)"
  },
  {
    "nl": "write bytes `bytes_` to a file `filename` in python 3",
    "cmd": "open('filename', 'wb').write(bytes_)",
    "question_id": "4290716-97",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.os#os.write"
    ],
    "clean_cmd": "open(, ).write(bytes_)",
    "canonical_cmd": "open('VAR_STR', 'wb').write(VAR_STR)"
  },
  {
    "nl": "get a list from a list `lst` with values mapped into a dictionary `dct`",
    "cmd": "[dct[k] for k in lst]",
    "question_id": "33078554-39",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[dct[k]  k  lst]",
    "canonical_cmd": "[VAR_STR[k] for k in VAR_STR]"
  },
  {
    "nl": "find duplicate names in column 'name' of the dataframe `x`",
    "cmd": "x.set_index('name').index.get_duplicates()",
    "question_id": "15247628-33",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.set_index"
    ],
    "clean_cmd": "x.set_index().index.get_duplicates()",
    "canonical_cmd": "VAR_STR.set_index('VAR_STR').index.get_duplicates()"
  },
  {
    "nl": "truncate float 1.923328437452 to 3 decimal places",
    "cmd": "round(1.923328437452, 3)",
    "question_id": "783897-89",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#round"
    ],
    "clean_cmd": "round(1.923328437452, )",
    "canonical_cmd": "round(1.923328437452, 3)"
  },
  {
    "nl": "sort list `li` in descending order based on the date value in second element of each list in list `li`",
    "cmd": "sorted(li, key=lambda x: datetime.strptime(x[1], '%d/%m/%Y'), reverse=True)",
    "question_id": "22859493-62",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.datetime#datetime.datetime.strptime"
    ],
    "clean_cmd": "sorted(li, key= x: datetime.strptime(x[], ), reverse=True)",
    "canonical_cmd": "sorted(VAR_STR, key=lambda x: datetime.strptime(x[1], '%d/%m/%Y'), reverse=True)"
  },
  {
    "nl": "place the radial ticks in plot `ax` at 135 degrees",
    "cmd": "ax.set_rlabel_position(135)",
    "question_id": "29394552-74",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib.projections_api#matplotlib.projections.polar.PolarAxes.set_rlabel_position"
    ],
    "clean_cmd": "ax.set_rlabel_position()",
    "canonical_cmd": "VAR_STR.set_rlabel_position(135)"
  },
  {
    "nl": "check if path `my_path` is an absolute path",
    "cmd": "os.path.isabs(my_path)",
    "question_id": "3320406-28",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os.path#os.path.isabs"
    ],
    "clean_cmd": "os.path.isabs(my_path)",
    "canonical_cmd": "os.path.isabs(VAR_STR)"
  },
  {
    "nl": "pandas dataframe get first row of each group by 'id'",
    "cmd": "df.groupby('id').first()",
    "question_id": "20067636-10",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.groupby",
      "pandas.reference.api.pandas.dataframe.first"
    ],
    "clean_cmd": "df.groupby().first()",
    "canonical_cmd": "df.groupby('VAR_STR').first()"
  },
  {
    "nl": "extract attributes 'src=\"js/([^\"]*\\\\bjquery\\\\b[^\"]*)\"' from string `data`",
    "cmd": "re.findall('src=\"js/([^\"]*\\\\bjquery\\\\b[^\"]*)\"', data)",
    "question_id": "30759776-2",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, data)",
    "canonical_cmd": "re.findall('VAR_STR', VAR_STR)"
  },
  {
    "nl": "Sum integers contained in strings in list `['', '3.4', '', '', '1.0']`",
    "cmd": "sum(int(float(item)) for item in [_f for _f in ['', '3.4', '', '', '1.0'] if _f])",
    "question_id": "25388796-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#float",
      "python.library.functions#int",
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum(int(float(item))  item  [_f  _f  [, , , , ]  _f])",
    "canonical_cmd": "sum(int(float(item)) for item in [_f for _f in [VAR_STR] if\n    _f])"
  },
  {
    "nl": "make a barplot of data in column `group` of dataframe `df` colour-coded according to list `color`",
    "cmd": "df['group'].plot(kind='bar', color=['r', 'g', 'b', 'r', 'g', 'b', 'r'])",
    "question_id": "18897261-72",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.plot"
    ],
    "clean_cmd": "df[].plot(kind=, color=[, , , , , , ])",
    "canonical_cmd": "VAR_STR['VAR_STR'].plot(kind='bar', VAR_STR=['r', 'g', 'b', 'r', 'g', 'b', 'r'])"
  },
  {
    "nl": "find all matches of regex pattern '([a-fA-F\\\\d]{32})' in string `data`",
    "cmd": "re.findall('([a-fA-F\\\\d]{32})', data)",
    "question_id": "373194-29",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, data)",
    "canonical_cmd": "re.findall('VAR_STR', VAR_STR)"
  },
  {
    "nl": "Get the length of list `my_list`",
    "cmd": "len(my_list)",
    "question_id": "518021-20",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len"
    ],
    "clean_cmd": "len(my_list)",
    "canonical_cmd": "len(VAR_STR)"
  },
  {
    "nl": "Getting the length of array `l`",
    "cmd": "len(l)",
    "question_id": "518021-61",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len"
    ],
    "clean_cmd": "len(l)",
    "canonical_cmd": "len(VAR_STR)"
  },
  {
    "nl": "Getting the length of array `s`",
    "cmd": "len(s)",
    "question_id": "518021-63",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len"
    ],
    "clean_cmd": "len(s)",
    "canonical_cmd": "len(VAR_STR)"
  },
  {
    "nl": "Getting the length of `my_tuple`",
    "cmd": "len(my_tuple)",
    "question_id": "518021-87",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len"
    ],
    "clean_cmd": "len(my_tuple)",
    "canonical_cmd": "len(VAR_STR)"
  },
  {
    "nl": "Getting the length of `my_string`",
    "cmd": "len(my_string)",
    "question_id": "518021-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#len"
    ],
    "clean_cmd": "len(my_string)",
    "canonical_cmd": "len(VAR_STR)"
  },
  {
    "nl": "remove escape character from string \"\\\\a\"",
    "cmd": "\"\"\"\\\\a\"\"\".decode('string_escape')",
    "question_id": "40452956-17",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode"
    ],
    "clean_cmd": ".decode()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".decode('string_escape')"
  },
  {
    "nl": "replace each 'a' with 'b' and each 'b' with 'a' in the string 'obama' in a single pass.",
    "cmd": "\"\"\"obama\"\"\".replace('a', '%temp%').replace('b', 'a').replace('%temp%', 'b')",
    "question_id": "8687018-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": ".replace(, ).replace(, ).replace(, )",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".replace('VAR_STR', '%temp%').replace('VAR_STR', 'VAR_STR').replace(\n    '%temp%', 'VAR_STR')"
  },
  {
    "nl": "remove directory tree '/folder_name'",
    "cmd": "shutil.rmtree('/folder_name')",
    "question_id": "303200-79",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.shutil#shutil.rmtree"
    ],
    "clean_cmd": "shutil.rmtree()",
    "canonical_cmd": "shutil.rmtree('VAR_STR')"
  },
  {
    "nl": "create a new column `weekday` in pandas data frame `data` based on the values in column `my_dt`",
    "cmd": "data['weekday'] = data['my_dt'].apply(lambda x: x.weekday())",
    "question_id": "13740672-79",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.series.apply"
    ],
    "clean_cmd": "data[] = data[].apply( x: x.weekday())",
    "canonical_cmd": "VAR_STR['VAR_STR'] = VAR_STR['VAR_STR'].apply(lambda x: x.VAR_STR())"
  },
  {
    "nl": "reverse sort Counter `x` by values",
    "cmd": "sorted(x, key=x.get, reverse=True)",
    "question_id": "20950650-8",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "sorted(x, key=x.get, reverse=True)",
    "canonical_cmd": "sorted(VAR_STR, key=VAR_STR.get, reverse=True)"
  },
  {
    "nl": "reverse sort counter `x` by value",
    "cmd": "sorted(list(x.items()), key=lambda pair: pair[1], reverse=True)",
    "question_id": "20950650-18",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.functions#list",
      "python.library.stdtypes#dict.items"
    ],
    "clean_cmd": "sorted(list(x.items()), key= pair: pair[], reverse=True)",
    "canonical_cmd": "sorted(list(VAR_STR.items()), key=lambda pair: pair[1], reverse=True)"
  },
  {
    "nl": "append a numpy array 'b' to a numpy array 'a'",
    "cmd": "np.vstack((a, b))",
    "question_id": "9775297-68",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.vstack"
    ],
    "clean_cmd": "np.vstack((a, b))",
    "canonical_cmd": "np.vstack((VAR_STR, VAR_STR))"
  },
  {
    "nl": "fetch address information for host 'google.com' on port 80",
    "cmd": "print(socket.getaddrinfo('google.com', 80))",
    "question_id": "2805231-96",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.socket#socket.getaddrinfo"
    ],
    "clean_cmd": "print(socket.getaddrinfo(, ))",
    "canonical_cmd": "print(socket.getaddrinfo('VAR_STR', 80))"
  },
  {
    "nl": "add a column 'day' with value 'sat' to dataframe `df`",
    "cmd": "df.xs('sat', level='day', drop_level=False)",
    "question_id": "17552997-64",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.xs"
    ],
    "clean_cmd": "df.xs(, level=, drop_level=False)",
    "canonical_cmd": "VAR_STR.xs('VAR_STR', level='VAR_STR', drop_level=False)"
  },
  {
    "nl": "Flask set folder 'wherever' as the default template folder",
    "cmd": "Flask(__name__, template_folder='wherever')",
    "question_id": "13598363-99",
    "cmd_name": "conala",
    "oracle_man": [
      "flask.api.index#flask.Flask"
    ],
    "clean_cmd": "Flask(__name__, template_folder=)",
    "canonical_cmd": "Flask(__name__, template_folder='VAR_STR')"
  },
  {
    "nl": "How do I INSERT INTO t1 (SELECT * FROM t2) in SQLAlchemy?",
    "cmd": "session.execute('INSERT INTO t1 (SELECT * FROM t2)')",
    "question_id": "1849375-50",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.msilib#msilib.View.Execute"
    ],
    "clean_cmd": "session.execute()",
    "canonical_cmd": "session.execute('INSERT INTO t1 (SELECT * FROM t2)')"
  },
  {
    "nl": "sort a list of lists 'c2' such that third row comes first",
    "cmd": "c2.sort(key=lambda row: row[2])",
    "question_id": "3398589-41",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "c2.sort(key= row: row[])",
    "canonical_cmd": "VAR_STR.sort(key=lambda row: row[2])"
  },
  {
    "nl": "Sorting a list of lists in Python",
    "cmd": "c2.sort(key=lambda row: (row[2], row[1], row[0]))",
    "question_id": "3398589-16",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "c2.sort(key= row: (row[], row[], row[]))",
    "canonical_cmd": "c2.sort(key=lambda row: (row[2], row[1], row[0]))"
  },
  {
    "nl": "Sorting a list of lists in Python",
    "cmd": "c2.sort(key=lambda row: (row[2], row[1]))",
    "question_id": "3398589-90",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "c2.sort(key= row: (row[], row[]))",
    "canonical_cmd": "c2.sort(key=lambda row: (row[2], row[1]))"
  },
  {
    "nl": "set font `Arial` to display non-ascii characters in matplotlib",
    "cmd": "matplotlib.rc('font', **{'sans-serif': 'Arial', 'family': 'sans-serif'})",
    "question_id": "10960463-71",
    "cmd_name": "conala",
    "oracle_man": [
      "matplotlib._as_gen.matplotlib.pyplot.rc"
    ],
    "clean_cmd": "matplotlib.rc(, **{: , : })",
    "canonical_cmd": "matplotlib.rc('font', **{'sans-serif': 'VAR_STR', 'family': 'sans-serif'})"
  },
  {
    "nl": "Convert DateTime column 'date' of pandas dataframe 'df' to ordinal",
    "cmd": "df['date'].apply(lambda x: x.toordinal())",
    "question_id": "20576618-72",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.timestamp.toordinal",
      "pandas.reference.api.pandas.series.apply"
    ],
    "clean_cmd": "df[].apply( x: x.toordinal())",
    "canonical_cmd": "VAR_STR['VAR_STR'].apply(lambda x: x.toordinal())"
  },
  {
    "nl": "get html source of Selenium WebElement `element`",
    "cmd": "element.get_attribute('innerHTML')",
    "question_id": "7263824-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.test#test.support.get_attribute"
    ],
    "clean_cmd": "element.get_attribute()",
    "canonical_cmd": "VAR_STR.get_attribute('innerHTML')"
  },
  {
    "nl": "open a 'gnome' terminal from python script and run 'sudo apt-get update' command.",
    "cmd": "os.system('gnome-terminal -e \\'bash -c \"sudo apt-get update; exec bash\"\\'')",
    "question_id": "7574841-76",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system()",
    "canonical_cmd": "os.system('gnome-terminal -e \\'bash -c \"sudo apt-get update; exec bash\"\\'')"
  },
  {
    "nl": "add an item with key 'third_key' and value 1 to a dictionary `my_dict`",
    "cmd": "my_dict.update({'third_key': 1})",
    "question_id": "10487278-57",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#dict.update"
    ],
    "clean_cmd": "my_dict.update({: })",
    "canonical_cmd": "VAR_STR.update({'VAR_STR': 1})"
  },
  {
    "nl": "declare an array",
    "cmd": "my_list = []",
    "question_id": "10487278-5",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "my_list = []",
    "canonical_cmd": "my_list = []"
  },
  {
    "nl": "Insert item `12` to a list `my_list`",
    "cmd": "my_list.append(12)",
    "question_id": "10487278-47",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.append"
    ],
    "clean_cmd": "my_list.append()",
    "canonical_cmd": "VAR_STR.append(12)"
  },
  {
    "nl": "add an entry 'wuggah' at the beginning of list `myList`",
    "cmd": "myList.insert(0, 'wuggah')",
    "question_id": "10155684-8",
    "cmd_name": "conala",
    "oracle_man": [
      "numpy.reference.generated.numpy.insert"
    ],
    "clean_cmd": "myList.insert(, )",
    "canonical_cmd": "VAR_STR.insert(0, 'VAR_STR')"
  },
  {
    "nl": "convert a hex-string representation to actual bytes",
    "cmd": "\"\"\"\\\\xF3\\\\xBE\\\\x80\\\\x80\"\"\".replace('\\\\x', '').decode('hex')",
    "question_id": "3519125-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#bytearray.decode",
      "python.library.stdtypes#str.replace"
    ],
    "clean_cmd": ".replace(, ).decode()",
    "canonical_cmd": "\"\"\"\\\\xF3\\\\xBE\\\\x80\\\\x80\"\"\".replace('\\\\x', '').decode('hex')"
  },
  {
    "nl": "select the last column of dataframe `df`",
    "cmd": "df[df.columns[-1]]",
    "question_id": "40144769-97",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "df[df.columns[-]]",
    "canonical_cmd": "VAR_STR[VAR_STR.columns[-1]]"
  },
  {
    "nl": "get the first value from dataframe `df` where column 'Letters' is equal to 'C'",
    "cmd": "df.loc[df['Letters'] == 'C', 'Letters'].values[0]",
    "question_id": "30787901-86",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.loc"
    ],
    "clean_cmd": "df.loc[df[] == , ].values[]",
    "canonical_cmd": "VAR_STR.loc[VAR_STR['VAR_STR'] == 'VAR_STR', 'VAR_STR'].values[0]"
  },
  {
    "nl": "get the type of `i`",
    "cmd": "type(i)",
    "question_id": "402504-5",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#type"
    ],
    "clean_cmd": "type(i)",
    "canonical_cmd": "type(VAR_STR)"
  },
  {
    "nl": "determine the type of variable `v`",
    "cmd": "type(v)",
    "question_id": "402504-77",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#type"
    ],
    "clean_cmd": "type(v)",
    "canonical_cmd": "type(VAR_STR)"
  },
  {
    "nl": "determine the type of variable `v`",
    "cmd": "type(v)",
    "question_id": "402504-78",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#type"
    ],
    "clean_cmd": "type(v)",
    "canonical_cmd": "type(VAR_STR)"
  },
  {
    "nl": "determine the type of variable `v`",
    "cmd": "type(v)",
    "question_id": "402504-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#type"
    ],
    "clean_cmd": "type(v)",
    "canonical_cmd": "type(VAR_STR)"
  },
  {
    "nl": "determine the type of variable `v`",
    "cmd": "type(v)",
    "question_id": "402504-40",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#type"
    ],
    "clean_cmd": "type(v)",
    "canonical_cmd": "type(VAR_STR)"
  },
  {
    "nl": "get the type of variable `variable_name`",
    "cmd": "print(type(variable_name))",
    "question_id": "402504-29",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#type"
    ],
    "clean_cmd": "print(type(variable_name))",
    "canonical_cmd": "print(type(VAR_STR))"
  },
  {
    "nl": "get the 5th item of a generator",
    "cmd": "next(itertools.islice(range(10), 5, 5 + 1))",
    "question_id": "2300756-79",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.itertools#itertools.islice",
      "python.library.functions#range",
      "python.library.functions#next"
    ],
    "clean_cmd": "next(itertools.islice(range(), ,  + ))",
    "canonical_cmd": "next(itertools.islice(range(10), 5, 5 + 1))"
  },
  {
    "nl": "Print a string `word` with string format",
    "cmd": "print('\"{}\"'.format(word))",
    "question_id": "20056548-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#format"
    ],
    "clean_cmd": "print(.format(word))",
    "canonical_cmd": "print('\"{}\"'.format(VAR_STR))"
  },
  {
    "nl": "join a list of strings `list` using a space ' '",
    "cmd": "\"\"\" \"\"\".join(list)",
    "question_id": "8546245-44",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(list)",
    "canonical_cmd": "\"\"\" \"\"\".join(VAR_STR)"
  },
  {
    "nl": "create list `y` containing two empty lists",
    "cmd": "y = [[] for n in range(2)]",
    "question_id": "2276416-66",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": "y = [[]  n  range()]",
    "canonical_cmd": "VAR_STR = [[] for n in range(2)]"
  },
  {
    "nl": "read a file 'C:/name/MyDocuments/numbers' into a list `data`",
    "cmd": "data = [line.strip() for line in open('C:/name/MyDocuments/numbers', 'r')]",
    "question_id": "3925614-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.stdtypes#str.strip"
    ],
    "clean_cmd": "data = [line.strip()  line  open(, )]",
    "canonical_cmd": "VAR_STR = [line.strip() for line in open('VAR_STR', 'r')]"
  },
  {
    "nl": "Drop rows of pandas dataframe `df` having NaN in column at index \"1\"",
    "cmd": "df.dropna(subset=[1])",
    "question_id": "13413590-6",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.dropna"
    ],
    "clean_cmd": "df.dropna(subset=[])",
    "canonical_cmd": "VAR_STR.dropna(subset=[1])"
  },
  {
    "nl": "get elements from list `myList`, that have a field `n` value 30",
    "cmd": "[x for x in myList if x.n == 30]",
    "question_id": "598398-3",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "[x  x  myList  x.n == ]",
    "canonical_cmd": "[x for x in VAR_STR if x.VAR_STR == 30]"
  },
  {
    "nl": "print \".\" without newline",
    "cmd": "sys.stdout.write('.')",
    "question_id": "493386-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.write"
    ],
    "clean_cmd": "sys.stdout.write()",
    "canonical_cmd": "sys.stdout.write('VAR_STR')"
  },
  {
    "nl": "round off the float that is the product of `2.51 * 100` and convert it to an int",
    "cmd": "int(round(2.51 * 100))",
    "question_id": "6569528-54",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#int",
      "python.library.functions#round"
    ],
    "clean_cmd": "int(round(2.51 * ))",
    "canonical_cmd": "int(round(2.51 * 100))"
  },
  {
    "nl": "plot dataframe `df` without a legend",
    "cmd": "df.plot(legend=False)",
    "question_id": "20865487-86",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.plot"
    ],
    "clean_cmd": "df.plot(legend=False)",
    "canonical_cmd": "VAR_STR.plot(legend=False)"
  },
  {
    "nl": "loop through the IP address range \"192.168.x.x\"",
    "cmd": "for i in range(256):\n    for j in range(256):\n        ip = ('192.168.%d.%d' % (i, j))\n        print(ip)",
    "question_id": "13368659-96",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range"
    ],
    "clean_cmd": " i  range(): j  range():ip = ( % (i, j))print(ip)",
    "canonical_cmd": "for i in range(256):\n    for j in range(256):\n        ip = '192.168.%d.%d' % (i, j)\n        print(ip)"
  },
  {
    "nl": "loop through the IP address range \"192.168.x.x\"",
    "cmd": "for (i, j) in product(list(range(256)), list(range(256))):\n    pass",
    "question_id": "13368659-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#range",
      "python.library.functions#list"
    ],
    "clean_cmd": " (i, j)  product(list(range()), list(range())):",
    "canonical_cmd": "for i, j in product(list(range(256)), list(range(256))):\n    pass"
  },
  {
    "nl": "loop through the IP address range \"192.168.x.x\"",
    "cmd": "generator = iter_iprange('192.168.1.1', '192.168.255.255', step=1)",
    "question_id": "13368659-30",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "generator = iter_iprange(, , step=)",
    "canonical_cmd": "generator = iter_iprange('192.168.1.1', '192.168.255.255', step=1)"
  },
  {
    "nl": "Sum the corresponding decimal values for binary values of each boolean element in list `x`",
    "cmd": "sum(1 << i for i, b in enumerate(x) if b)",
    "question_id": "4065737-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#enumerate",
      "python.library.functions#sum"
    ],
    "clean_cmd": "sum( << i  i, b  enumerate(x)  b)",
    "canonical_cmd": "sum(1 << i for i, b in enumerate(VAR_STR) if b)"
  },
  {
    "nl": "write multiple strings `line1`, `line2` and `line3` in one line in a file `target`",
    "cmd": "target.write('%r\\n%r\\n%r\\n' % (line1, line2, line3))",
    "question_id": "8691311-3",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.write"
    ],
    "clean_cmd": "target.write( % (line1, line2, line3))",
    "canonical_cmd": "VAR_STR.write('%r\\n%r\\n%r\\n' % (VAR_STR, VAR_STR, VAR_STR))"
  },
  {
    "nl": "Convert list of lists `data` into a flat list",
    "cmd": "[y for x in data for y in (x if isinstance(x, list) else [x])]",
    "question_id": "10632111-94",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#isinstance"
    ],
    "clean_cmd": "[y  x  data  y  (x  isinstance(x, list)  [x])]",
    "canonical_cmd": "[y for x in VAR_STR for y in (x if isinstance(x, list) else [x])]"
  },
  {
    "nl": "Print new line character as `\\n` in a string `foo\\nbar`",
    "cmd": "print('foo\\nbar'.encode('string_escape'))",
    "question_id": "15392730-45",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "print(.encode())",
    "canonical_cmd": "print('VAR_STR'.encode('string_escape'))"
  },
  {
    "nl": "remove last comma character ',' in string `s`",
    "cmd": "\"\"\"\"\"\".join(s.rsplit(',', 1))",
    "question_id": "1010961-75",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.rsplit",
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(s.rsplit(, ))",
    "canonical_cmd": "\"\"\"\"\"\".join(VAR_STR.rsplit('VAR_STR', 1))"
  },
  {
    "nl": "calculate the mean of each element in array `x` with the element previous to it",
    "cmd": "(x[1:] + x[:-1]) / 2",
    "question_id": "23855976-47",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "(x[:] + x[:-]) / ",
    "canonical_cmd": "(VAR_STR[1:] + VAR_STR[:-1]) / 2"
  },
  {
    "nl": "get an array of the mean of each two consecutive values in numpy array `x`",
    "cmd": "x[:-1] + (x[1:] - x[:-1]) / 2",
    "question_id": "23855976-53",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "x[:-] + (x[:] - x[:-]) / ",
    "canonical_cmd": "VAR_STR[:-1] + (VAR_STR[1:] - VAR_STR[:-1]) / 2"
  },
  {
    "nl": "load data containing `utf-8` from file `new.txt` into numpy array `arr`",
    "cmd": "arr = numpy.fromiter(codecs.open('new.txt', encoding='utf-8'), dtype='<U2')",
    "question_id": "6375343-22",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.codecs#codecs.open",
      "numpy.reference.generated.numpy.fromiter"
    ],
    "clean_cmd": "arr = numpy.fromiter(codecs.open(, encoding=), dtype=)",
    "canonical_cmd": "VAR_STR = numpy.fromiter(codecs.open('VAR_STR', encoding='VAR_STR'), dtype='<U2')"
  },
  {
    "nl": "reverse sort list of dicts `l` by value for key `time`",
    "cmd": "l = sorted(l, key=itemgetter('time'), reverse=True)",
    "question_id": "1547733-9",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.operator#operator.itemgetter"
    ],
    "clean_cmd": "l = sorted(l, key=itemgetter(), reverse=True)",
    "canonical_cmd": "VAR_STR = sorted(VAR_STR, key=itemgetter('VAR_STR'), reverse=True)"
  },
  {
    "nl": "Sort a list of dictionary `l` based on key `time` in descending order",
    "cmd": "l = sorted(l, key=lambda a: a['time'], reverse=True)",
    "question_id": "1547733-81",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "l = sorted(l, key= a: a[], reverse=True)",
    "canonical_cmd": "VAR_STR = sorted(VAR_STR, key=lambda a: a['VAR_STR'], reverse=True)"
  },
  {
    "nl": "get rows of dataframe `df` that match regex '(Hel|Just)'",
    "cmd": "df.loc[df[0].str.contains('(Hel|Just)')]",
    "question_id": "37080612-2",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.dataframe.loc",
      "pandas.reference.api.pandas.series.str.contains"
    ],
    "clean_cmd": "df.loc[df[].str.contains()]",
    "canonical_cmd": "VAR_STR.loc[VAR_STR[0].str.contains('VAR_STR')]"
  },
  {
    "nl": "find the string in `your_string` between two special characters \"[\" and \"]\"",
    "cmd": "re.search('\\\\[(.*)\\\\]', your_string).group(1)",
    "question_id": "14716342-27",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.search",
      "python.library.re#re.Match.group"
    ],
    "clean_cmd": "re.search(, your_string).group()",
    "canonical_cmd": "re.search('\\\\[(.*)\\\\]', VAR_STR).group(1)"
  },
  {
    "nl": "How to create a list of date string in 'yyyymmdd' format with Python Pandas?",
    "cmd": "[d.strftime('%Y%m%d') for d in pandas.date_range('20130226', '20130302')]",
    "question_id": "18684076-99",
    "cmd_name": "conala",
    "oracle_man": [
      "pandas.reference.api.pandas.date_range",
      "pandas.reference.api.pandas.timestamp.strftime"
    ],
    "clean_cmd": "[d.strftime()  d  pandas.date_range(, )]",
    "canonical_cmd": "[d.strftime('%Y%m%d') for d in pandas.date_range('20130226', '20130302')]"
  },
  {
    "nl": "count number of times string 'brown' occurred in string 'The big brown fox is brown'",
    "cmd": "\"\"\"The big brown fox is brown\"\"\".count('brown')",
    "question_id": "1666700-25",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.count"
    ],
    "clean_cmd": ".count()",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".count('VAR_STR')"
  },
  {
    "nl": "download the file from url `url` and save it under file `file_name`",
    "cmd": "urllib.request.urlretrieve(url, file_name)",
    "question_id": "7243750-79",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#urllib.request.urlretrieve"
    ],
    "clean_cmd": "urllib.request.urlretrieve(url, file_name)",
    "canonical_cmd": "urllib.request.urlretrieve(VAR_STR, VAR_STR)"
  },
  {
    "nl": "split string `text` by space",
    "cmd": "text.split()",
    "question_id": "743806-70",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "text.split()",
    "canonical_cmd": "VAR_STR.split()"
  },
  {
    "nl": "split string `text` by \",\"",
    "cmd": "text.split(',')",
    "question_id": "743806-93",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "text.split()",
    "canonical_cmd": "VAR_STR.split('VAR_STR')"
  },
  {
    "nl": "Split string `line` into a list by whitespace",
    "cmd": "line.split()",
    "question_id": "743806-18",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "line.split()",
    "canonical_cmd": "VAR_STR.split()"
  },
  {
    "nl": "replace dot characters '.' associated with ascii letters in list `s` with space ' '",
    "cmd": "[re.sub('(?<!\\\\d)\\\\.(?!\\\\d)', ' ', i) for i in s]",
    "question_id": "35044115-64",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "[re.sub(, , i)  i  s]",
    "canonical_cmd": "[re.sub('(?<!\\\\d)\\\\.(?!\\\\d)', ' ', i) for i in VAR_STR]"
  },
  {
    "nl": "sort list `list_of_strings` based on second index of each string `s`",
    "cmd": "sorted(list_of_strings, key=lambda s: s.split(',')[1])",
    "question_id": "38388799-16",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "sorted(list_of_strings, key= s: s.split()[])",
    "canonical_cmd": "sorted(VAR_STR, key=lambda VAR_STR: VAR_STR.split(',')[1])"
  },
  {
    "nl": "call multiple bash functions \u2018vasp\u2019 and \u2018tee tee_output\u2019 using \u2018|\u2019",
    "cmd": "subprocess.check_call('vasp | tee tee_output', shell=True)",
    "question_id": "9609375-86",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.subprocess#subprocess.check_call"
    ],
    "clean_cmd": "subprocess.check_call(, shell=True)",
    "canonical_cmd": "subprocess.check_call('vasp | tee tee_output', shell=True)"
  },
  {
    "nl": "eliminate all strings from list `lst`",
    "cmd": "[element for element in lst if isinstance(element, int)]",
    "question_id": "37004138-32",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#isinstance"
    ],
    "clean_cmd": "[element  element  lst  isinstance(element, int)]",
    "canonical_cmd": "[element for element in VAR_STR if isinstance(element, int)]"
  },
  {
    "nl": "get all the elements except strings from the list 'lst'.",
    "cmd": "[element for element in lst if not isinstance(element, str)]",
    "question_id": "37004138-83",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#isinstance"
    ],
    "clean_cmd": "[element  element  lst   isinstance(element, str)]",
    "canonical_cmd": "[element for element in VAR_STR if not isinstance(element, str)]"
  },
  {
    "nl": "Sort a list of dictionaries `list_to_be_sorted` by the value of the dictionary key `name`",
    "cmd": "newlist = sorted(list_to_be_sorted, key=lambda k: k['name'])",
    "question_id": "72899-95",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted"
    ],
    "clean_cmd": "newlist = sorted(list_to_be_sorted, key= k: k[])",
    "canonical_cmd": "newlist = sorted(VAR_STR, key=lambda k: k['VAR_STR'])"
  },
  {
    "nl": "sort a list of dictionaries `l` by values in key `name` in descending order",
    "cmd": "newlist = sorted(l, key=itemgetter('name'), reverse=True)",
    "question_id": "72899-6",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#sorted",
      "python.library.operator#operator.itemgetter"
    ],
    "clean_cmd": "newlist = sorted(l, key=itemgetter(), reverse=True)",
    "canonical_cmd": "newlist = sorted(VAR_STR, key=itemgetter('VAR_STR'), reverse=True)"
  },
  {
    "nl": "How do I sort a list of dictionaries by values of the dictionary in Python?",
    "cmd": "list_of_dicts.sort(key=operator.itemgetter('name'))",
    "question_id": "72899-76",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.operator#operator.itemgetter",
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "list_of_dicts.sort(key=operator.itemgetter())",
    "canonical_cmd": "list_of_dicts.sort(key=operator.itemgetter('name'))"
  },
  {
    "nl": "How do I sort a list of dictionaries by values of the dictionary in Python?",
    "cmd": "list_of_dicts.sort(key=operator.itemgetter('age'))",
    "question_id": "72899-59",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.operator#operator.itemgetter",
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "list_of_dicts.sort(key=operator.itemgetter())",
    "canonical_cmd": "list_of_dicts.sort(key=operator.itemgetter('age'))"
  },
  {
    "nl": "join together with \",\" elements inside a list indexed with 'category' within a dictionary `trans`",
    "cmd": "\"\"\",\"\"\".join(trans['category'])",
    "question_id": "29881993-76",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join(trans[])",
    "canonical_cmd": "\"\"\"VAR_STR\"\"\".join(VAR_STR['VAR_STR'])"
  },
  {
    "nl": "concatenate array of strings `['A', 'B', 'C', 'D']` into a string",
    "cmd": "\"\"\"\"\"\".join(['A', 'B', 'C', 'D'])",
    "question_id": "34158494-93",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.join"
    ],
    "clean_cmd": ".join([, , , ])",
    "canonical_cmd": "\"\"\"\"\"\".join([VAR_STR])"
  },
  {
    "nl": "Remove all strings from a list of strings `sents` where the values start with `@$\\t` or `#`",
    "cmd": "[x for x in sents if not x.startswith('@$\\t') and not x.startswith('#')]",
    "question_id": "12666897-36",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.startswith"
    ],
    "clean_cmd": "[x  x  sents   x.startswith()   x.startswith()]",
    "canonical_cmd": "[x for x in VAR_STR if not x.startswith('VAR_STR') and not x.startswith('VAR_STR')]"
  },
  {
    "nl": "convert datetime object `(1970, 1, 1)` to seconds",
    "cmd": "(t - datetime.datetime(1970, 1, 1)).total_seconds()",
    "question_id": "7852855-99",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.datetime#datetime.timedelta.total_seconds"
    ],
    "clean_cmd": "(t - datetime.datetime(, , )).total_seconds()",
    "canonical_cmd": "(t - datetime.datetime(VAR_STR)).total_seconds()"
  },
  {
    "nl": "insert `_suff` before the file extension in `long.file.name.jpg` or replace `_a` with `suff` if it precedes the extension.",
    "cmd": "re.sub('(\\\\_a)?\\\\.([^\\\\.]*)$', '_suff.\\\\2', 'long.file.name.jpg')",
    "question_id": "2763750-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , )",
    "canonical_cmd": "re.sub('(\\\\_a)?\\\\.([^\\\\.]*)$', '_suff.\\\\2', 'VAR_STR')"
  },
  {
    "nl": "reload a module `module`",
    "cmd": "import imp\nimp.reload(module)",
    "question_id": "6420361-29",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.importlib#importlib.reload"
    ],
    "clean_cmd": "import impimp.reload(module)",
    "canonical_cmd": "import imp\nimp.reload(VAR_STR)"
  },
  {
    "nl": "convert int values in list `numlist` to float",
    "cmd": "numlist = [float(x) for x in numlist]",
    "question_id": "9746522-1",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#float"
    ],
    "clean_cmd": "numlist = [float(x)  x  numlist]",
    "canonical_cmd": "VAR_STR = [float(x) for x in VAR_STR]"
  },
  {
    "nl": "Create a list containing all ascii characters as its elements",
    "cmd": "[chr(i) for i in range(127)]",
    "question_id": "5891453-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#chr",
      "python.library.functions#range"
    ],
    "clean_cmd": "[chr(i)  i  range()]",
    "canonical_cmd": "[chr(i) for i in range(127)]"
  },
  {
    "nl": "python regex - check for a capital letter with a following lowercase in string `string`",
    "cmd": "re.sub('^[A-Z0-9]*(?![a-z])', '', string)",
    "question_id": "21805490-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.sub"
    ],
    "clean_cmd": "re.sub(, , string)",
    "canonical_cmd": "re.sub('^[A-Z0-9]*(?![a-z])', '', VAR_STR)"
  },
  {
    "nl": "write line \"hi there\" to file `f`",
    "cmd": "print('hi there', file=f)",
    "question_id": "6159900-4",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "print(, file=f)",
    "canonical_cmd": "print('VAR_STR', file=VAR_STR)"
  },
  {
    "nl": "write line \"hi there\" to file `myfile`",
    "cmd": "f = open('myfile', 'w')\nf.write('hi there\\n')\nf.close()",
    "question_id": "6159900-27",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.os#os.write"
    ],
    "clean_cmd": "f = open(, )f.write()f.close()",
    "canonical_cmd": "f = open('VAR_STR', 'w')\nf.write('hi there\\n')\nf.close()"
  },
  {
    "nl": "write line \"Hello\" to file `somefile.txt`",
    "cmd": "with open('somefile.txt', 'a') as the_file:\n    the_file.write('Hello\\n')",
    "question_id": "6159900-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open",
      "python.library.os#os.write"
    ],
    "clean_cmd": " open(, )  the_file:the_file.write()",
    "canonical_cmd": "with open('VAR_STR', 'a') as the_file:\n    the_file.write('Hello\\n')"
  },
  {
    "nl": "convert unicode string `s` to ascii",
    "cmd": "s.encode('iso-8859-15')",
    "question_id": "19527279-73",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.encode"
    ],
    "clean_cmd": "s.encode()",
    "canonical_cmd": "VAR_STR.encode('iso-8859-15')"
  },
  {
    "nl": "Find all numbers and dots from a string `text` using regex",
    "cmd": "re.findall('Test([0-9.]*[0-9]+)', text)",
    "question_id": "356483-47",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, text)",
    "canonical_cmd": "re.findall('Test([0-9.]*[0-9]+)', VAR_STR)"
  },
  {
    "nl": "python regex to find all numbers and dots from 'text'",
    "cmd": "re.findall('Test([\\\\d.]*\\\\d+)', text)",
    "question_id": "356483-68",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, text)",
    "canonical_cmd": "re.findall('Test([\\\\d.]*\\\\d+)', VAR_STR)"
  },
  {
    "nl": "execute script 'script.ps1' using 'powershell.exe' shell",
    "cmd": "os.system('powershell.exe', 'script.ps1')",
    "question_id": "38081866-23",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.os#os.system"
    ],
    "clean_cmd": "os.system(, )",
    "canonical_cmd": "os.system('VAR_STR', 'VAR_STR')"
  },
  {
    "nl": "Sort a list of tuples `b` by third item in the tuple",
    "cmd": "b.sort(key=lambda x: x[1][2])",
    "question_id": "7349646-41",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#list.sort"
    ],
    "clean_cmd": "b.sort(key= x: x[][])",
    "canonical_cmd": "VAR_STR.sort(key=lambda x: x[1][2])"
  },
  {
    "nl": "get a list of all keys in Cassandra database `cf` with pycassa",
    "cmd": "list(cf.get_range().get_keys())",
    "question_id": "2430539-37",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#list"
    ],
    "clean_cmd": "list(cf.get_range().get_keys())",
    "canonical_cmd": "list(VAR_STR.get_range().get_keys())"
  },
  {
    "nl": "get the index of an integer `1` from a list `lst` if the list also contains boolean items",
    "cmd": "next(i for i, x in enumerate(lst) if not isinstance(x, bool) and x == 1)",
    "question_id": "30843103-34",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#isinstance",
      "python.library.functions#enumerate",
      "python.library.functions#next"
    ],
    "clean_cmd": "next(i  i, x  enumerate(lst)   isinstance(x, bool)  x == )",
    "canonical_cmd": "next(i for i, x in enumerate(VAR_STR) if not isinstance(x, bool) and x == 1)"
  },
  {
    "nl": "subtract 13 from every number in a list `a`",
    "cmd": "a[:] = [(x - 13) for x in a]",
    "question_id": "4918425-91",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "a[:] = [(x - )  x  a]",
    "canonical_cmd": "VAR_STR[:] = [(x - 13) for x in VAR_STR]"
  },
  {
    "nl": "choose a random file from the directory contents of the C drive, `C:\\\\`",
    "cmd": "random.choice(os.listdir('C:\\\\'))",
    "question_id": "701402-88",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.random#random.choice",
      "python.library.os#os.listdir"
    ],
    "clean_cmd": "random.choice(os.listdir())",
    "canonical_cmd": "random.choice(os.listdir('VAR_STR'))"
  },
  {
    "nl": "Get all urls within text `s`",
    "cmd": "re.findall('\"(http.*?)\"', s, re.MULTILINE | re.DOTALL)",
    "question_id": "30551576-22",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, s, re.MULTILINE | re.DOTALL)",
    "canonical_cmd": "re.findall('\"(http.*?)\"', VAR_STR, re.MULTILINE | re.DOTALL)"
  },
  {
    "nl": "match urls whose domain doesn't start with `t` from string `document` using regex",
    "cmd": "re.findall('http://[^t][^s\"]+\\\\.html', document)",
    "question_id": "30551576-58",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.re#re.findall"
    ],
    "clean_cmd": "re.findall(, document)",
    "canonical_cmd": "re.findall('http://[^t][^s\"]+\\\\.html', VAR_STR)"
  },
  {
    "nl": "split a string `mystring` considering the spaces ' '",
    "cmd": "mystring.replace(' ', '! !').split('!')",
    "question_id": "113534-21",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.stdtypes#str.replace",
      "python.library.stdtypes#str.split"
    ],
    "clean_cmd": "mystring.replace(, ).split()",
    "canonical_cmd": "VAR_STR.replace(' ', '! !').split('!')"
  },
  {
    "nl": "open file `path` with mode 'r'",
    "cmd": "open(path, 'r')",
    "question_id": "5838735-0",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.urllib.request#open"
    ],
    "clean_cmd": "open(path, )",
    "canonical_cmd": "open(VAR_STR, 'VAR_STR')"
  },
  {
    "nl": "sum elements at the same index in list `data`",
    "cmd": "[[sum(item) for item in zip(*items)] for items in zip(*data)]",
    "question_id": "36003967-69",
    "cmd_name": "conala",
    "oracle_man": [
      "python.library.functions#zip",
      "python.library.functions#sum"
    ],
    "clean_cmd": "[[sum(item)  item  zip(*items)]  items  zip(*data)]",
    "canonical_cmd": "[[sum(item) for item in zip(*items)] for items in zip(*VAR_STR)]"
  },
  {
    "nl": "add a new axis to array `a`",
    "cmd": "a[:, (np.newaxis)]",
    "question_id": "7635237-89",
    "cmd_name": "conala",
    "oracle_man": [],
    "clean_cmd": "a[:, (np.newaxis)]",
    "canonical_cmd": "VAR_STR[:, (np.newaxis)]"
  }
]