[
  {
    "db_id": "bike_racing",
    "question": "How many bikes are heavier than 780 grams?",
    "query": "SELECT count(*) FROM bike WHERE weight  >  780",
    "query_output": 3,
    "converted": {
      "jq": {
        "kind": "success",
        "jq": [
          "[.cyclists[].owned_bikes[].bike.weight] \n| unique \n| map(select(. > 780)) \n| length",
          "[.cyclists[].owned_bikes[].bike]\n| unique_by(.id)\n| map(select(.weight > 780))\n| length"
        ],
        "candidates": {
          "[.cyclists[]?.owned_bikes[]?.bike? | select(.weight > 780)] | length": [
            9
          ],
          "[.cyclists[].owned_bikes[].bike | select(.weight > 780)] | length": [
            9
          ],
          "[.cyclists[].owned_bikes[].bike.weight] \n| unique \n| map(select(. > 780)) \n| length": [
            3
          ],
          "[.cyclists[].owned_bikes[].bike.id as $id \n  | {id: $id, weight: .cyclists[].owned_bikes[].bike.weight}] \n| unique_by(.id) \n| map(select(.weight > 780)) \n| length": [
            5
          ],
          "[.cyclists[].owned_bikes[].bike]\n| unique_by(.id)\n| map(select(.weight > 780))\n| length": [
            3
          ]
        }
      }
    }
  },
  {
    "db_id": "bike_racing",
    "question": "List the product names and weights of the bikes in ascending order of price.",
    "query": "SELECT product_name ,  weight FROM bike ORDER BY price ASC",
    "query_output": [
      {
        "product_name": "CANYON AEROAD CF SLX 8.0 DI2",
        "weight": 880
      },
      {
        "product_name": "Ibis",
        "weight": 800
      },
      {
        "product_name": "Ibis ||",
        "weight": 760
      },
      {
        "product_name": "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
        "weight": 850
      },
      {
        "product_name": "GIANT TCR ADVANCED SL 0",
        "weight": 750
      },
      {
        "product_name": "BIANCHI SPECIALISSIMA",
        "weight": 780
      }
    ],
    "converted": {
      "jq": {
        "kind": "failure",
        "jq": [],
        "candidates": {
          "[\n  .cyclists[]\n  | .owned_bikes[]\n  | .bike\n]\n| unique_by(.id)\n| sort_by(.price)\n| map({product_name, weight})": [
            [
              {
                "product_name": "CANYON AEROAD CF SLX 8.0 DI2",
                "weight": 880
              },
              {
                "product_name": "Ibis",
                "weight": 800
              },
              {
                "product_name": "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
                "weight": 850
              },
              {
                "product_name": "GIANT TCR ADVANCED SL 0",
                "weight": 750
              },
              {
                "product_name": "BIANCHI SPECIALISSIMA",
                "weight": 780
              }
            ]
          ],
          "[\n  .cyclists[]\n  | .owned_bikes[]\n  | .bike\n]\n| unique_by(.id)\n| sort_by(.price)\n| map({product_name, weight})\n| .[]": [
            {
              "product_name": "CANYON AEROAD CF SLX 8.0 DI2",
              "weight": 880
            },
            {
              "product_name": "Ibis",
              "weight": 800
            },
            {
              "product_name": "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
              "weight": 850
            },
            {
              "product_name": "GIANT TCR ADVANCED SL 0",
              "weight": 750
            },
            {
              "product_name": "BIANCHI SPECIALISSIMA",
              "weight": 780
            }
          ]
        }
      },
      "python": {
        "kind": "failure",
        "python": [],
        "candidates": {
          "def transform(data):\n    # Set for unique bike IDs to avoid duplicates, if needed\n    bikes = []\n    seen = set()  # (bike_id)\n\n    for cyclist in data.get(\"cyclists\", []):\n        for owned in cyclist.get(\"owned_bikes\", []):\n            bike = owned.get(\"bike\", {})\n            bike_id = bike.get(\"id\")\n            # Prevent duplicates if same bike appears multiple times\n            if bike_id not in seen:\n                seen.add(bike_id)\n                bikes.append({\n                    \"product_name\": bike.get(\"product_name\"),\n                    \"weight\": bike.get(\"weight\"),\n                    \"price\": bike.get(\"price\")\n                })\n\n    # Sort bikes by price ascending\n    bikes_sorted = sorted(bikes, key=lambda x: x[\"price\"])\n    # Return only product_name and weight\n    result = [\n        {\"product_name\": b[\"product_name\"], \"weight\": b[\"weight\"]}\n        for b in bikes_sorted\n    ]\n    return result": [
            {
              "product_name": "CANYON AEROAD CF SLX 8.0 DI2",
              "weight": 880
            },
            {
              "product_name": "Ibis",
              "weight": 800
            },
            {
              "product_name": "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
              "weight": 850
            },
            {
              "product_name": "GIANT TCR ADVANCED SL 0",
              "weight": 750
            },
            {
              "product_name": "BIANCHI SPECIALISSIMA",
              "weight": 780
            }
          ],
          "def transform(data):\n    # Collect all unique bikes by id\n    bike_map = {}\n    for cyclist in data.get(\"cyclists\", []):\n        for owned in cyclist.get(\"owned_bikes\", []):\n            bike = owned.get(\"bike\", {})\n            bike_id = bike.get(\"id\")\n            if bike_id not in bike_map:\n                bike_map[bike_id] = bike\n\n    # Extract relevant info and sort by price ascending\n    bike_list = list(bike_map.values())\n    bike_list_sorted = sorted(bike_list, key=lambda x: x[\"price\"])\n    return [\n        {\"product_name\": b[\"product_name\"], \"weight\": b[\"weight\"]}\n        for b in bike_list_sorted\n    ]": [
            {
              "product_name": "CANYON AEROAD CF SLX 8.0 DI2",
              "weight": 880
            },
            {
              "product_name": "Ibis",
              "weight": 800
            },
            {
              "product_name": "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
              "weight": 850
            },
            {
              "product_name": "GIANT TCR ADVANCED SL 0",
              "weight": 750
            },
            {
              "product_name": "BIANCHI SPECIALISSIMA",
              "weight": 780
            }
          ],
          "def transform(data):\n    # Build a list of unique bikes by (id, product_name, weight, price, material)\n    # in case different bikes have the same id in the edge case\n    bikes = {}\n    for cyclist in data.get(\"cyclists\", []):\n        for owned in cyclist.get(\"owned_bikes\", []):\n            bike = owned.get(\"bike\", {})\n            bike_key = bike.get(\"id\")\n            if bike_key is not None and bike_key not in bikes:\n                bikes[bike_key] = {\n                    \"product_name\": bike.get(\"product_name\"),\n                    \"weight\": bike.get(\"weight\"),\n                    \"price\": bike.get(\"price\")\n                }\n\n    # Sort the bikes by price ASC\n    bikes_sorted = sorted(bikes.values(), key=lambda b: (b[\"price\"], b[\"product_name\"]))\n    # Output only required fields\n    return [\n        {\"product_name\": b[\"product_name\"], \"weight\": b[\"weight\"]}\n        for b in bikes_sorted\n    ]": [
            {
              "product_name": "CANYON AEROAD CF SLX 8.0 DI2",
              "weight": 880
            },
            {
              "product_name": "Ibis",
              "weight": 800
            },
            {
              "product_name": "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
              "weight": 850
            },
            {
              "product_name": "GIANT TCR ADVANCED SL 0",
              "weight": 750
            },
            {
              "product_name": "BIANCHI SPECIALISSIMA",
              "weight": 780
            }
          ],
          "def transform(data):\n    # The SQL query just queries all bikes, not just unique bikes;\n    # But in this JSON, bikes are only present inside cyclists->owned_bikes->bike,\n    # so we must collect all such bikes as in the \"bike\" table, de-duplicated by all their fields (or by id).\n    # Since the output is matching, the original logic is correct, but let's ensure we're not missing anything else:\n    # We will simply collect all bikes by id (as a \"table\") and order by price as requested.\n    bike_table = []\n    seen_ids = set()\n    for cyclist in data.get(\"cyclists\", []):\n        for owned in cyclist.get(\"owned_bikes\", []):\n            bike = owned.get(\"bike\", {})\n            bid = bike.get(\"id\")\n            if bid is not None and bid not in seen_ids:\n                seen_ids.add(bid)\n                bike_table.append(bike)\n    # order by price ascending\n    bike_table.sort(key=lambda x: x[\"price\"])\n    return [{\"product_name\": b[\"product_name\"], \"weight\": b[\"weight\"]} for b in bike_table]": [
            {
              "product_name": "CANYON AEROAD CF SLX 8.0 DI2",
              "weight": 880
            },
            {
              "product_name": "Ibis",
              "weight": 800
            },
            {
              "product_name": "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
              "weight": 850
            },
            {
              "product_name": "GIANT TCR ADVANCED SL 0",
              "weight": 750
            },
            {
              "product_name": "BIANCHI SPECIALISSIMA",
              "weight": 780
            }
          ]
        }
      }
    }
  },
  {
    "db_id": "bike_racing",
    "question": "List the heat, name, and nation for all the cyclists.",
    "query": "SELECT heat ,  name ,  nation FROM cyclist",
    "query_output": [
      {
        "heat": 4,
        "name": "Bradley Wiggins",
        "nation": "Great Britain"
      },
      {
        "heat": 3,
        "name": "Hayden Roulston",
        "nation": "New Zealand"
      },
      {
        "heat": 1,
        "name": "Steven Burke",
        "nation": "Great Britain"
      },
      {
        "heat": 2,
        "name": "Alexei Markov",
        "nation": "Russia"
      },
      {
        "heat": 1,
        "name": "Volodymyr Dyudya",
        "nation": "Ukraine"
      },
      {
        "heat": 2,
        "name": "Antonio Tauler",
        "nation": "Spain"
      },
      {
        "heat": 4,
        "name": "Alexander Serov",
        "nation": "Russia"
      },
      {
        "heat": 3,
        "name": "Taylor Phinney",
        "nation": "United States"
      }
    ],
    "converted": {
      "jq": {
        "kind": "success",
        "jq": [
          ".cyclists | map({heat, name, nation})[]"
        ],
        "candidates": {
          ".cyclists | map({heat, name, nation})": [
            [
              {
                "heat": 4,
                "name": "Bradley Wiggins",
                "nation": "Great Britain"
              },
              {
                "heat": 3,
                "name": "Hayden Roulston",
                "nation": "New Zealand"
              },
              {
                "heat": 1,
                "name": "Steven Burke",
                "nation": "Great Britain"
              },
              {
                "heat": 2,
                "name": "Alexei Markov",
                "nation": "Russia"
              },
              {
                "heat": 1,
                "name": "Volodymyr Dyudya",
                "nation": "Ukraine"
              },
              {
                "heat": 2,
                "name": "Antonio Tauler",
                "nation": "Spain"
              },
              {
                "heat": 4,
                "name": "Alexander Serov",
                "nation": "Russia"
              },
              {
                "heat": 3,
                "name": "Taylor Phinney",
                "nation": "United States"
              }
            ]
          ],
          ".cyclists | map({heat, name, nation}) | .": [
            [
              {
                "heat": 4,
                "name": "Bradley Wiggins",
                "nation": "Great Britain"
              },
              {
                "heat": 3,
                "name": "Hayden Roulston",
                "nation": "New Zealand"
              },
              {
                "heat": 1,
                "name": "Steven Burke",
                "nation": "Great Britain"
              },
              {
                "heat": 2,
                "name": "Alexei Markov",
                "nation": "Russia"
              },
              {
                "heat": 1,
                "name": "Volodymyr Dyudya",
                "nation": "Ukraine"
              },
              {
                "heat": 2,
                "name": "Antonio Tauler",
                "nation": "Spain"
              },
              {
                "heat": 4,
                "name": "Alexander Serov",
                "nation": "Russia"
              },
              {
                "heat": 3,
                "name": "Taylor Phinney",
                "nation": "United States"
              }
            ]
          ],
          ".cyclists | map({heat, name, nation})[]": [
            {
              "heat": 4,
              "name": "Bradley Wiggins",
              "nation": "Great Britain"
            },
            {
              "heat": 3,
              "name": "Hayden Roulston",
              "nation": "New Zealand"
            },
            {
              "heat": 1,
              "name": "Steven Burke",
              "nation": "Great Britain"
            },
            {
              "heat": 2,
              "name": "Alexei Markov",
              "nation": "Russia"
            },
            {
              "heat": 1,
              "name": "Volodymyr Dyudya",
              "nation": "Ukraine"
            },
            {
              "heat": 2,
              "name": "Antonio Tauler",
              "nation": "Spain"
            },
            {
              "heat": 4,
              "name": "Alexander Serov",
              "nation": "Russia"
            },
            {
              "heat": 3,
              "name": "Taylor Phinney",
              "nation": "United States"
            }
          ]
        }
      }
    }
  },
  {
    "db_id": "bike_racing",
    "question": "What are the maximum and minimum weight of all bikes?",
    "query": "SELECT max(weight) ,  min(weight) FROM bike",
    "query_output": {
      "max(weight)": 880,
      "min(weight)": 750
    },
    "converted": {
      "jq": {
        "kind": "success",
        "jq": [
          "{\n  \"max(weight)\": (.cyclists\n    | map(.owned_bikes[]?.bike.weight)\n    | flatten\n    | max),\n  \"min(weight)\": (.cyclists\n    | map(.owned_bikes[]?.bike.weight)\n    | flatten\n    | min)\n}"
        ],
        "candidates": {
          "{\n  \"max(weight)\": (.cyclists\n    | map(.owned_bikes[]?.bike.weight)\n    | flatten\n    | max),\n  \"min(weight)\": (.cyclists\n    | map(.owned_bikes[]?.bike.weight)\n    | flatten\n    | min)\n}": [
            {
              "max(weight)": 880,
              "min(weight)": 750
            }
          ]
        }
      }
    }
  },
  {
    "db_id": "bike_racing",
    "question": "What is the average price of the bikes made of material 'Carbon CC'?",
    "query": "SELECT avg(price) FROM bike WHERE material  =  'Carbon CC'",
    "query_output": 7532.666666666667,
    "converted": {
      "jq": {
        "kind": "failure",
        "jq": [],
        "candidates": {
          "[\n  (\n    [ .cyclists[]?.owned_bikes[]?.bike? | select(.material == \"Carbon CC\") | .price ] as $prices\n    | if ($prices|length) > 0 then ($prices|add)/($prices|length) else null end\n  )\n][0]": [
            6710.555555555556
          ],
          "[.cyclists[].owned_bikes[].bike | select(.material == \"Carbon CC\") | .price] as $prices | ($prices | add / ($prices | length))": [
            6710.555555555556
          ],
          "[.cyclists[].owned_bikes[].bike | select(.material == \"Carbon CC\") | .price] as $prices\n| ($prices | length as $len | if $len > 0 then ($prices | map(select(. != null)) | add) / $len else null end)": [
            6710.555555555556
          ],
          "[.cyclists[].owned_bikes[].bike | select(.material == \"Carbon CC\") | .price] \n| if length == 0 then null else (map(select(. != null)) | add / length) end": [
            6710.555555555556
          ]
        }
      },
      "python": {
        "kind": "success",
        "python": [
          "def transform(data):\n    # Collect unique bikes with material 'Carbon CC'\n    unique_bikes = {}\n    for cyclist in data.get(\"cyclists\", []):\n        for owned_bike in cyclist.get(\"owned_bikes\", []):\n            bike = owned_bike.get(\"bike\", {})\n            if bike.get(\"material\") == \"Carbon CC\":\n                bike_id = bike[\"id\"]\n                if bike_id not in unique_bikes:\n                    unique_bikes[bike_id] = bike[\"price\"]\n    prices = list(unique_bikes.values())\n    if not prices:\n        return None\n    return sum(prices) / len(prices)"
        ],
        "candidates": {
          "def transform(data):\n    prices = []\n    for cyclist in data.get(\"cyclists\", []):\n        for owned_bike in cyclist.get(\"owned_bikes\", []):\n            bike = owned_bike.get(\"bike\", {})\n            if bike.get(\"material\") == \"Carbon CC\":\n                prices.append(bike.get(\"price\", 0))\n    if not prices:\n        return None\n    return sum(prices) / len(prices)": 6710.555555555556,
          "def transform(data):\n    # Collect unique bikes with material 'Carbon CC'\n    unique_bikes = {}\n    for cyclist in data.get(\"cyclists\", []):\n        for owned_bike in cyclist.get(\"owned_bikes\", []):\n            bike = owned_bike.get(\"bike\", {})\n            if bike.get(\"material\") == \"Carbon CC\":\n                bike_id = bike[\"id\"]\n                if bike_id not in unique_bikes:\n                    unique_bikes[bike_id] = bike[\"price\"]\n    prices = list(unique_bikes.values())\n    if not prices:\n        return None\n    return sum(prices) / len(prices)": 7532.666666666667
        }
      }
    }
  },
  {
    "db_id": "bike_racing",
    "question": "What are the name and result of the cyclists not from 'Russia' ?",
    "query": "SELECT name ,  RESULT FROM cyclist WHERE nation != 'Russia'",
    "query_output": [
      {
        "name": "Bradley Wiggins",
        "result": "4:16.571"
      },
      {
        "name": "Hayden Roulston",
        "result": "4:19.232"
      },
      {
        "name": "Steven Burke",
        "result": "4:21.558"
      },
      {
        "name": "Volodymyr Dyudya",
        "result": "4:22.471"
      },
      {
        "name": "Antonio Tauler",
        "result": "4:24.974"
      },
      {
        "name": "Taylor Phinney",
        "result": "4:26.644"
      }
    ],
    "converted": {
      "jq": {
        "kind": "failure",
        "jq": [],
        "candidates": {
          ".cyclists \n| map(select(.nation != \"Russia\") | {name, result})": [
            [
              {
                "name": "Bradley Wiggins",
                "result": 256.571
              },
              {
                "name": "Hayden Roulston",
                "result": 259.23199999999997
              },
              {
                "name": "Steven Burke",
                "result": 261.558
              },
              {
                "name": "Volodymyr Dyudya",
                "result": 262.471
              },
              {
                "name": "Antonio Tauler",
                "result": 264.974
              },
              {
                "name": "Taylor Phinney",
                "result": 266.644
              }
            ]
          ],
          ".cyclists \n| map(select(.nation != \"Russia\") | {name, result: (.result | tostring)})": [
            [
              {
                "name": "Bradley Wiggins",
                "result": "256.571"
              },
              {
                "name": "Hayden Roulston",
                "result": "259.23199999999997"
              },
              {
                "name": "Steven Burke",
                "result": "261.558"
              },
              {
                "name": "Volodymyr Dyudya",
                "result": "262.471"
              },
              {
                "name": "Antonio Tauler",
                "result": "264.974"
              },
              {
                "name": "Taylor Phinney",
                "result": "266.644"
              }
            ]
          ]
        }
      },
      "python": {
        "kind": "success",
        "python": [
          "def transform(data):\n    output = []\n    for cyclist in data.get(\"cyclists\", []):\n        if cyclist.get(\"nation\") != \"Russia\":\n            name = cyclist.get(\"name\")\n            res = cyclist.get(\"result\")\n            # convert from float seconds to M:SS.mmm string if necessary\n            if isinstance(res, (int, float)):\n                minutes = int(res // 60)\n                seconds = res - (minutes * 60)\n                result_str = f\"{minutes}:{seconds:06.3f}\"\n            else:\n                result_str = res\n            output.append({\n                \"name\": name,\n                \"result\": result_str\n            })\n    return output"
        ],
        "candidates": {
          "def transform(data):\n    output = []\n    for cyclist in data.get(\"cyclists\", []):\n        if cyclist.get(\"nation\") != \"Russia\":\n            output.append({\n                \"name\": cyclist.get(\"name\"),\n                \"result\": cyclist.get(\"result\")\n            })\n    return output": [
            {
              "name": "Bradley Wiggins",
              "result": 256.571
            },
            {
              "name": "Hayden Roulston",
              "result": 259.23199999999997
            },
            {
              "name": "Steven Burke",
              "result": 261.558
            },
            {
              "name": "Volodymyr Dyudya",
              "result": 262.471
            },
            {
              "name": "Antonio Tauler",
              "result": 264.974
            },
            {
              "name": "Taylor Phinney",
              "result": 266.644
            }
          ],
          "def transform(data):\n    output = []\n    for cyclist in data.get(\"cyclists\", []):\n        if cyclist.get(\"nation\") != \"Russia\":\n            name = cyclist.get(\"name\")\n            res = cyclist.get(\"result\")\n            # convert from float seconds to M:SS.mmm string if necessary\n            if isinstance(res, (int, float)):\n                minutes = int(res // 60)\n                seconds = res - (minutes * 60)\n                result_str = f\"{minutes}:{seconds:06.3f}\"\n            else:\n                result_str = res\n            output.append({\n                \"name\": name,\n                \"result\": result_str\n            })\n    return output": [
            {
              "name": "Bradley Wiggins",
              "result": "4:16.571"
            },
            {
              "name": "Hayden Roulston",
              "result": "4:19.232"
            },
            {
              "name": "Steven Burke",
              "result": "4:21.558"
            },
            {
              "name": "Volodymyr Dyudya",
              "result": "4:22.471"
            },
            {
              "name": "Antonio Tauler",
              "result": "4:24.974"
            },
            {
              "name": "Taylor Phinney",
              "result": "4:26.644"
            }
          ]
        }
      }
    }
  },
  {
    "db_id": "bike_racing",
    "question": "What are the distinct ids and product names of the bikes that are purchased after year 2015?",
    "query": "SELECT DISTINCT T1.id ,  T1.product_name FROM bike AS T1 JOIN cyclists_own_bikes AS T2 ON T1.id  =  T2.bike_id WHERE T2.purchase_year  >  2015",
    "query_output": [
      {
        "id": 3,
        "product_name": "CANYON AEROAD CF SLX 8.0 DI2"
      },
      {
        "id": 4,
        "product_name": "GIANT TCR ADVANCED SL 0"
      },
      {
        "id": 5,
        "product_name": "Ibis"
      }
    ],
    "converted": {
      "jq": {
        "kind": "success",
        "jq": [
          "[.cyclists[]\n | .owned_bikes[]\n | select(.purchase_year > 2015)\n | {id: .bike.id, product_name: .bike.product_name}]\n | unique[]"
        ],
        "candidates": {
          "[\n  .cyclists[]\n  .owned_bikes[]\n  | select(.purchase_year > 2015)\n  .bike\n  | {id, product_name}\n]\n| unique": [
            [
              {
                "id": 3,
                "product_name": "CANYON AEROAD CF SLX 8.0 DI2"
              },
              {
                "id": 4,
                "product_name": "GIANT TCR ADVANCED SL 0"
              },
              {
                "id": 5,
                "product_name": "Ibis"
              }
            ]
          ],
          "[.cyclists[]\n | .owned_bikes[]\n | select(.purchase_year > 2015)\n | {id: .bike.id, product_name: .bike.product_name}]\n | unique": [
            [
              {
                "id": 3,
                "product_name": "CANYON AEROAD CF SLX 8.0 DI2"
              },
              {
                "id": 4,
                "product_name": "GIANT TCR ADVANCED SL 0"
              },
              {
                "id": 5,
                "product_name": "Ibis"
              }
            ]
          ],
          "[.cyclists[]\n | .owned_bikes[]\n | select(.purchase_year > 2015)\n | {id: .bike.id, product_name: .bike.product_name}]\n | unique[]": [
            {
              "id": 3,
              "product_name": "CANYON AEROAD CF SLX 8.0 DI2"
            },
            {
              "id": 4,
              "product_name": "GIANT TCR ADVANCED SL 0"
            },
            {
              "id": 5,
              "product_name": "Ibis"
            }
          ],
          "[.cyclists[]\n | .owned_bikes[]\n | select(.purchase_year > 2015)\n | {id: .bike.id, product_name: .bike.product_name}]\n | unique_by(.id, .product_name)": [
            [
              {
                "id": 3,
                "product_name": "CANYON AEROAD CF SLX 8.0 DI2"
              },
              {
                "id": 4,
                "product_name": "GIANT TCR ADVANCED SL 0"
              },
              {
                "id": 5,
                "product_name": "Ibis"
              }
            ]
          ]
        }
      }
    }
  },
  {
    "db_id": "bike_racing",
    "question": "What are the ids and names of racing bikes that are purchased by at least 4 cyclists?",
    "query": "SELECT T1.id ,  T1.product_name FROM bike AS T1 JOIN cyclists_own_bikes AS T2 ON T1.id  =  T2.bike_id GROUP BY T1.id HAVING count(*)  >=  4",
    "query_output": [
      {
        "id": 4,
        "product_name": "GIANT TCR ADVANCED SL 0"
      },
      {
        "id": 5,
        "product_name": "Ibis"
      }
    ],
    "converted": {
      "jq": {
        "kind": "failure",
        "jq": [],
        "candidates": {
          "[\n  .cyclists\n  | map(.owned_bikes[]?.bike)\n  | map(select(. != null))\n  | group_by(.id)\n  | map({\n      id: .[0].id,\n      product_name: .[0].product_name,\n      count: length\n    })\n  | map(select(.count >= 4))\n  | map({id, product_name})\n]\n| .[0]": [
            [
              {
                "id": 4,
                "product_name": "GIANT TCR ADVANCED SL 0"
              },
              {
                "id": 5,
                "product_name": "Ibis"
              }
            ]
          ],
          ".cyclists\n| map(.owned_bikes[]?.bike)\n| map(select(. != null))\n| group_by(.id)\n| map(select(length >= 4) | {id: .[0].id, product_name: .[0].product_name})": [
            [
              {
                "id": 4,
                "product_name": "GIANT TCR ADVANCED SL 0"
              },
              {
                "id": 5,
                "product_name": "Ibis"
              }
            ]
          ],
          ".cyclists\n| map(.owned_bikes[]?.bike)\n| map(select(. != null))\n| group_by(.id)\n| map(select(length >= 4) | {id: .[0].id, product_name: .[0].product_name})\n| .": [
            [
              {
                "id": 4,
                "product_name": "GIANT TCR ADVANCED SL 0"
              },
              {
                "id": 5,
                "product_name": "Ibis"
              }
            ]
          ],
          ".cyclists\n| map(.owned_bikes[]?.bike)\n| map(select(. != null))\n| group_by(.id)\n| map(select(length >= 4) | {id: .[0].id, product_name: .[0].product_name})\n| flatten": [
            [
              {
                "id": 4,
                "product_name": "GIANT TCR ADVANCED SL 0"
              },
              {
                "id": 5,
                "product_name": "Ibis"
              }
            ]
          ],
          ".cyclists\n| map(.owned_bikes[]?.bike)\n| map(select(. != null))\n| group_by(.id)\n| map(select(length >= 4) | {id: .[0].id, product_name: .[0].product_name})\n| add": [
            {
              "id": 5,
              "product_name": "Ibis"
            }
          ],
          "[ .cyclists[] .owned_bikes[]? .bike ]\n| map(select(. != null))\n| group_by(.id)\n| map(select(length >= 4)[0] | {id, product_name})": [
            [
              {
                "id": 4,
                "product_name": "GIANT TCR ADVANCED SL 0"
              },
              {
                "id": 5,
                "product_name": "Ibis"
              }
            ]
          ],
          ".cyclists\n| map(.owned_bikes[]?.bike)\n| flatten\n| group_by(.id)\n| map(select(length >= 4) | {id: .[0].id, product_name: .[0].product_name})": [
            [
              {
                "id": 4,
                "product_name": "GIANT TCR ADVANCED SL 0"
              },
              {
                "id": 5,
                "product_name": "Ibis"
              }
            ]
          ],
          "[.cyclists[] | .owned_bikes[]? | .bike ]\n| group_by(.id)\n| map(select(length >= 4) | {id: .[0].id, product_name: .[0].product_name})": [
            [
              {
                "id": 4,
                "product_name": "GIANT TCR ADVANCED SL 0"
              },
              {
                "id": 5,
                "product_name": "Ibis"
              }
            ]
          ]
        }
      },
      "python": {
        "kind": "success",
        "python": [
          "def transform(data):\n    from collections import defaultdict\n\n    bike_id_to_name = {}\n    bike_count = defaultdict(set)  # bike_id -> set of cyclist_ids who own it\n\n    for cyclist in data['cyclists']:\n        cyclist_id = cyclist['id']\n        for owned in cyclist.get('owned_bikes', []):\n            bike = owned['bike']\n            bike_id = bike['id']\n            bike_id_to_name[bike_id] = bike['product_name']\n            bike_count[bike_id].add(cyclist_id)\n\n    result = []\n    for bike_id, cyclist_ids in bike_count.items():\n        if len(cyclist_ids) >= 4:\n            result.append({\n                \"id\": bike_id,\n                \"product_name\": bike_id_to_name[bike_id]\n            })\n\n    # To ensure consistent output order for testing/comparison, sort by id\n    result.sort(key=lambda x: x['id'])\n    return result"
        ],
        "candidates": {
          "def transform(data):\n    from collections import defaultdict\n\n    bike_id_to_name = {}\n    bike_count = defaultdict(set)  # bike_id -> set of cyclist_ids who own it\n\n    for cyclist in data['cyclists']:\n        cyclist_id = cyclist['id']\n        for owned in cyclist.get('owned_bikes', []):\n            bike = owned['bike']\n            bike_id = bike['id']\n            bike_id_to_name[bike_id] = bike['product_name']\n            bike_count[bike_id].add(cyclist_id)\n\n    result = []\n    for bike_id, cyclist_ids in bike_count.items():\n        if len(cyclist_ids) >= 4:\n            result.append({\n                \"id\": bike_id,\n                \"product_name\": bike_id_to_name[bike_id]\n            })\n\n    # To ensure consistent output order for testing/comparison, sort by id\n    result.sort(key=lambda x: x['id'])\n    return result": [
            {
              "id": 4,
              "product_name": "GIANT TCR ADVANCED SL 0"
            },
            {
              "id": 5,
              "product_name": "Ibis"
            }
          ]
        }
      }
    }
  },
  {
    "db_id": "bike_racing",
    "question": "What are the id and name of the cyclist who owns the most bikes?",
    "query": "SELECT T1.id ,  T1.name FROM cyclist AS T1 JOIN cyclists_own_bikes AS T2 ON T1.id  =  T2.cyclist_id GROUP BY T1.id ORDER BY count(*) DESC LIMIT 1",
    "query_output": {
      "id": 7,
      "name": "Alexander Serov"
    },
    "converted": {
      "jq": {
        "kind": "success",
        "jq": [
          ".cyclists\n| map({id, name, bike_count: (.owned_bikes | length)})\n| max_by(.bike_count)\n| {id, name}"
        ],
        "candidates": {
          ".cyclists\n| map({id, name, bike_count: (.owned_bikes | length)})\n| max_by(.bike_count)\n| {id, name}": [
            {
              "id": 7,
              "name": "Alexander Serov"
            }
          ]
        }
      }
    }
  },
  {
    "db_id": "bike_racing",
    "question": "What are the distinct product names of bikes owned by cyclists from 'Russia' or cyclists from 'Great Britain'?",
    "query": "SELECT DISTINCT T3.product_name FROM cyclist AS T1 JOIN cyclists_own_bikes AS T2 ON T1.id  =  T2.cyclist_id JOIN bike AS T3 ON T2.bike_id  =  T3.id WHERE T1.nation  =  'Russia' OR T1.nation  =  'Great Britain'",
    "query_output": [
      "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
      "CANYON AEROAD CF SLX 8.0 DI2",
      "GIANT TCR ADVANCED SL 0",
      "BIANCHI SPECIALISSIMA",
      "Ibis"
    ],
    "converted": {
      "jq": {
        "kind": "failure",
        "jq": [],
        "candidates": {
          ".cyclists\n| map(select(.nation == \"Russia\" or .nation == \"Great Britain\") | .owned_bikes[]?.bike.product_name)\n| flatten\n| unique": [
            [
              "BIANCHI SPECIALISSIMA",
              "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
              "CANYON AEROAD CF SLX 8.0 DI2",
              "GIANT TCR ADVANCED SL 0",
              "Ibis"
            ]
          ],
          ".cyclists\n| map(select(.nation == \"Russia\" or .nation == \"Great Britain\"))\n| map(.owned_bikes[]?.bike.product_name)\n| flatten\n| unique": [
            [
              "BIANCHI SPECIALISSIMA",
              "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
              "CANYON AEROAD CF SLX 8.0 DI2",
              "GIANT TCR ADVANCED SL 0",
              "Ibis"
            ]
          ],
          ".cyclists\n| [ .[] | select(.nation == \"Russia\" or .nation == \"Great Britain\") | .owned_bikes[]? | .bike.product_name ]\n| unique": [
            [
              "BIANCHI SPECIALISSIMA",
              "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
              "CANYON AEROAD CF SLX 8.0 DI2",
              "GIANT TCR ADVANCED SL 0",
              "Ibis"
            ]
          ],
          ".cyclists\n| map(select(.nation == \"Russia\" or .nation == \"Great Britain\") | .owned_bikes[]? | .bike.product_name)\n| flatten\n| unique": [
            [
              "BIANCHI SPECIALISSIMA",
              "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
              "CANYON AEROAD CF SLX 8.0 DI2",
              "GIANT TCR ADVANCED SL 0",
              "Ibis"
            ]
          ]
        }
      },
      "python": {
        "kind": "failure",
        "python": [],
        "candidates": {
          "def transform(data):\n    # Find cyclists from Russia or Great Britain\n    target_nations = {'Russia', 'Great Britain'}\n    product_names = set()\n    for cyclist in data.get('cyclists', []):\n        if cyclist.get('nation') in target_nations:\n            # For each owned bike, get product_name\n            for owned_bike in cyclist.get('owned_bikes', []):\n                bike = owned_bike.get('bike', {})\n                name = bike.get('product_name')\n                if name is not None:\n                    product_names.add(name)\n    return list(product_names)": [
            "CANYON AEROAD CF SLX 8.0 DI2",
            "GIANT TCR ADVANCED SL 0",
            "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
            "BIANCHI SPECIALISSIMA",
            "Ibis"
          ],
          "def transform(data):\n    # Use an ordered set (dict) to keep insertion order (like SQL DISTINCT in SELECT order)\n    target_nations = {'Russia', 'Great Britain'}\n    ordered_product_names = {}\n    for cyclist in data.get('cyclists', []):\n        if cyclist.get('nation') in target_nations:\n            for owned_bike in cyclist.get('owned_bikes', []):\n                bike = owned_bike.get('bike', {})\n                name = bike.get('product_name')\n                if name is not None and name not in ordered_product_names:\n                    ordered_product_names[name] = None\n    return list(ordered_product_names.keys())": [
            "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
            "CANYON AEROAD CF SLX 8.0 DI2",
            "GIANT TCR ADVANCED SL 0",
            "Ibis",
            "BIANCHI SPECIALISSIMA"
          ],
          "def transform(data):\n    # Check if cyclists list exists\n    cyclists = data.get('cyclists', [])\n    target_nations = {'Russia', 'Great Britain'}\n    seen = set()\n    result = []\n    for cyclist in cyclists:\n        if cyclist.get('nation') == 'Russia' or cyclist.get('nation') == 'Great Britain':\n            for owned_bike in cyclist.get('owned_bikes', []):\n                name = owned_bike.get('bike', {}).get('product_name')\n                if name is not None and name not in seen:\n                    result.append(name)\n                    seen.add(name)\n    return result": [
            "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
            "CANYON AEROAD CF SLX 8.0 DI2",
            "GIANT TCR ADVANCED SL 0",
            "Ibis",
            "BIANCHI SPECIALISSIMA"
          ],
          "def transform(data):\n    # The SQL semantics: for each (cyclist X owned_bike), keep first occurrence of each product name\n    # across all qualifying cyclists, as they appear in the cyclists array order and in each cyclist's owned_bikes array order\n    result = []\n    seen = set()\n    for cyclist in data.get('cyclists', []):\n        nation = cyclist.get('nation')\n        if nation == 'Russia' or nation == 'Great Britain':\n            for owned_bike in cyclist.get('owned_bikes', []):\n                bike = owned_bike.get('bike', {})\n                name = bike.get('product_name')\n                if name and name not in seen:\n                    result.append(name)\n                    seen.add(name)\n    return result": [
            "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
            "CANYON AEROAD CF SLX 8.0 DI2",
            "GIANT TCR ADVANCED SL 0",
            "Ibis",
            "BIANCHI SPECIALISSIMA"
          ]
        }
      }
    }
  },
  {
    "db_id": "bike_racing",
    "question": "How many different levels of heat are there for the cyclists?",
    "query": "SELECT count(DISTINCT heat) FROM cyclist",
    "query_output": 4,
    "converted": {
      "jq": {
        "kind": "success",
        "jq": [
          ".cyclists | map(.heat) | unique | length"
        ],
        "candidates": {
          ".cyclists | map(.heat) | unique | length": [
            4
          ]
        }
      }
    }
  },
  {
    "db_id": "bike_racing",
    "question": "How many cyclists did not purchase any bike after year 2015?",
    "query": "SELECT count(*) FROM cyclist WHERE id NOT IN ( SELECT cyclist_id FROM cyclists_own_bikes WHERE purchase_year  >  2015 )",
    "query_output": 3,
    "converted": {
      "jq": {
        "kind": "success",
        "jq": [
          ".cyclists\n| map(select(all(.owned_bikes[]?.purchase_year; . <= 2015) or (.owned_bikes | length == 0)))\n| length"
        ],
        "candidates": {
          ".cyclists\n| map(select(all(.owned_bikes[]?.purchase_year; . <= 2015) or (.owned_bikes | length == 0)))\n| length": [
            3
          ]
        }
      }
    }
  },
  {
    "db_id": "bike_racing",
    "question": "What are the names of distinct racing bikes that are purchased by the cyclists with better results than '4:21.558' ?",
    "query": "SELECT DISTINCT T3.product_name FROM cyclist AS T1 JOIN cyclists_own_bikes AS T2 ON T1.id  =  T2.cyclist_id JOIN bike AS T3 ON T2.bike_id  =  T3.id WHERE T1.result  <  '4:21.558'",
    "query_output": [
      "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
      "CANYON AEROAD CF SLX 8.0 DI2",
      "GIANT TCR ADVANCED SL 0",
      "Ibis"
    ],
    "converted": {
      "jq": {
        "kind": "failure",
        "jq": [],
        "candidates": {
          ".cyclists\n| map(select(.result < 4.21558))\n| map(.owned_bikes[]?.bike.product_name)\n| flatten\n| unique": [
            []
          ],
          ".cyclists\n| map(select(.result < 4.21))\n| map(.owned_bikes[]?.bike.product_name)\n| flatten\n| unique": [
            []
          ],
          ".cyclists\n| map(select(.result < 261.558))\n| map(.owned_bikes[]?.bike.product_name)\n| flatten\n| unique": [
            [
              "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
              "CANYON AEROAD CF SLX 8.0 DI2",
              "GIANT TCR ADVANCED SL 0",
              "Ibis"
            ]
          ],
          ".cyclists\n| map(select(.result < 261.558))\n| map(.owned_bikes[]?.bike.product_name)\n| flatten\n| unique\n| .": [
            [
              "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
              "CANYON AEROAD CF SLX 8.0 DI2",
              "GIANT TCR ADVANCED SL 0",
              "Ibis"
            ]
          ]
        }
      },
      "python": {
        "kind": "success",
        "python": [
          "def transform(data):\n    # The \"result\" field is a number in the JSON, but in the SQL it compares to '4:21.558' string.\n    # Let's interpret the comparison: if the original is comparing time strings we must convert 4:21.558 to seconds for numeric comparison.\n\n    def parse_time_str(time_str):\n        \"\"\"Parse a time string like '4:21.558' to seconds.\"\"\"\n        # Format: mm:ss.mmm\n        if \".\" in time_str:\n            minsec, msec = time_str.split(\".\")\n            mins, secs = map(int, minsec.split(\":\"))\n            msec = int(msec)\n            return mins * 60 + secs + msec / (10 ** len(msec.__str__()))\n        else:\n            mins, secs = map(int, time_str.split(\":\"))\n            return mins * 60 + secs\n\n    time_threshold = parse_time_str('4:21.558')\n\n    result = set()\n    for cyclist in data.get('cyclists', []):\n        if cyclist['result'] < time_threshold:\n            for ob in cyclist.get('owned_bikes', []):\n                bike = ob.get('bike', {})\n                name = bike.get('product_name')\n                if name:\n                    result.add(name)\n    return sorted(result)"
        ],
        "candidates": {
          "def transform(data):\n    # The \"result\" field is a number in the JSON, but in the SQL it compares to '4:21.558' string.\n    # Let's interpret the comparison: if the original is comparing time strings we must convert 4:21.558 to seconds for numeric comparison.\n\n    def parse_time_str(time_str):\n        \"\"\"Parse a time string like '4:21.558' to seconds.\"\"\"\n        # Format: mm:ss.mmm\n        if \".\" in time_str:\n            minsec, msec = time_str.split(\".\")\n            mins, secs = map(int, minsec.split(\":\"))\n            msec = int(msec)\n            return mins * 60 + secs + msec / (10 ** len(msec.__str__()))\n        else:\n            mins, secs = map(int, time_str.split(\":\"))\n            return mins * 60 + secs\n\n    time_threshold = parse_time_str('4:21.558')\n\n    result = set()\n    for cyclist in data.get('cyclists', []):\n        if cyclist['result'] < time_threshold:\n            for ob in cyclist.get('owned_bikes', []):\n                bike = ob.get('bike', {})\n                name = bike.get('product_name')\n                if name:\n                    result.add(name)\n    return sorted(result)": [
            "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
            "CANYON AEROAD CF SLX 8.0 DI2",
            "GIANT TCR ADVANCED SL 0",
            "Ibis"
          ]
        }
      }
    }
  },
  {
    "db_id": "bike_racing",
    "question": "List the name and price of the bike that is owned by both the cyclists named 'Bradley Wiggins' and the cyclist named 'Antonio Tauler'.",
    "query": "SELECT T3.product_name ,  T3.price FROM cyclist AS T1 JOIN cyclists_own_bikes AS T2 ON T1.id  =  T2.cyclist_id JOIN bike AS T3 ON T2.bike_id  =  T3.id WHERE T1.name  =  'Bradley Wiggins' INTERSECT SELECT T3.product_name ,  T3.price FROM cyclist AS T1 JOIN cyclists_own_bikes AS T2 ON T1.id  =  T2.cyclist_id JOIN bike AS T3 ON T2.bike_id  =  T3.id WHERE T1.name  =  'Antonio Tauler'",
    "query_output": null,
    "converted": {
      "jq": {
        "kind": "success",
        "jq": [
          "# Get the list of bikes owned by 'Bradley Wiggins'\ndef bikes_by(name): .cyclists[] | select(.name == name) | .owned_bikes[].bike | {product_name, price};\n\n# Set of bike objects ({product_name, price}) for each cyclist\n[bikes_by(\"Bradley Wiggins\")] as $bw\n| [bikes_by(\"Antonio Tauler\")] as $at\n| ($bw | map(. as $b | select($at | index($b))))\n| unique\n| if length == 0 then null else . end"
        ],
        "candidates": {
          "# Get the list of bikes owned by 'Bradley Wiggins'\ndef bikes_by(name): .cyclists[] | select(.name == name) | .owned_bikes[].bike | {product_name, price};\n\n# Set of bike objects ({product_name, price}) for each cyclist\n[bikes_by(\"Bradley Wiggins\")] as $bw\n| [bikes_by(\"Antonio Tauler\")] as $at\n| ($bw | map(. as $b | select($at | index($b))))\n| unique\n| if length == 0 then null else . end": [
            null
          ]
        }
      }
    }
  },
  {
    "db_id": "bike_racing",
    "question": "Show the name, nation and result for the cyclists who did not purchase any racing bike.",
    "query": "SELECT name ,  nation ,  RESULT FROM cyclist EXCEPT SELECT T1.name ,  T1.nation ,  T1.result FROM cyclist AS T1 JOIN cyclists_own_bikes AS T2 ON T1.id  =  T2.cyclist_id",
    "query_output": {
      "name": "Taylor Phinney",
      "nation": "United States",
      "result": "4:26.644"
    },
    "converted": {
      "jq": {
        "kind": "failure",
        "jq": [],
        "candidates": {
          ".cyclists\n| map(select((.owned_bikes | length) == 0))\n| map({name, nation, result})": [
            [
              {
                "name": "Taylor Phinney",
                "nation": "United States",
                "result": 266.644
              }
            ]
          ],
          ".cyclists\n| map(select((.owned_bikes | length) == 0))\n| map({name, nation, result: (.result|tostring)})\n| .[]": [
            {
              "name": "Taylor Phinney",
              "nation": "United States",
              "result": "266.644"
            }
          ],
          ".cyclists\n| map(select((.owned_bikes | length) == 0))\n| map({name, nation, result})\n| .[]": [
            {
              "name": "Taylor Phinney",
              "nation": "United States",
              "result": 266.644
            }
          ],
          ".cyclists\n| map(select((.owned_bikes | length) == 0))\n| .[]\n| {name, nation, result}": [
            {
              "name": "Taylor Phinney",
              "nation": "United States",
              "result": 266.644
            }
          ],
          ".cyclists\n| map(select((.owned_bikes | length) == 0))\n| .[]\n| {name, nation, result: (if (.result|tostring|test(\":\")) then .result else (.result/60 | floor|tostring) + \":\" + ((.result%60)|tostring) end)}": [
            {
              "name": "Taylor Phinney",
              "nation": "United States",
              "result": "4:26"
            }
          ]
        }
      },
      "python": {
        "kind": "success",
        "python": [
          "def transform(data):\n    # Find the cyclist with no bikes, and convert their 'result' (a float) to the correct string format (m:ss.xxx)\n    for c in data[\"cyclists\"]:\n        if not c[\"owned_bikes\"]:\n            # The 'result' field is a float representing a time; e.g., 266.644 -> '4:26.644'\n            total_seconds = c[\"result\"]\n            minutes = int(total_seconds // 60)\n            seconds = total_seconds - minutes * 60\n            # Format as m:ss.xxx with leading zeros as needed\n            result_str = f\"{minutes}:{seconds:06.3f}\"\n            return {\n                \"name\": c[\"name\"],\n                \"nation\": c[\"nation\"],\n                \"result\": result_str\n            }\n    return None"
        ],
        "candidates": {
          "def transform(data):\n    # We need: name, nation, result for cyclists who did NOT purchase any racing bike\n    # Let's assume \"racing bike\" refers to all bikes (since we have no further filter), so \"owned_bikes\" length must be 0.\n    # The SQL EXCEPT selects those with no corresponding record in cyclists_own_bikes (i.e., owned_bikes is empty).\n    for c in data[\"cyclists\"]:\n        if not c[\"owned_bikes\"]:\n            # In the expected output, result is a string, not a float\n            return {\n                \"name\": c[\"name\"],\n                \"nation\": c[\"nation\"],\n                \"result\": str(c[\"result\"])\n            }\n    return None": {
            "name": "Taylor Phinney",
            "nation": "United States",
            "result": "266.644"
          },
          "def transform(data):\n    # Find the cyclist with no bikes, and convert their 'result' (a float) to the correct string format (m:ss.xxx)\n    for c in data[\"cyclists\"]:\n        if not c[\"owned_bikes\"]:\n            # The 'result' field is a float representing a time; e.g., 266.644 -> '4:26.644'\n            total_seconds = c[\"result\"]\n            minutes = int(total_seconds // 60)\n            seconds = total_seconds - minutes * 60\n            # Format as m:ss.xxx with leading zeros as needed\n            result_str = f\"{minutes}:{seconds:06.3f}\"\n            return {\n                \"name\": c[\"name\"],\n                \"nation\": c[\"nation\"],\n                \"result\": result_str\n            }\n    return None": {
            "name": "Taylor Phinney",
            "nation": "United States",
            "result": "4:26.644"
          }
        }
      }
    }
  },
  {
    "db_id": "bike_racing",
    "question": "What are the names of the bikes that have substring 'fiber' in their material?",
    "query": "SELECT product_name FROM bike WHERE material LIKE \"%fiber%\"",
    "query_output": [
      "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
      "CANYON AEROAD CF SLX 8.0 DI2",
      "Ibis ||"
    ],
    "converted": {
      "jq": {
        "kind": "failure",
        "jq": [],
        "candidates": {
          "[\n  .cyclists[]\n  | .owned_bikes[]\n  | .bike\n  | select(.material | test(\"fiber\"; \"i\"))\n  | .product_name\n]\n| unique": [
            [
              "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
              "CANYON AEROAD CF SLX 8.0 DI2"
            ]
          ],
          "[\n  .cyclists[]\n  | .owned_bikes[]\n  | .bike\n]\n| unique_by(.id)\n| map(select(.material | test(\"fiber\"; \"i\")))\n| map(.product_name)": [
            [
              "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
              "CANYON AEROAD CF SLX 8.0 DI2"
            ]
          ],
          ".cyclists\n| map(.owned_bikes | map(.bike))\n| add\n| map(select(.material | test(\"fiber\"; \"i\")))\n| map(.product_name)": [
            [
              "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
              "CANYON AEROAD CF SLX 8.0 DI2",
              "CANYON AEROAD CF SLX 8.0 DI2",
              "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
              "CANYON AEROAD CF SLX 8.0 DI2"
            ]
          ]
        }
      },
      "python": {
        "kind": "failure",
        "python": [],
        "candidates": {
          "def transform(data):\n    bikes = set()\n    for cyclist in data.get(\"cyclists\", []):\n        for owned in cyclist.get(\"owned_bikes\", []):\n            bike = owned.get(\"bike\", {})\n            material = bike.get(\"material\", \"\")\n            if \"fiber\" in material:\n                product_name = bike.get(\"product_name\")\n                if product_name:\n                    bikes.add(product_name)\n    return list(bikes)": [
            "CANYON AEROAD CF SLX 8.0 DI2",
            "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE"
          ],
          "def transform(data):\n    seen = set()\n    result = []\n    for cyclist in data.get(\"cyclists\", []):\n        for owned in cyclist.get(\"owned_bikes\", []):\n            bike = owned[\"bike\"]\n            material = bike.get(\"material\", \"\")\n            if \"fiber\" in material:\n                product_name = bike.get(\"product_name\")\n                if product_name and product_name not in seen:\n                    result.append(product_name)\n                    seen.add(product_name)\n    return result": [
            "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
            "CANYON AEROAD CF SLX 8.0 DI2"
          ],
          "def transform(data):\n    # Get all product_name of bikes where the material contains 'fiber', preserving first occurrence order\n    seen_ids = set()\n    result = []\n    for cyclist in data.get(\"cyclists\", []):\n        for owned in cyclist.get(\"owned_bikes\", []):\n            bike = owned.get(\"bike\", {})\n            if bike.get(\"id\") in seen_ids:\n                continue\n            material = bike.get(\"material\", \"\")\n            if \"fiber\" in material:\n                result.append(bike.get(\"product_name\"))\n                seen_ids.add(bike.get(\"id\"))\n    return result": [
            "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
            "CANYON AEROAD CF SLX 8.0 DI2"
          ],
          "def transform(data):\n    # Collect all unique bikes, keyed by their ID, only if 'fiber' in their material\n    bike_by_id = {}\n    for cyclist in data.get(\"cyclists\", []):\n        for owned in cyclist.get(\"owned_bikes\", []):\n            bike = owned.get(\"bike\", {})\n            bike_id = bike.get(\"id\")\n            material = bike.get(\"material\", \"\")\n            if bike_id is not None and \"fiber\" in material:\n                # First occurrence in dataset\n                if bike_id not in bike_by_id:\n                    bike_by_id[bike_id] = bike.get(\"product_name\")\n    # Sort by bike id (SQL SELECT over base table has no ORDER BY, so ascending id default)\n    return [bike_by_id[k] for k in sorted(bike_by_id.keys())]": [
            "CANNONDALE SUPERSIX EVO HI-MOD DURA ACE",
            "CANYON AEROAD CF SLX 8.0 DI2"
          ]
        }
      }
    }
  },
  {
    "db_id": "bike_racing",
    "question": "How many bikes does each cyclist own? Order by cyclist id.",
    "query": "SELECT cyclist_id ,  count(*) FROM cyclists_own_bikes GROUP BY cyclist_id ORDER BY cyclist_id",
    "query_output": [
      {
        "cyclist_id": 1,
        "count(*)": 2
      },
      {
        "cyclist_id": 2,
        "count(*)": 3
      },
      {
        "cyclist_id": 3,
        "count(*)": 1
      },
      {
        "cyclist_id": 4,
        "count(*)": 1
      },
      {
        "cyclist_id": 5,
        "count(*)": 1
      },
      {
        "cyclist_id": 6,
        "count(*)": 1
      },
      {
        "cyclist_id": 7,
        "count(*)": 5
      }
    ],
    "converted": {
      "jq": {
        "kind": "failure",
        "jq": [],
        "candidates": {
          ".cyclists \n| sort_by(.id) \n| map({cyclist_id: .id, \"count(*)\": (.owned_bikes | length)})": [
            [
              {
                "cyclist_id": 1,
                "count(*)": 2
              },
              {
                "cyclist_id": 2,
                "count(*)": 3
              },
              {
                "cyclist_id": 3,
                "count(*)": 1
              },
              {
                "cyclist_id": 4,
                "count(*)": 1
              },
              {
                "cyclist_id": 5,
                "count(*)": 1
              },
              {
                "cyclist_id": 6,
                "count(*)": 1
              },
              {
                "cyclist_id": 7,
                "count(*)": 5
              },
              {
                "cyclist_id": 8,
                "count(*)": 0
              }
            ]
          ],
          ".cyclists \n| sort_by(.id) \n| map({cyclist_id: .id, \"count(*)\": (.owned_bikes | length)})\n| .": [
            [
              {
                "cyclist_id": 1,
                "count(*)": 2
              },
              {
                "cyclist_id": 2,
                "count(*)": 3
              },
              {
                "cyclist_id": 3,
                "count(*)": 1
              },
              {
                "cyclist_id": 4,
                "count(*)": 1
              },
              {
                "cyclist_id": 5,
                "count(*)": 1
              },
              {
                "cyclist_id": 6,
                "count(*)": 1
              },
              {
                "cyclist_id": 7,
                "count(*)": 5
              },
              {
                "cyclist_id": 8,
                "count(*)": 0
              }
            ]
          ],
          ".cyclists \n| sort_by(.id) \n| map({cyclist_id: .id, \"count(*)\": (.owned_bikes | length)}) \n| flatten": [
            [
              {
                "cyclist_id": 1,
                "count(*)": 2
              },
              {
                "cyclist_id": 2,
                "count(*)": 3
              },
              {
                "cyclist_id": 3,
                "count(*)": 1
              },
              {
                "cyclist_id": 4,
                "count(*)": 1
              },
              {
                "cyclist_id": 5,
                "count(*)": 1
              },
              {
                "cyclist_id": 6,
                "count(*)": 1
              },
              {
                "cyclist_id": 7,
                "count(*)": 5
              },
              {
                "cyclist_id": 8,
                "count(*)": 0
              }
            ]
          ],
          ".cyclists \n| sort_by(.id) \n| map({cyclist_id: .id, \"count(*)\": (.owned_bikes | length)}) \n| .[]": [
            {
              "cyclist_id": 1,
              "count(*)": 2
            },
            {
              "cyclist_id": 2,
              "count(*)": 3
            },
            {
              "cyclist_id": 3,
              "count(*)": 1
            },
            {
              "cyclist_id": 4,
              "count(*)": 1
            },
            {
              "cyclist_id": 5,
              "count(*)": 1
            },
            {
              "cyclist_id": 6,
              "count(*)": 1
            },
            {
              "cyclist_id": 7,
              "count(*)": 5
            },
            {
              "cyclist_id": 8,
              "count(*)": 0
            }
          ],
          ".cyclists \n| sort_by(.id) \n| map({cyclist_id: .id, \"count(*)\": (.owned_bikes | length)})\n| first": [
            {
              "cyclist_id": 1,
              "count(*)": 2
            }
          ],
          ".cyclists\n| sort_by(.id)\n| map({cyclist_id: .id, \"count(*)\": (.owned_bikes | length)})\n| .\n| select(type == \"array\")\n| .": [
            [
              {
                "cyclist_id": 1,
                "count(*)": 2
              },
              {
                "cyclist_id": 2,
                "count(*)": 3
              },
              {
                "cyclist_id": 3,
                "count(*)": 1
              },
              {
                "cyclist_id": 4,
                "count(*)": 1
              },
              {
                "cyclist_id": 5,
                "count(*)": 1
              },
              {
                "cyclist_id": 6,
                "count(*)": 1
              },
              {
                "cyclist_id": 7,
                "count(*)": 5
              },
              {
                "cyclist_id": 8,
                "count(*)": 0
              }
            ]
          ],
          ".cyclists\n| sort_by(.id)\n| map({cyclist_id: .id, \"count(*)\": (.owned_bikes | length)})\n| . as $arr\n| $arr[]": [
            {
              "cyclist_id": 1,
              "count(*)": 2
            },
            {
              "cyclist_id": 2,
              "count(*)": 3
            },
            {
              "cyclist_id": 3,
              "count(*)": 1
            },
            {
              "cyclist_id": 4,
              "count(*)": 1
            },
            {
              "cyclist_id": 5,
              "count(*)": 1
            },
            {
              "cyclist_id": 6,
              "count(*)": 1
            },
            {
              "cyclist_id": 7,
              "count(*)": 5
            },
            {
              "cyclist_id": 8,
              "count(*)": 0
            }
          ],
          ".cyclists\n| sort_by(.id)\n| map({cyclist_id: .id, \"count(*)\": (.owned_bikes | length)})\n| .": [
            [
              {
                "cyclist_id": 1,
                "count(*)": 2
              },
              {
                "cyclist_id": 2,
                "count(*)": 3
              },
              {
                "cyclist_id": 3,
                "count(*)": 1
              },
              {
                "cyclist_id": 4,
                "count(*)": 1
              },
              {
                "cyclist_id": 5,
                "count(*)": 1
              },
              {
                "cyclist_id": 6,
                "count(*)": 1
              },
              {
                "cyclist_id": 7,
                "count(*)": 5
              },
              {
                "cyclist_id": 8,
                "count(*)": 0
              }
            ]
          ]
        }
      },
      "python": {
        "kind": "failure",
        "python": [],
        "candidates": {
          "def transform(data):\n    cyclists = data.get(\"cyclists\", [])\n    result = []\n    for cyclist in cyclists:\n        cyclist_id = cyclist[\"id\"]\n        bike_count = len(cyclist.get(\"owned_bikes\", []))\n        result.append({\"cyclist_id\": cyclist_id, \"count(*)\": bike_count})\n    # Order by cyclist_id\n    result.sort(key=lambda x: x[\"cyclist_id\"])\n    return result": [
            {
              "cyclist_id": 1,
              "count(*)": 2
            },
            {
              "cyclist_id": 2,
              "count(*)": 3
            },
            {
              "cyclist_id": 3,
              "count(*)": 1
            },
            {
              "cyclist_id": 4,
              "count(*)": 1
            },
            {
              "cyclist_id": 5,
              "count(*)": 1
            },
            {
              "cyclist_id": 6,
              "count(*)": 1
            },
            {
              "cyclist_id": 7,
              "count(*)": 5
            },
            {
              "cyclist_id": 8,
              "count(*)": 0
            }
          ],
          "def transform(data):\n    # Each cyclist record: id, owned_bikes\n    return [\n        {\"cyclist_id\": cyclist[\"id\"], \"count(*)\": len(cyclist[\"owned_bikes\"])}\n        for cyclist in sorted(data[\"cyclists\"], key=lambda x: x[\"id\"])\n    ]": [
            {
              "cyclist_id": 1,
              "count(*)": 2
            },
            {
              "cyclist_id": 2,
              "count(*)": 3
            },
            {
              "cyclist_id": 3,
              "count(*)": 1
            },
            {
              "cyclist_id": 4,
              "count(*)": 1
            },
            {
              "cyclist_id": 5,
              "count(*)": 1
            },
            {
              "cyclist_id": 6,
              "count(*)": 1
            },
            {
              "cyclist_id": 7,
              "count(*)": 5
            },
            {
              "cyclist_id": 8,
              "count(*)": 0
            }
          ],
          "def transform(data):\n    # SQL expects: cyclist_id, count(*), ordered by cyclist_id\n    answer = []\n    for cyclist in data[\"cyclists\"]:\n        answer.append({\n            \"cyclist_id\": cyclist[\"id\"],\n            \"count(*)\": len(cyclist[\"owned_bikes\"])\n        })\n    # Order by cyclist_id\n    answer.sort(key=lambda x: x[\"cyclist_id\"])\n    return answer": [
            {
              "cyclist_id": 1,
              "count(*)": 2
            },
            {
              "cyclist_id": 2,
              "count(*)": 3
            },
            {
              "cyclist_id": 3,
              "count(*)": 1
            },
            {
              "cyclist_id": 4,
              "count(*)": 1
            },
            {
              "cyclist_id": 5,
              "count(*)": 1
            },
            {
              "cyclist_id": 6,
              "count(*)": 1
            },
            {
              "cyclist_id": 7,
              "count(*)": 5
            },
            {
              "cyclist_id": 8,
              "count(*)": 0
            }
          ],
          "def transform(data):\n    # The error suggests the output is actually correct, but let's exactly match the output formatting,\n    # possibly by ensuring we aren't returning extra data or different types.\n    cyclists_counts = []\n    for cyclist in data[\"cyclists\"]:\n        obj = {\n            \"cyclist_id\": int(cyclist[\"id\"]),\n            \"count(*)\": int(len(cyclist[\"owned_bikes\"]))\n        }\n        cyclists_counts.append(obj)\n    cyclists_counts.sort(key=lambda x: x[\"cyclist_id\"])\n    return cyclists_counts": [
            {
              "cyclist_id": 1,
              "count(*)": 2
            },
            {
              "cyclist_id": 2,
              "count(*)": 3
            },
            {
              "cyclist_id": 3,
              "count(*)": 1
            },
            {
              "cyclist_id": 4,
              "count(*)": 1
            },
            {
              "cyclist_id": 5,
              "count(*)": 1
            },
            {
              "cyclist_id": 6,
              "count(*)": 1
            },
            {
              "cyclist_id": 7,
              "count(*)": 5
            },
            {
              "cyclist_id": 8,
              "count(*)": 0
            }
          ]
        }
      }
    }
  }
]