[
  {
    "question_id": 1471,
    "db_id": "debit_card_specializing",
    "question": "What is the ratio of customers who pay in EUR against customers who pay in CZK?",
    "evidence": "ratio of customers who pay in EUR against customers who pay in CZK = count(Currency = 'EUR') / count(Currency = 'CZK').",
    "SQL": "SELECT  CAST(SUM(CASE WHEN `Currency` = 'EUR' THEN 1 ELSE 0 END) AS DOUBLE) / SUM(CASE WHEN `Currency` = 'CZK' THEN 1 ELSE 0 END) FROM `customers`",
    "difficulty": "simple"
  },
  {
    "question_id": 1472,
    "db_id": "debit_card_specializing",
    "question": "In 2012, who had the least consumption in LAM?",
    "evidence": "Year 2012 can be presented as Between 201201 And 201212; The first 4 strings of the Date values in the yearmonth table can represent year.",
    "SQL": "SELECT\n  `T1`.`CustomerID`\nFROM `customers` AS `T1`\nINNER JOIN `yearmonth` AS `T2`\n  ON `T1`.`CustomerID` = `T2`.`CustomerID`\nWHERE\n  `T1`.`Segment` = 'LAM' AND SUBSTR(`T2`.`Date`, 1, 4) = '2012'\nGROUP BY\n  `T1`.`CustomerID`\nORDER BY\n  SUM(`T2`.`Consumption`) ASC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1473,
    "db_id": "debit_card_specializing",
    "question": "What was the average monthly consumption of customers in SME for the year 2013?",
    "evidence": "Average Monthly consumption = AVG(Consumption) / 12; Year 2013 can be presented as Between 201301 And 201312; The first 4 strings of the Date values in the yearmonth table can represent year.",
    "SQL": "SELECT\n  AVG(`T2`.`Consumption`) / 12\nFROM `customers` AS `T1`\nINNER JOIN `yearmonth` AS `T2`\n  ON `T1`.`CustomerID` = `T2`.`CustomerID`\nWHERE\n  SUBSTR(`T2`.`Date`, 1, 4) = '2013' AND `T1`.`Segment` = 'SME'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1476,
    "db_id": "debit_card_specializing",
    "question": "What was the difference in gas consumption between CZK-paying customers and EUR-paying customers in 2012?",
    "evidence": "Year 2012 can be presented as Between 201201 And 201212; The first 4 strings of the Date values in the yearmonth table can represent year; Difference in Consumption = CZK customers consumption in 2012 - EUR customers consumption in 2012",
    "SQL": "SELECT\n  SUM(CASE WHEN `T1`.`Currency` = 'CZK' THEN `T2`.`Consumption` ELSE 0 END) - SUM(CASE WHEN `T1`.`Currency` = 'EUR' THEN `T2`.`Consumption` ELSE 0 END)\nFROM `customers` AS `T1`\nINNER JOIN `yearmonth` AS `T2`\n  ON `T1`.`CustomerID` = `T2`.`CustomerID`\nWHERE\n  SUBSTR(`T2`.`Date`, 1, 4) = '2012'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1479,
    "db_id": "debit_card_specializing",
    "question": "Which year recorded the most consumption of gas paid in CZK?",
    "evidence": "The first 4 strings of the Date values in the yearmonth table can represent year.",
    "SQL": "SELECT\n  SUBSTR(`T2`.`Date`, 1, 4)\nFROM `customers` AS `T1`\nINNER JOIN `yearmonth` AS `T2`\n  ON `T1`.`CustomerID` = `T2`.`CustomerID`\nWHERE\n  `T1`.`Currency` = 'CZK'\nGROUP BY\n  SUBSTR(`T2`.`Date`, 1, 4)\nORDER BY\n  SUM(`T2`.`Consumption`) DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1480,
    "db_id": "debit_card_specializing",
    "question": "What was the gas consumption peak month for SME customers in 2013?",
    "evidence": "Year 2013 can be presented as Between 201301 And 201312; The first 4 strings of the Date values in the yearmonth table can represent year; The 5th and 6th string of the date can refer to month.",
    "SQL": "SELECT\n  SUBSTR(`T2`.`Date`, 5, 2)\nFROM `customers` AS `T1`\nINNER JOIN `yearmonth` AS `T2`\n  ON `T1`.`CustomerID` = `T2`.`CustomerID`\nWHERE\n  SUBSTR(`T2`.`Date`, 1, 4) = '2013' AND `T1`.`Segment` = 'SME'\nGROUP BY\n  SUBSTR(`T2`.`Date`, 5, 2)\nORDER BY\n  SUM(`T2`.`Consumption`) DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1481,
    "db_id": "debit_card_specializing",
    "question": "What is the difference in the annual average consumption of the customers with the least amount of consumption paid in CZK for 2013 between SME and LAM, LAM and KAM, and KAM and SME?",
    "evidence": "annual average consumption of customer with the lowest consumption in each segment = total consumption per year / the number of customer with lowest consumption in each segment; Difference in annual average = SME's annual average - LAM's annual average; Difference in annual average = LAM's annual average - KAM's annual average; Year 2013 can be presented as Between 201301 And 201312; The first 4 strings of the Date values in the yearmonth table can represent year.",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `T1`.`Segment` = 'SME' THEN `T2`.`Consumption` ELSE 0 END) AS DOUBLE) / COUNT(`T1`.`CustomerID`) - CAST(SUM(CASE WHEN `T1`.`Segment` = 'LAM' THEN `T2`.`Consumption` ELSE 0 END) AS DOUBLE) / COUNT(`T1`.`CustomerID`),\n  CAST(SUM(CASE WHEN `T1`.`Segment` = 'LAM' THEN `T2`.`Consumption` ELSE 0 END) AS DOUBLE) / COUNT(`T1`.`CustomerID`) - CAST(SUM(CASE WHEN `T1`.`Segment` = 'KAM' THEN `T2`.`Consumption` ELSE 0 END) AS DOUBLE) / COUNT(`T1`.`CustomerID`),\n  CAST(SUM(CASE WHEN `T1`.`Segment` = 'KAM' THEN `T2`.`Consumption` ELSE 0 END) AS DOUBLE) / COUNT(`T1`.`CustomerID`) - CAST(SUM(CASE WHEN `T1`.`Segment` = 'SME' THEN `T2`.`Consumption` ELSE 0 END) AS DOUBLE) / COUNT(`T1`.`CustomerID`)\nFROM `customers` AS `T1`\nINNER JOIN `yearmonth` AS `T2`\n  ON `T1`.`CustomerID` = `T2`.`CustomerID`\nWHERE\n  `T1`.`Currency` = 'CZK'\n  AND `T2`.`Consumption` = (\n    SELECT\n      MIN(`Consumption`)\n    FROM `yearmonth`\n  )\n  AND `T2`.`Date` BETWEEN 201301 AND 201312",
    "difficulty": "challenging"
  },
  {
    "question_id": 1482,
    "db_id": "debit_card_specializing",
    "question": "Which of the three segments\u2014SME, LAM and KAM\u2014has the biggest and lowest percentage increases in consumption paid in EUR between 2012 and 2013?",
    "evidence": "Increase or Decrease = consumption for 2013 - consumption for 2012; Percentage of Increase = (Increase or Decrease / consumption for 2013) * 100%; The first 4 strings of the Date values in the yearmonth table can represent year",
    "SQL": "SELECT\n  CAST((\n    SUM(\n      CASE\n        WHEN `T1`.`Segment` = 'SME' AND `T2`.`Date` LIKE '2013%'\n        THEN `T2`.`Consumption`\n        ELSE 0\n      END\n    ) - SUM(\n      CASE\n        WHEN `T1`.`Segment` = 'SME' AND `T2`.`Date` LIKE '2012%'\n        THEN `T2`.`Consumption`\n        ELSE 0\n      END\n    )\n  ) AS DOUBLE) * 100 / SUM(\n    CASE\n      WHEN `T1`.`Segment` = 'SME' AND `T2`.`Date` LIKE '2012%'\n      THEN `T2`.`Consumption`\n      ELSE 0\n    END\n  ),\n  CAST(SUM(\n    CASE\n      WHEN `T1`.`Segment` = 'LAM' AND `T2`.`Date` LIKE '2013%'\n      THEN `T2`.`Consumption`\n      ELSE 0\n    END\n  ) - SUM(\n    CASE\n      WHEN `T1`.`Segment` = 'LAM' AND `T2`.`Date` LIKE '2012%'\n      THEN `T2`.`Consumption`\n      ELSE 0\n    END\n  ) AS DOUBLE) * 100 / SUM(\n    CASE\n      WHEN `T1`.`Segment` = 'LAM' AND `T2`.`Date` LIKE '2012%'\n      THEN `T2`.`Consumption`\n      ELSE 0\n    END\n  ),\n  CAST(SUM(\n    CASE\n      WHEN `T1`.`Segment` = 'KAM' AND `T2`.`Date` LIKE '2013%'\n      THEN `T2`.`Consumption`\n      ELSE 0\n    END\n  ) - SUM(\n    CASE\n      WHEN `T1`.`Segment` = 'KAM' AND `T2`.`Date` LIKE '2012%'\n      THEN `T2`.`Consumption`\n      ELSE 0\n    END\n  ) AS DOUBLE) * 100 / SUM(\n    CASE\n      WHEN `T1`.`Segment` = 'KAM' AND `T2`.`Date` LIKE '2012%'\n      THEN `T2`.`Consumption`\n      ELSE 0\n    END\n  )\nFROM `customers` AS `T1`\nINNER JOIN `yearmonth` AS `T2`\n  ON `T1`.`CustomerID` = `T2`.`CustomerID`",
    "difficulty": "challenging"
  },
  {
    "question_id": 1483,
    "db_id": "debit_card_specializing",
    "question": "How much did customer 6 consume in total between August and November 2013?",
    "evidence": "Between August And November 2013 refers to Between 201308 And 201311; The first 4 strings of the Date values in the yearmonth table can represent year; The 5th and 6th string of the date can refer to month.",
    "SQL": "SELECT\n  SUM(`Consumption`)\nFROM `yearmonth`\nWHERE\n  `CustomerID` = 6 AND `Date` BETWEEN '201308' AND '201311'",
    "difficulty": "simple"
  },
  {
    "question_id": 1484,
    "db_id": "debit_card_specializing",
    "question": "How many more \"discount\" gas stations does the Czech Republic have compared to Slovakia?",
    "evidence": "Czech Republic can be represented as the Country value in gasstations table is 'CZE'; Slovakia can be represented as the Country value in the gasstations table is 'SVK';  Computation of more \"discount\" gas stations= Total no. of discount gas stations in Czech Republic - Total no. of discount gas stations in Slovakia",
    "SQL": "SELECT\n  SUM(CASE WHEN `Country` = 'CZE' THEN 1 ELSE 0 END) - SUM(CASE WHEN `Country` = 'SVK' THEN 1 ELSE 0 END)\nFROM `gasstations`\nWHERE\n  `Segment` = 'Discount'",
    "difficulty": "simple"
  },
  {
    "question_id": 1486,
    "db_id": "debit_card_specializing",
    "question": "Is it true that more SMEs pay in Czech koruna than in euros? If so, how many more?",
    "evidence": "Amount of more SMEs = Total of SMEs pay using Currency CZK - Total of SMEs pay using Currency EUR",
    "SQL": "SELECT\n  SUM(`Currency` = 'CZK') - SUM(`Currency` = 'EUR')\nFROM `customers`\nWHERE\n  `Segment` = 'SME'",
    "difficulty": "simple"
  },
  {
    "question_id": 1490,
    "db_id": "debit_card_specializing",
    "question": "How many percent of LAM customer consumed more than 46.73?",
    "evidence": "Percentage of LAM customer consumed more than 46.73 = (Total no. of LAM customers who consumed more than 46.73 / Total no. of LAM customers) * 100.",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `T2`.`Consumption` > 46.73 THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(`T1`.`CustomerID`)\nFROM `customers` AS `T1`\nINNER JOIN `yearmonth` AS `T2`\n  ON `T1`.`CustomerID` = `T2`.`CustomerID`\nWHERE\n  `T1`.`Segment` = 'LAM'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1493,
    "db_id": "debit_card_specializing",
    "question": "In February 2012, what percentage of customers consumed more than 528.3?",
    "evidence": "February 2012 refers to '201202' in yearmonth.date; The first 4 strings of the Date values in the yearmonth table can represent year; The 5th and 6th string of the date can refer to month.",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `Consumption` > 528.3 THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(`CustomerID`)\nFROM `yearmonth`\nWHERE\n  `Date` = '201202'",
    "difficulty": "simple"
  },
  {
    "question_id": 1498,
    "db_id": "debit_card_specializing",
    "question": "What is the highest monthly consumption in the year 2012?",
    "evidence": "The first 4 strings of the Date values in the yearmonth table can represent year; The 5th and 6th string of the date can refer to month.",
    "SQL": "SELECT\n  SUM(`Consumption`)\nFROM `yearmonth`\nWHERE\n  SUBSTR(`Date`, 1, 4) = '2012'\nGROUP BY\n  SUBSTR(`Date`, 5, 2)\nORDER BY\n  SUM(`Consumption`) DESC\nLIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 1500,
    "db_id": "debit_card_specializing",
    "question": "Please list the product description of the products consumed in September, 2013.",
    "evidence": "September 2013 refers to 201309; The first 4 strings of the Date values in the yearmonth table can represent year; The 5th and 6th string of the date can refer to month.",
    "SQL": "SELECT\n  `T3`.`Description`\nFROM `transactions_1k` AS `T1`\nINNER JOIN `yearmonth` AS `T2`\n  ON `T1`.`CustomerID` = `T2`.`CustomerID`\nINNER JOIN `products` AS `T3`\n  ON `T1`.`ProductID` = `T3`.`ProductID`\nWHERE\n  `T2`.`Date` = '201309'",
    "difficulty": "simple"
  },
  {
    "question_id": 1501,
    "db_id": "debit_card_specializing",
    "question": "Please list the countries of the gas stations with transactions taken place in June, 2013.",
    "evidence": "June 2013 refers to '201306'; The first 4 strings of the Date values in the yearmonth table can represent year; The 5th and 6th string of the date can refer to month;",
    "SQL": "SELECT DISTINCT\n  `T2`.`Country`\nFROM `transactions_1k` AS `T1`\nINNER JOIN `gasstations` AS `T2`\n  ON `T1`.`GasStationID` = `T2`.`GasStationID`\nINNER JOIN `yearmonth` AS `T3`\n  ON `T1`.`CustomerID` = `T3`.`CustomerID`\nWHERE\n  `T3`.`Date` = '201306'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1505,
    "db_id": "debit_card_specializing",
    "question": "Among the customers who paid in euro, how many of them have a monthly consumption of over 1000?",
    "evidence": "Pays in euro = Currency = 'EUR'.",
    "SQL": "SELECT\n  COUNT(*)\nFROM `yearmonth` AS `T1`\nINNER JOIN `customers` AS `T2`\n  ON `T1`.`CustomerID` = `T2`.`CustomerID`\nWHERE\n  `T2`.`Currency` = 'EUR' AND `T1`.`Consumption` > 1000.00",
    "difficulty": "simple"
  },
  {
    "question_id": 1506,
    "db_id": "debit_card_specializing",
    "question": "Please list the product descriptions of the transactions taken place in the gas stations in the Czech Republic.",
    "evidence": "Czech Republic can be represented as the Country value in the gasstations table is 'CZE'; ",
    "SQL": "SELECT DISTINCT\n  `T3`.`Description`\nFROM `transactions_1k` AS `T1`\nINNER JOIN `gasstations` AS `T2`\n  ON `T1`.`GasStationID` = `T2`.`GasStationID`\nINNER JOIN `products` AS `T3`\n  ON `T1`.`ProductID` = `T3`.`ProductID`\nWHERE\n  `T2`.`Country` = 'CZE'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1507,
    "db_id": "debit_card_specializing",
    "question": "Please list the disparate time of the transactions taken place in the gas stations from chain no. 11.",
    "evidence": "",
    "SQL": "SELECT DISTINCT\n  `T1`.`Time`\nFROM `transactions_1k` AS `T1`\nINNER JOIN `gasstations` AS `T2`\n  ON `T1`.`GasStationID` = `T2`.`GasStationID`\nWHERE\n  `T2`.`ChainID` = 11",
    "difficulty": "simple"
  },
  {
    "question_id": 1509,
    "db_id": "debit_card_specializing",
    "question": "Among the transactions made in the gas stations in the Czech Republic, how many of them are taken place after 2012/1/1?",
    "evidence": "Czech Republic can be represented as the Country value in the gasstations table is 'CZE'",
    "SQL": "SELECT\n  COUNT(`T1`.`TransactionID`)\nFROM `transactions_1k` AS `T1`\nINNER JOIN `gasstations` AS `T2`\n  ON `T1`.`GasStationID` = `T2`.`GasStationID`\nWHERE\n  `T2`.`Country` = 'CZE'\n  AND DATE_FORMAT(CAST(`T1`.`Date` AS DATETIME), '%Y') >= '2012'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1514,
    "db_id": "debit_card_specializing",
    "question": "What kind of currency did the customer paid at 16:25:00 in 2012/8/24?",
    "evidence": "'2012/8/24' can be represented by '2012-08-24'; ",
    "SQL": "SELECT DISTINCT\n  `T3`.`Currency`\nFROM `transactions_1k` AS `T1`\nINNER JOIN `gasstations` AS `T2`\n  ON `T1`.`GasStationID` = `T2`.`GasStationID`\nINNER JOIN `customers` AS `T3`\n  ON `T1`.`CustomerID` = `T3`.`CustomerID`\nWHERE\n  `T1`.`Date` = '2012-08-24' AND `T1`.`Time` = '16:25:00'",
    "difficulty": "simple"
  },
  {
    "question_id": 1515,
    "db_id": "debit_card_specializing",
    "question": "What segment did the customer have at 2012/8/23 21:20:00?",
    "evidence": "'2012/8/23' can be represented by '2012-08-23'",
    "SQL": "SELECT\n  `T2`.`Segment`\nFROM `transactions_1k` AS `T1`\nINNER JOIN `customers` AS `T2`\n  ON `T1`.`CustomerID` = `T2`.`CustomerID`\nWHERE\n  `T1`.`date` = '2012-08-23' AND `T1`.`time` = '21:20:00'",
    "difficulty": "simple"
  },
  {
    "question_id": 1521,
    "db_id": "debit_card_specializing",
    "question": "For all the transactions happened during 8:00-9:00 in 2012/8/26, how many happened in CZE?",
    "evidence": "Czech Republic can be represented as the Country value in the gasstations table is 'CZE'; '2012/8/26' can be represented by '2012-08-26'; during 8:00-9:00 can be represented as Time BETWEEN '08:00:00' AND '09:00:00'",
    "SQL": "SELECT\n  COUNT(`T1`.`TransactionID`)\nFROM `transactions_1k` AS `T1`\nINNER JOIN `gasstations` AS `T2`\n  ON `T1`.`GasStationID` = `T2`.`GasStationID`\nWHERE\n  `T1`.`Date` = '2012-08-26'\n  AND `T1`.`Time` BETWEEN '08:00:00' AND '09:00:00'\n  AND `T2`.`Country` = 'CZE'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1524,
    "db_id": "debit_card_specializing",
    "question": "What's the nationality of the customer who spent 548.4 in 2012/8/24?",
    "evidence": "'2012/8/24' can be represented by '2012-08-24'",
    "SQL": "SELECT\n  `T2`.`Country`\nFROM `transactions_1k` AS `T1`\nINNER JOIN `gasstations` AS `T2`\n  ON `T1`.`GasStationID` = `T2`.`GasStationID`\nWHERE\n  `T1`.`Date` = '2012-08-24' AND  `T1`.`Price` = 548.4",
    "difficulty": "simple"
  },
  {
    "question_id": 1525,
    "db_id": "debit_card_specializing",
    "question": "What is the percentage of the customers who used EUR in 2012/8/25?",
    "evidence": "'2012/8/25' can be represented by '2012-08-25'",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `T2`.`Currency` = 'EUR' THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(`T1`.`CustomerID`)\nFROM `transactions_1k` AS `T1`\nINNER JOIN `customers` AS `T2`\n  ON `T1`.`CustomerID` = `T2`.`CustomerID`\nWHERE\n  `T1`.`Date` = '2012-08-25'",
    "difficulty": "simple"
  },
  {
    "question_id": 1526,
    "db_id": "debit_card_specializing",
    "question": "For the customer who paid 634.8 in 2012/8/25, what was the consumption decrease rate from Year 2012 to 2013?",
    "evidence": "'2012/8/24' can be represented by '2012-08-24'; Consumption decrease rate = (consumption_2012 - consumption_2013) / consumption_2012",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN SUBSTR(`Date`, 1, 4) = '2012' THEN `Consumption` ELSE 0 END) - SUM(CASE WHEN SUBSTR(`Date`, 1, 4) = '2013' THEN `Consumption` ELSE 0 END) AS DOUBLE) / SUM(CASE WHEN SUBSTR(`Date`, 1, 4) = '2012' THEN `Consumption` ELSE 0 END)\nFROM `yearmonth`\nWHERE\n  `CustomerID` = (\n    SELECT\n      `T1`.`CustomerID`\n    FROM `transactions_1k` AS `T1`\n    INNER JOIN `gasstations` AS `T2`\n      ON `T1`.`GasStationID` = `T2`.`GasStationID`\n    WHERE\n      `T1`.`Date` = '2012-08-25' AND `T1`.`Price`  = 1513.12   )",
    "difficulty": "challenging"
  },
  {
    "question_id": 1528,
    "db_id": "debit_card_specializing",
    "question": "What is the percentage of \"premium\" against the overall segment in Country = \"SVK\"?",
    "evidence": "",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `Country` = 'SVK' AND `Segment` = 'Premium' THEN 1 ELSE 0 END) AS DOUBLE) * 100 / SUM(CASE WHEN `Country` = 'SVK' THEN 1 ELSE 0 END)\nFROM `gasstations`",
    "difficulty": "simple"
  },
  {
    "question_id": 1529,
    "db_id": "debit_card_specializing",
    "question": "What is the amount spent by customer \"38508\" at the gas stations? How much had the customer spent in January 2012?",
    "evidence": "January 2012 refers to the Date value = '201201'",
    "SQL": "SELECT\n  SUM(`T1`.`Price` ),\n  SUM(CASE WHEN `T3`.`Date` = '201201' THEN `T1`.`Price` ELSE 0 END)\nFROM `transactions_1k` AS `T1`\nINNER JOIN `gasstations` AS `T2`\n  ON `T1`.`GasStationID` = `T2`.`GasStationID`\nINNER JOIN `yearmonth` AS `T3`\n  ON `T1`.`CustomerID` = `T3`.`CustomerID`\nWHERE\n  `T1`.`CustomerID` = '38508'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1531,
    "db_id": "debit_card_specializing",
    "question": "Who is the top spending customer and how much is the average price per single item purchased by this customer? What currency was being used?",
    "evidence": "average price per single item = Total(price) / Total(amount)",
    "SQL": "SELECT\n  `T2`.`CustomerID`,\n  SUM(`T2`.`Price` / `T2`.`Amount`),\n  `T1`.`Currency`\nFROM `customers` AS `T1`\nINNER JOIN `transactions_1k` AS `T2`\n  ON `T1`.`CustomerID` = `T2`.`CustomerID`\nWHERE\n  `T2`.`CustomerID` = (\n    SELECT\n      `CustomerID`\n    FROM `yearmonth`\n    ORDER BY\n      `Consumption` DESC\n    LIMIT 1\n  )\nGROUP BY\n  `T2`.`CustomerID`,\n  `T1`.`Currency`",
    "difficulty": "moderate"
  },
  {
    "question_id": 1533,
    "db_id": "debit_card_specializing",
    "question": "For all the people who paid more than 29.00 per unit of product id No.5. Give their consumption status in the August of 2012.",
    "evidence": "August of 2012 refers to the Date value = '201208' ; Price per unit of product = Price / Amount;",
    "SQL": "SELECT\n  `T2`.`Consumption`\nFROM `transactions_1k` AS `T1`\nINNER JOIN `yearmonth` AS `T2`\n  ON `T1`.`CustomerID` = `T2`.`CustomerID`\nWHERE\n  `T1`.`Price` / `T1`.`Amount` > 29.00\n  AND `T1`.`ProductID` = 5\n  AND `T2`.`Date` = '201208'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1312,
    "db_id": "student_club",
    "question": "What's Angela Sanders's major?",
    "evidence": "Angela Sanders is the full name; full name refers to first_name, last_name; major refers to major_name.",
    "SQL": "SELECT\n  `T2`.`major_name`\nFROM `member` AS `T1`\nINNER JOIN `major` AS `T2`\n  ON `T1`.`link_to_major` = `T2`.`major_id`\nWHERE\n  `T1`.`first_name` = 'Angela' AND `T1`.`last_name` = 'Sanders'",
    "difficulty": "simple"
  },
  {
    "question_id": 1317,
    "db_id": "student_club",
    "question": "Among the students from the Student_Club who attended the event \"Women's Soccer\", how many of them want a T-shirt that's in medium size?",
    "evidence": "Women's Soccer is an event name; T-shirt that is in medium size refers to t_shirt_size = 'Medium'",
    "SQL": "SELECT\n  COUNT(`T1`.`event_id`)\nFROM `event` AS `T1`\nINNER JOIN `attendance` AS `T2`\n  ON `T1`.`event_id` = `T2`.`link_to_event`\nINNER JOIN `member` AS `T3`\n  ON `T2`.`link_to_member` = `T3`.`member_id`\nWHERE\n  `T1`.`event_name` = 'Women''s Soccer' AND `T3`.`t_shirt_size` = 'Medium'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1322,
    "db_id": "student_club",
    "question": "Among the events attended by more than 10 members of the Student_Club, how many of them are meetings?",
    "evidence": "meetings events refers to type = 'Meeting'; attended by more than 10 members refers to COUNT(event_id) > 10",
    "SQL": "SELECT\n  `T1`.`event_name`\nFROM `event` AS `T1`\nINNER JOIN `attendance` AS `T2`\n  ON `T1`.`event_id` = `T2`.`link_to_event`\nGROUP BY\n  `T1`.`event_id`\nHAVING\n  COUNT(`T2`.`link_to_event`) > 10\nEXCEPT\nSELECT\n  `T1`.`event_name`\nFROM `event` AS `T1`\nWHERE\n  `T1`.`type` = 'Meeting'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1323,
    "db_id": "student_club",
    "question": "List all the names of events that had an attendance of over 20 students but were not fundraisers.",
    "evidence": "name of events refers to event_name; attendance of over 20 students COUNT(event_id) > 20.",
    "SQL": "SELECT `T1`.`event_name` FROM `event` AS `T1` INNER JOIN `attendance` AS `T2` ON `T1`.`event_id` = `T2`.`link_to_event` GROUP BY `T1`.`event_id` HAVING COUNT(`T2`.`link_to_event`) > 20 AND NOT EXISTS (SELECT 1 FROM `event` AS `E` WHERE `E`.`event_id` = `T1`.`event_id` AND `E`.`type` = 'Fundraiser')",
    "difficulty": "moderate"
  },
  {
    "question_id": 1331,
    "db_id": "student_club",
    "question": "What is the amount of the funds that the Vice President received?",
    "evidence": "'Vice President' is a position of Student Club; funds received refers to amount.",
    "SQL": "SELECT\n  `T2`.`amount`\nFROM `member` AS `T1`\nINNER JOIN `income` AS `T2`\n  ON `T1`.`member_id` = `T2`.`link_to_member`\nWHERE\n  `T1`.`position` = 'Vice President'",
    "difficulty": "simple"
  },
  {
    "question_id": 1334,
    "db_id": "student_club",
    "question": "List the full name of the Student_Club members that grew up in Illinois state.",
    "evidence": "full name of member refers to first_name, last_name",
    "SQL": "SELECT\n  `T1`.`first_name`,\n  `T1`.`last_name`\nFROM `member` AS `T1`\nINNER JOIN `zip_code` AS `T2`\n  ON `T1`.`zip` = `T2`.`zip_code`\nWHERE\n  `T2`.`state` = 'Illinois'",
    "difficulty": "simple"
  },
  {
    "question_id": 1338,
    "db_id": "student_club",
    "question": "Was each expense in October Meeting on October 8, 2019 approved?",
    "evidence": "event_name = 'October Meeting' where event_date = '2019-10-08'; approved = True means expenses was approved; approved = False means expenses was not approved",
    "SQL": "SELECT\n  `T3`.`approved`\nFROM `event` AS `T1`\nINNER JOIN `budget` AS `T2`\n  ON `T1`.`event_id` = `T2`.`link_to_event`\nINNER JOIN `expense` AS `T3`\n  ON `T2`.`budget_id` = `T3`.`link_to_budget`\nWHERE\n  `T1`.`event_name` = 'October Meeting' AND `T1`.`event_date` LIKE '2019-10-08%'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1339,
    "db_id": "student_club",
    "question": "Calculate the total average cost that Elijah Allen spent in the events on September and October.",
    "evidence": "Elijah Allen is the full name; full name refers to first_name, last_name; The 5th and 6th string of the expense_date in the expense table can refer to month; events in September and October refers to month(expense_date) = 9 OR month(expense_date) = 10",
    "SQL": "SELECT\n  AVG(`T2`.`cost`)\nFROM `member` AS `T1`\nINNER JOIN `expense` AS `T2`\n  ON `T1`.`member_id` = `T2`.`link_to_member`\nWHERE\n  `T1`.`last_name` = 'Allen'\n  AND `T1`.`first_name` = 'Elijah'\n  AND (\n    SUBSTR(`T2`.`expense_date`, 6, 2) = '09'\n    OR SUBSTR(`T2`.`expense_date`, 6, 2) = '10'\n  )",
    "difficulty": "challenging"
  },
  {
    "question_id": 1340,
    "db_id": "student_club",
    "question": "Calculate the difference of the total amount spent in all events by the Student_Club in year 2019 and 2020.",
    "evidence": "The first 4 strings of the event_date values in the event table can represent year; The difference of the total amount spent = SUBTRACT(spent where YEAR(event_date) = 2019, spent where YEAR(event_date) = 2020)",
    "SQL": "SELECT\n  SUM(CASE WHEN SUBSTR(`T1`.`event_date`, 1, 4) = '2019' THEN `T2`.`spent` ELSE 0 END) - SUM(CASE WHEN SUBSTR(`T1`.`event_date`, 1, 4) = '2020' THEN `T2`.`spent` ELSE 0 END) AS `num`\nFROM `event` AS `T1`\nINNER JOIN `budget` AS `T2`\n  ON `T1`.`event_id` = `T2`.`link_to_event`",
    "difficulty": "moderate"
  },
  {
    "question_id": 1344,
    "db_id": "student_club",
    "question": "What was the notes of the fundraising on 2019/9/14?",
    "evidence": "fundraising on 2019/9/14 refers to source = 'Fundraising' where date_received = '2019-09-14'",
    "SQL": "SELECT\n  `notes`\nFROM `income`\nWHERE\n  `source` = 'Fundraising' AND `date_received` = '2019-09-14'",
    "difficulty": "simple"
  },
  {
    "question_id": 1346,
    "db_id": "student_club",
    "question": "Tell the phone number of \"Carlo Jacobs\".",
    "evidence": "Carlo Jacobs is the full name; full name refers to first_name, last_name;",
    "SQL": "SELECT\n  `phone`\nFROM `member`\nWHERE\n  `first_name` = 'Carlo' AND `last_name` = 'Jacobs'",
    "difficulty": "simple"
  },
  {
    "question_id": 1350,
    "db_id": "student_club",
    "question": "What is the status of the event which bought \"Post Cards, Posters\" on 2019/8/20?",
    "evidence": "'Post Cards, Posters' is an expense description; on 2019/8/20 refers to expense_date = '2019-8-20'; status of event refers to event_status",
    "SQL": "SELECT\n  `T1`.`event_status`\nFROM `budget` AS `T1`\nINNER JOIN `expense` AS `T2`\n  ON `T1`.`budget_id` = `T2`.`link_to_budget`\nWHERE\n  `T2`.`expense_description` = 'Post Cards, Posters'\n  AND `T2`.`expense_date` = '2019-08-20'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1351,
    "db_id": "student_club",
    "question": "What was Brent Thomason's major?",
    "evidence": "Brent Thomason is the full name; full name refers to first_name, last_name; major refers to major_name",
    "SQL": "SELECT\n  `T2`.`major_name`\nFROM `member` AS `T1`\nINNER JOIN `major` AS `T2`\n  ON `T1`.`link_to_major` = `T2`.`major_id`\nWHERE\n  `T1`.`first_name` = 'Brent' AND `T1`.`last_name` = 'Thomason'",
    "difficulty": "simple"
  },
  {
    "question_id": 1352,
    "db_id": "student_club",
    "question": "For all the club members from \"Business\" major, how many of them wear medium size t-shirt?",
    "evidence": "'Business' is a major name; wear medium size t-shirt refers to t_shirt_size = 'Medium'",
    "SQL": "SELECT\n  COUNT(`T1`.`member_id`)\nFROM `member` AS `T1`\nINNER JOIN `major` AS `T2`\n  ON `T1`.`link_to_major` = `T2`.`major_id`\nWHERE\n  `T2`.`major_name` = 'Business' AND `T1`.`t_shirt_size` = 'Medium'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1356,
    "db_id": "student_club",
    "question": "Which department was the President of the club in?",
    "evidence": "'President' is a position of Student Club",
    "SQL": "SELECT\n  `T2`.`department`\nFROM `member` AS `T1`\nINNER JOIN `major` AS `T2`\n  ON `T1`.`link_to_major` = `T2`.`major_id`\nWHERE\n  `T1`.`position` = 'President'",
    "difficulty": "simple"
  },
  {
    "question_id": 1357,
    "db_id": "student_club",
    "question": "State the date Connor Hilton paid his/her dues.",
    "evidence": "Connor Hilton is the full name; full name refers to first_name, last_name; date the dues was paid refers to date_received where source = 'Dues';",
    "SQL": "SELECT\n  `T2`.`date_received`\nFROM `member` AS `T1`\nINNER JOIN `income` AS `T2`\n  ON `T1`.`member_id` = `T2`.`link_to_member`\nWHERE\n  `T1`.`first_name` = 'Connor'\n  AND `T1`.`last_name` = 'Hilton'\n  AND `T2`.`source` = 'Dues'",
    "difficulty": "simple"
  },
  {
    "question_id": 1359,
    "db_id": "student_club",
    "question": "How many times was the budget in Advertisement for \"Yearly Kickoff\" meeting more than \"October Meeting\"?",
    "evidence": "budget in Advertisement refer to category = 'Advertisement' in the budget table; DIVIDE(SUM(amount when event_name = 'Yearly Kickoff'), SUM(amount when event_name = 'October Meeting'))",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `T2`.`event_name` = 'Yearly Kickoff' THEN `T1`.`amount` ELSE 0 END) AS DOUBLE) / SUM(CASE WHEN `T2`.`event_name` = 'October Meeting' THEN `T1`.`amount` ELSE 0 END)\nFROM `budget` AS `T1`\nINNER JOIN `event` AS `T2`\n  ON `T1`.`link_to_event` = `T2`.`event_id`\nWHERE\n  `T1`.`category` = 'Advertisement' AND `T2`.`type` = 'Meeting'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1361,
    "db_id": "student_club",
    "question": "What is the total cost of the pizzas for all the events?",
    "evidence": "total cost of the pizzas refers to SUM(cost) where expense_description = 'Pizza'",
    "SQL": "SELECT\n  SUM(`cost`)\nFROM `expense`\nWHERE\n  `expense_description` = 'Pizza'",
    "difficulty": "simple"
  },
  {
    "question_id": 1362,
    "db_id": "student_club",
    "question": "How many cities are there in Orange County, Virginia?",
    "evidence": "Orange County is the county name, Virginia is the state name",
    "SQL": "SELECT\n  COUNT(`city`)\nFROM `zip_code`\nWHERE\n  `county` = 'Orange County' AND `state` = 'Virginia'",
    "difficulty": "simple"
  },
  {
    "question_id": 1368,
    "db_id": "student_club",
    "question": "What does the person with the phone number \"809-555-3360\" major in?",
    "evidence": "major in refers to major_name",
    "SQL": "SELECT\n  `T2`.`major_name`\nFROM `member` AS `T1`\nINNER JOIN `major` AS `T2`\n  ON `T1`.`link_to_major` = `T2`.`major_id`\nWHERE\n  `T1`.`phone` = '809-555-3360'",
    "difficulty": "simple"
  },
  {
    "question_id": 1371,
    "db_id": "student_club",
    "question": "How many members attended the \"Women's Soccer\" event?",
    "evidence": "'Women's Soccer' is the event name;",
    "SQL": "SELECT\n  COUNT(`T2`.`link_to_member`)\nFROM `event` AS `T1`\nINNER JOIN `attendance` AS `T2`\n  ON `T1`.`event_id` = `T2`.`link_to_event`\nWHERE\n  `T1`.`event_name` = 'Women''s Soccer'",
    "difficulty": "simple"
  },
  {
    "question_id": 1375,
    "db_id": "student_club",
    "question": "List all the members of the \"School of Applied Sciences, Technology and Education\" department.",
    "evidence": "list all members means to list all the full name; full name refers to first_name, last_name;",
    "SQL": "SELECT\n  `T1`.`first_name`,\n  `T1`.`last_name`\nFROM `member` AS `T1`\nINNER JOIN `major` AS `T2`\n  ON `T1`.`link_to_major` = `T2`.`major_id`\nWHERE\n  `T2`.`department` = 'School of Applied Sciences, Technology and Education'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1376,
    "db_id": "student_club",
    "question": "Among all the closed events, which event has the highest spend-to-budget ratio?",
    "evidence": "closed events refers to event_name where status = 'Closed'; highest spend-to budget ratio refers to MAX(DIVIDE(spent, amount))",
    "SQL": "SELECT\n  `T2`.`event_name`\nFROM `budget` AS `T1`\nINNER JOIN `event` AS `T2`\n  ON `T1`.`link_to_event` = `T2`.`event_id`\nWHERE\n  `T2`.`status` = 'Closed'\nORDER BY\n  `T1`.`spent` / `T1`.`amount` DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1378,
    "db_id": "student_club",
    "question": "What is the highest amount of budget spend for an event?",
    "evidence": "highest amount of budget spend refers to MAX(spent)",
    "SQL": "SELECT\n  MAX(`spent`)\nFROM `budget`",
    "difficulty": "simple"
  },
  {
    "question_id": 1380,
    "db_id": "student_club",
    "question": "What is the total amount of money spent for food?",
    "evidence": "total amount of money spent refers to SUM(spent); spent for food refers to category = 'Food'",
    "SQL": "SELECT SUM(spent) FROM budget WHERE category = 'Food'",
    "difficulty": "simple"
  },
  {
    "question_id": 1381,
    "db_id": "student_club",
    "question": "List the name of students that have attended more than 7 events.",
    "evidence": "name of students means the full name; full name refers to first_name, last_name; attended more than 7 events refers to COUNT(link_to_event) > 7",
    "SQL": "SELECT\n  `T1`.`first_name`,\n  `T1`.`last_name`\nFROM `member` AS `T1`\nINNER JOIN `attendance` AS `T2`\n  ON `T1`.`member_id` = `T2`.`link_to_member`\nGROUP BY\n  `T2`.`link_to_member`\nHAVING\n  COUNT(`T2`.`link_to_event`) > 7",
    "difficulty": "moderate"
  },
  {
    "question_id": 1387,
    "db_id": "student_club",
    "question": "Which student has been entrusted to manage the budget for the Yearly Kickoff?",
    "evidence": "name of students means the full name; full name refers to first_name, last_name;'Yearly Kickoff' is an event name;",
    "SQL": "SELECT\n  `T4`.`first_name`,\n  `T4`.`last_name`\nFROM `event` AS `T1`\nINNER JOIN `budget` AS `T2`\n  ON `T1`.`event_id` = `T2`.`link_to_event`\nINNER JOIN `expense` AS `T3`\n  ON `T2`.`budget_id` = `T3`.`link_to_budget`\nINNER JOIN `member` AS `T4`\n  ON `T3`.`link_to_member` = `T4`.`member_id`\nWHERE\n  `T1`.`event_name` = 'Yearly Kickoff'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1389,
    "db_id": "student_club",
    "question": "Which event has the lowest cost?",
    "evidence": "event refers to event_name; lowest cost means MIN(cost)",
    "SQL": "SELECT\n  `T1`.`event_name`\nFROM `event` AS `T1`\nINNER JOIN `budget` AS `T2`\n  ON `T1`.`event_id` = `T2`.`link_to_event`\nINNER JOIN `expense` AS `T3`\n  ON `T2`.`budget_id` = `T3`.`link_to_budget`\nORDER BY\n  `T3`.`cost`\nLIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 1390,
    "db_id": "student_club",
    "question": "Based on the total cost for all event, what is the percentage of cost for Yearly Kickoff event?",
    "evidence": "percentage = DIVIDE(SUM(cost where event_name = 'Yearly Kickoff'), SUM(cost)) * 100",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `T1`.`event_name` = 'Yearly Kickoff' THEN `T3`.`cost` ELSE 0 END) AS DOUBLE) * 100 / SUM(`T3`.`cost`)\nFROM `event` AS `T1`\nINNER JOIN `budget` AS `T2`\n  ON `T1`.`event_id` = `T2`.`link_to_event`\nINNER JOIN `expense` AS `T3`\n  ON `T2`.`budget_id` = `T3`.`link_to_budget`",
    "difficulty": "moderate"
  },
  {
    "question_id": 1392,
    "db_id": "student_club",
    "question": "Indicate the top source of funds received in September 2019 based on their amount.",
    "evidence": "top source funds refers to MAX(source); September 2019 means date_received BETWEEN '2019-09-01' and '2019-09-30'",
    "SQL": "SELECT\n  `source`\nFROM `income`\nWHERE\n  `date_received` BETWEEN '2019-09-01' AND '2019-09-30'\nORDER BY\n  `source` DESC\nLIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 1394,
    "db_id": "student_club",
    "question": "How many members of the Student_Club have major in 'Physics Teaching'?",
    "evidence": "'Physics Teaching' is the major_name;",
    "SQL": "SELECT\n  COUNT(`T2`.`member_id`)\nFROM `major` AS `T1`\nINNER JOIN `member` AS `T2`\n  ON `T1`.`major_id` = `T2`.`link_to_major`\nWHERE\n  `T1`.`major_name` = 'Physics Teaching'",
    "difficulty": "simple"
  },
  {
    "question_id": 1398,
    "db_id": "student_club",
    "question": "Name the event with the highest amount spent on advertisement.",
    "evidence": "Name of event refers to event_name; highest amount spent on advertisement refers to MAX(spent) where category = 'Advertisement'",
    "SQL": "SELECT\n  `T2`.`event_name`\nFROM `budget` AS `T1`\nINNER JOIN `event` AS `T2`\n  ON `T1`.`link_to_event` = `T2`.`event_id`\nWHERE\n  `T1`.`category` = 'Advertisement'\nORDER BY\n  `T1`.`spent` DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1399,
    "db_id": "student_club",
    "question": "Did Maya Mclean attend the 'Women's Soccer' event?",
    "evidence": "Maya Mclean is the full name; full name refers to first_name, last_name; 'Women's Soccer' is an event_name",
    "SQL": "SELECT\n  CASE WHEN `T3`.`event_name` = 'Women''s Soccer' THEN 'YES' END AS `result`\nFROM `member` AS `T1`\nINNER JOIN `attendance` AS `T2`\n  ON `T1`.`member_id` = `T2`.`link_to_member`\nINNER JOIN `event` AS `T3`\n  ON `T2`.`link_to_event` = `T3`.`event_id`\nWHERE\n  `T1`.`first_name` = 'Maya' AND `T1`.`last_name` = 'Mclean'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1401,
    "db_id": "student_club",
    "question": "Indicate the cost of posters for 'September Speaker' event.",
    "evidence": "'Posters' is the expense description; 'September Speaker' is an event name",
    "SQL": "SELECT\n  `T3`.`cost`\nFROM `event` AS `T1`\nINNER JOIN `budget` AS `T2`\n  ON `T1`.`event_id` = `T2`.`link_to_event`\nINNER JOIN `expense` AS `T3`\n  ON `T2`.`budget_id` = `T3`.`link_to_budget`\nWHERE\n  `T1`.`event_name` = 'September Speaker'\n  AND `T3`.`expense_description` = 'Posters'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1403,
    "db_id": "student_club",
    "question": "Indicate the name of the closed event whose cost has exceeded the budget the most.",
    "evidence": "closed events refers to event_name where status = 'Closed'; exceed the budget the most refers to MIN(remaining) where remaining < 0",
    "SQL": "SELECT\n  `T2`.`event_name`\nFROM `budget` AS `T1`\nINNER JOIN `event` AS `T2`\n  ON `T2`.`event_id` = `T1`.`link_to_event`\nWHERE\n  `T1`.`event_status` = 'Closed' AND `T1`.`remaining` < 0\nORDER BY\n  `T1`.`remaining`\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1404,
    "db_id": "student_club",
    "question": "Identify the type of expenses and their total value approved for 'October Meeting' event.",
    "evidence": "total value refers to SUM(cost); 'October Meeting' is an event name;",
    "SQL": "SELECT `T1`.`type`, SUM(`T3`.`cost`) AS `total_cost` FROM `event` AS `T1` INNER JOIN `budget` AS `T2` ON `T1`.`event_id` = `T2`.`link_to_event` INNER JOIN `expense` AS `T3` ON `T2`.`budget_id` = `T3`.`link_to_budget` WHERE `T1`.`event_name` = 'October Meeting' GROUP BY `T1`.`type`",
    "difficulty": "moderate"
  },
  {
    "question_id": 1405,
    "db_id": "student_club",
    "question": "Calculate the amount budgeted for 'April Speaker' event. List all the budgeted categories for said event in an ascending order based on their amount budgeted.",
    "evidence": "'April Speaker' is an event name; amount budgeted refers to SUM(amount); budget categories refers to category",
    "SQL": "SELECT T2.category, SUM(T2.amount) FROM event AS T1 INNER JOIN budget AS T2 ON T1.event_id = T2.link_to_event WHERE T1.event_name = 'April Speaker' GROUP BY T2.category ORDER BY SUM(T2.amount) ASC",
    "difficulty": "moderate"
  },
  {
    "question_id": 1409,
    "db_id": "student_club",
    "question": "Mention the total expense used on 8/20/2019.",
    "evidence": "total expense refers SUM(cost) where expense_date = '2019-08-20'",
    "SQL": "SELECT\n  SUM(`cost`)\nFROM `expense`\nWHERE\n  `expense_date` = '2019-08-20'",
    "difficulty": "simple"
  },
  {
    "question_id": 1410,
    "db_id": "student_club",
    "question": "List out the full name and total cost that member id \"rec4BLdZHS2Blfp4v\" incurred?",
    "evidence": "full name refers to first_name, last name",
    "SQL": "SELECT\n  `T1`.`first_name`,\n  `T1`.`last_name`,\n  SUM(`T2`.`cost`)\nFROM `member` AS `T1`\nINNER JOIN `expense` AS `T2`\n  ON `T1`.`member_id` = `T2`.`link_to_member`\nWHERE\n  `T1`.`member_id` = 'rec4BLdZHS2Blfp4v'",
    "difficulty": "simple"
  },
  {
    "question_id": 1411,
    "db_id": "student_club",
    "question": "State what kind of expenses that Sacha Harrison incurred?",
    "evidence": "kind of expenses refers to expense_description; Sacha Harrison is the full name; full name refers to first_name, last_name;",
    "SQL": "SELECT\n  `T2`.`expense_description`\nFROM `member` AS `T1`\nINNER JOIN `expense` AS `T2`\n  ON `T1`.`member_id` = `T2`.`link_to_member`\nWHERE\n  `T1`.`first_name` = 'Sacha' AND `T1`.`last_name` = 'Harrison'",
    "difficulty": "simple"
  },
  {
    "question_id": 1422,
    "db_id": "student_club",
    "question": "State the category of events were held at MU 215.",
    "evidence": "'MU 215' is the location of event; ",
    "SQL": "SELECT DISTINCT\n  `T2`.`category`\nFROM `event` AS `T1`\nINNER JOIN `budget` AS `T2`\n  ON `T1`.`event_id` = `T2`.`link_to_event`\nWHERE\n  `T1`.`location` = 'MU 215'",
    "difficulty": "simple"
  },
  {
    "question_id": 1426,
    "db_id": "student_club",
    "question": "List the last name of members with a major in environmental engineering and include its department and college name.",
    "evidence": "'Environmental Engineering' is the major_name;",
    "SQL": "SELECT\n  `T2`.`last_name`,\n  `T1`.`department`,\n  `T1`.`college`\nFROM `major` AS `T1`\nINNER JOIN `member` AS `T2`\n  ON `T1`.`major_id` = `T2`.`link_to_major`\nWHERE\n  `T2`.`position` = 'Member' AND `T1`.`major_name` = 'Environmental Engineering'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1427,
    "db_id": "student_club",
    "question": "What are the budget category of the events located at MU 215 and a guest speaker type with a 0 budget spent?",
    "evidence": "budget category refers to category; events located at refers to location; type = 'Guest Speaker'; 0 budget spent refers to spent = 0; ",
    "SQL": "SELECT DISTINCT\n  `T2`.`category`,\n  `T1`.`type`\nFROM `event` AS `T1`\nINNER JOIN `budget` AS `T2`\n  ON `T1`.`event_id` = `T2`.`link_to_event`\nWHERE\n  `T1`.`location` = 'MU 215' AND `T2`.`spent` = 0 AND `T1`.`type` = 'Guest Speaker'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1432,
    "db_id": "student_club",
    "question": "Among the members with t-shirt size of medium, what is the percentage of the amount 50 received by the Student_Club?",
    "evidence": "t_shirt_size = 'Medium' where position = 'Member'; percentage = DIVIDE(COUNT(amount = 50), COUNT(member_id)) * 100",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `T2`.`amount` = 50 THEN 1.0 ELSE 0 END) AS DOUBLE) * 100 / COUNT(`T2`.`income_id`)\nFROM `member` AS `T1`\nINNER JOIN `income` AS `T2`\n  ON `T1`.`member_id` = `T2`.`link_to_member`\nWHERE\n  `T1`.`position` = 'Member' AND `T1`.`t_shirt_size` = 'Medium'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1435,
    "db_id": "student_club",
    "question": "List the names of closed event as \"game\" that was closed from 3/15/2019 to 3/20/2020.",
    "evidence": "name of events refers event_name; game event that was closed refers to type = 'Game' where status = 'Closed'; event_date BETWEEN '2019-03-15' and '2020-03-20'; ",
    "SQL": "SELECT DISTINCT\n  `event_name`\nFROM `event`\nWHERE\n  `type` = 'Game'\n  AND DATE(SUBSTR(`event_date`, 1, 10)) BETWEEN '2019-03-15' AND '2020-03-20'\n  AND `status` = 'Closed'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1457,
    "db_id": "student_club",
    "question": "Give the full name and contact number of members who had to spend more than average on each expense.",
    "evidence": "full name refers to first_name, last_name; contact number refers to phone; had spent more than average on each expense refers to cost > AVG(cost)",
    "SQL": "SELECT DISTINCT\n  `T3`.`first_name`,\n  `T3`.`last_name`,\n  `T3`.`phone`\nFROM `expense` AS `T1`\nINNER JOIN `budget` AS `T2`\n  ON `T1`.`link_to_budget` = `T2`.`budget_id`\nINNER JOIN `member` AS `T3`\n  ON `T3`.`member_id` = `T1`.`link_to_member`\nWHERE\n  `T1`.`cost` > (\n    SELECT\n      AVG(`T1`.`cost`)\n    FROM `expense` AS `T1`\n    INNER JOIN `budget` AS `T2`\n      ON `T1`.`link_to_budget` = `T2`.`budget_id`\n    INNER JOIN `member` AS `T3`\n      ON `T3`.`member_id` = `T1`.`link_to_member`\n  )",
    "difficulty": "challenging"
  },
  {
    "question_id": 1460,
    "db_id": "student_club",
    "question": "Write the full name of the member who spent money for water, veggie tray and supplies and include the cost of it.",
    "evidence": "full name refers to first_name, last name; spent money for refers expense description; expense_description = 'Water, Veggie tray, supplies'",
    "SQL": "SELECT\n  `T2`.`first_name`,\n  `T2`.`last_name`,\n  `T1`.`cost`\nFROM `expense` AS `T1`\nINNER JOIN `member` AS `T2`\n  ON `T1`.`link_to_member` = `T2`.`member_id`\nWHERE\n  `T1`.`expense_description` = 'Water, Veggie tray, supplies'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1464,
    "db_id": "student_club",
    "question": "Write the full names of students who received funds on the date of 9/9/2019 and include the amount received.",
    "evidence": "full name refers to first_name, last_name, amount of funds received refers to amount, received funds on date refers to date_received",
    "SQL": "SELECT DISTINCT\n  `T3`.`first_name`,\n  `T3`.`last_name`,\n  `T4`.`amount`\nFROM `event` AS `T1`\nINNER JOIN `attendance` AS `T2`\n  ON `T1`.`event_id` = `T2`.`link_to_event`\nINNER JOIN `member` AS `T3`\n  ON `T3`.`member_id` = `T2`.`link_to_member`\nINNER JOIN `income` AS `T4`\n  ON `T4`.`link_to_member` = `T3`.`member_id`\nWHERE\n  `T4`.`date_received` = '2019-09-09'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1149,
    "db_id": "thrombosis_prediction",
    "question": "Are there more in-patient or outpatient who were male? What is the deviation in percentage?",
    "evidence": "male refers to SEX = 'M'; in-patient refers to Admission = '+'; outpatient refers to Admission = '-'; percentage = DIVIDE(COUNT(ID) where SEX = 'M' and Admission = '+', COUNT(ID) where SEX\u00a0 = 'M' and Admission = '-')",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `Admission` = '+' THEN 1 ELSE 0 END) AS DOUBLE) * 100 / SUM(CASE WHEN `Admission` = '-' THEN 1 ELSE 0 END)\nFROM `Patient`\nWHERE\n  `SEX` = 'M'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1150,
    "db_id": "thrombosis_prediction",
    "question": "What is the percentage of female patient were born after 1930?",
    "evidence": "female refers to Sex = 'F'; patient who were born after 1930 refers to year(Birthday) > '1930'; calculation = DIVIDE(COUNT(ID) where year(Birthday) > '1930' and SEX = 'F'), (COUNT(ID) where SEX = 'F')",
    "SQL": "SELECT\n  CAST(SUM(\n    CASE WHEN DATE_FORMAT(CAST(`Birthday` AS DATETIME), '%Y') > '1930' THEN 1 ELSE 0 END\n  ) AS DOUBLE) * 100 / COUNT(*)\nFROM `Patient`\nWHERE\n  `SEX` = 'F'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1152,
    "db_id": "thrombosis_prediction",
    "question": "What is the ratio of outpatient to inpatient followed up treatment among all the 'SLE' diagnosed patient?",
    "evidence": "'SLE' diagnosed patient means Diagnosis = 'SLE'; inpatient refers to Admission = '+'; outpatient refers to Admission = '-'; calculation =  DIVIDE(COUNT(ID) where Diagnosis = 'SLE' and Admission = '+', COUNT(ID) where Diagnosis = 'SLE' and Admission = '-')",
    "SQL": "SELECT\n  SUM(CASE WHEN `Admission` = '+' THEN 1 ELSE 0 END) / SUM(CASE WHEN `Admission` = '-' THEN 1 ELSE 0 END)\nFROM `Patient`\nWHERE\n  `Diagnosis` = 'SLE'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1153,
    "db_id": "thrombosis_prediction",
    "question": "What is the disease patient '30609' diagnosed with. List all the date of laboratory tests done for this patient.",
    "evidence": "'30609' is the Patient ID; disease means Diagnosis",
    "SQL": "SELECT\n  `T1`.`Diagnosis`,\n  `T2`.`Date`\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T1`.`ID` = 30609",
    "difficulty": "simple"
  },
  {
    "question_id": 1155,
    "db_id": "thrombosis_prediction",
    "question": "List the patient ID, sex and birthday of patient with LDH beyond normal range.",
    "evidence": "LDH beyond normal range refers to LDH > '500';",
    "SQL": "SELECT DISTINCT\n  `T1`.`ID`,\n  `T1`.`SEX`,\n  `T1`.`Birthday`\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T2`.`LDH` > 500",
    "difficulty": "simple"
  },
  {
    "question_id": 1156,
    "db_id": "thrombosis_prediction",
    "question": "State the ID and age of patient with positive degree of coagulation.",
    "evidence": "age refers to SUBTRACT(year(current_timestamp), year(Birthday)); positive degree of coagulation refers to RVVT = '+';",
    "SQL": "SELECT DISTINCT\n  `T1`.`ID`,\n  DATE_FORMAT(CAST(CURRENT_TIMESTAMP() AS DATETIME), '%Y') - DATE_FORMAT(CAST(`T1`.`Birthday` AS DATETIME), '%Y')\nFROM `Patient` AS `T1`\nINNER JOIN `Examination` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T2`.`RVVT` = '+'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1157,
    "db_id": "thrombosis_prediction",
    "question": "For patients with severe degree of thrombosis, list their ID, sex and disease the patient is diagnosed with.",
    "evidence": "severe degree of thrombosis refers to thrombosis = 2; disease refers to diagnosis;",
    "SQL": "SELECT DISTINCT\n  `T1`.`ID`,\n  `T1`.`SEX`,\n  `T1`.`Diagnosis`\nFROM `Patient` AS `T1`\nINNER JOIN `Examination` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T2`.`Thrombosis` = 2",
    "difficulty": "simple"
  },
  {
    "question_id": 1162,
    "db_id": "thrombosis_prediction",
    "question": "How many female patients who came at the hospital in 1997 was immediately followed at the outpatient clinic?",
    "evidence": "female refers to sex = 'F'; came at the hospital in 1997 refers to year(Description) = '1997'; immediately followed at the outpatient clinic refers to Admission = '-'",
    "SQL": "SELECT\n  COUNT(*)\nFROM `Patient`\nWHERE\n  DATE_FORMAT(CAST(`Description` AS DATETIME), '%Y') = '1997'\n  AND `SEX` = 'F'\n  AND `Admission` = '-'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1164,
    "db_id": "thrombosis_prediction",
    "question": "How many of the patients with the most serious thrombosis cases examined in 1997 are women?",
    "evidence": "the most serious thrombosis refers to Thrombosis = '1' (the most severe one); women refers to sex = 'F'",
    "SQL": "SELECT\n  COUNT(*)\nFROM `Patient` AS `T1`\nINNER JOIN `Examination` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T1`.`SEX` = 'F'\n  AND DATE_FORMAT(CAST(`T2`.`Examination Date` AS DATETIME), '%Y') = '1997'\n  AND `T2`.`Thrombosis` = 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1166,
    "db_id": "thrombosis_prediction",
    "question": "What are the symptoms observed by the youngest patient to ever did a medical examination? Identify their diagnosis.",
    "evidence": "The larger the birthday value, the younger the person is, and vice versa; symptoms observed refers to the symptoms is not NULL",
    "SQL": "SELECT\n  `T2`.`Symptoms`,\n  `T1`.`Diagnosis`\nFROM `Patient` AS `T1`\nINNER JOIN `Examination` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  NOT `T2`.`Symptoms` IS NULL\nORDER BY\n  `T1`.`Birthday` DESC\nLIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 1168,
    "db_id": "thrombosis_prediction",
    "question": "The oldest SJS patient's medical laboratory work was completed on what date, and what age was the patient when they initially arrived at the hospital?",
    "evidence": "The larger the birthday value, the younger the person is, and vice versa; 'SJS' refers to diagnosis; (SUBTRACT(year(`First Date`)), year(Birthday)); age of the patients when they initially arrived at the hospital refers to year(Birthday)",
    "SQL": "SELECT\n  `T1`.`Date`,\n  DATE_FORMAT(CAST(`T2`.`First Date` AS DATETIME), '%Y') - DATE_FORMAT(CAST(`T2`.`Birthday` AS DATETIME), '%Y'),\n  `T2`.`Birthday`\nFROM `Laboratory` AS `T1`\nINNER JOIN `Patient` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T2`.`Diagnosis` = 'SJS' AND NOT `T2`.`Birthday` IS NULL\nORDER BY\n  `T2`.`Birthday` ASC\nLIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 1169,
    "db_id": "thrombosis_prediction",
    "question": "What is the ratio of male to female patients among all those with abnormal uric acid counts?",
    "evidence": "male refers to SEX = 'M'; female refers to SEX = 'F'; abnormal uric acid refers to UA < = '8.0' where SEX = 'M', UA < = '6.5' where SEX = 'F'; calculation = DIVIDE(SUM(UA <= '8.0' and SEX = 'M'), SUM(UA <= '6.5 and SEX = 'F'))",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `T2`.`UA` <= 8.0 AND `T1`.`SEX` = 'M' THEN 1 ELSE 0 END) AS DOUBLE) / SUM(CASE WHEN `T2`.`UA` <= 6.5 AND `T1`.`SEX` = 'F' THEN 1 ELSE 0 END)\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`",
    "difficulty": "challenging"
  },
  {
    "question_id": 1171,
    "db_id": "thrombosis_prediction",
    "question": "How many underage patients were examined during the course of the three-year period from 1990 to 1993?",
    "evidence": "underage patients refers to year(Birthday) < 18; three-year period from 1990 to 1993 refers to year(`Examination Date`) between '1990' and '1993'",
    "SQL": "SELECT\n  COUNT(`T1`.`ID`)\nFROM `Patient` AS `T1`\nINNER JOIN `Examination` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  DATE_FORMAT(CAST(`T2`.`Examination Date` AS DATETIME), '%Y') BETWEEN '1990' AND '1993'\n  AND DATE_FORMAT(CAST(`T2`.`Examination Date` AS DATETIME), '%Y') - DATE_FORMAT(CAST(`T1`.`Birthday` AS DATETIME), '%Y') < '18'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1175,
    "db_id": "thrombosis_prediction",
    "question": "How old was the patient who had the highest hemoglobin count at the time of the examination, and what is the doctor's diagnosis?",
    "evidence": "How old the patient refers to SUBTRACT(year(`Examination Date`), year(Birthday)); the highest hemoglobin count refers to MAX(HGB)",
    "SQL": "SELECT\n  DATE_FORMAT(CAST(`T2`.`Date` AS DATETIME), '%Y') - DATE_FORMAT(CAST(`T1`.`Birthday` AS DATETIME), '%Y'),\n  `T1`.`Diagnosis`\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nORDER BY\n  `T2`.`HGB` DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1179,
    "db_id": "thrombosis_prediction",
    "question": "For the patient who was diagnosed with SLE on 1994/2/19, what was his/her anti-Cardiolipin antibody concentration status on 1993/11/12?",
    "evidence": "diagnosed with SLE refers to Diagnosis = 'SLE'; 1994/2/19 refers to Description = '1994-02-19'; anti-Cardiolipin refers to aCL IgM; 1993/11/12 refers to Examination Date = '1993/11/12'",
    "SQL": "SELECT\n  `aCL IgA`,\n  `aCL IgG`,\n  `aCL IgM`\nFROM `Examination`\nWHERE\n  `ID` IN (\n    SELECT\n      `ID`\n    FROM `Patient`\n    WHERE\n      `Diagnosis` = 'SLE' AND `Description` = '1994-02-19'\n  )\n  AND `Examination Date` = '1993-11-12'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1185,
    "db_id": "thrombosis_prediction",
    "question": "For the patient who was born on 1959/2/18, what is the decrease rate for his/her total cholesterol from November to December in 1981?",
    "evidence": "born on 1959/2/18 refers to Birthday = '1959-02-18'; calculation = DIVISION(SUBTRACT(SUM(Birthday = '1959-02-18' and Date like '1981-11-%' THEN `T-CHO`), SUM(Birthday = '1959-02-18' and Date like '1981-12-%' THEN `T-CHO`)), SUM(Birthday = '1959-02-18' and Date like '1981-12-%' THEN `T-CHO`))",
    "SQL": "SELECT\n  CAST((\n    SUM(CASE WHEN `T2`.`Date` LIKE '1981-11-%' THEN `T2`.`T-CHO` ELSE 0 END) - SUM(CASE WHEN `T2`.`Date` LIKE '1981-12-%' THEN `T2`.`T-CHO` ELSE 0 END)\n  ) AS DOUBLE) / SUM(CASE WHEN `T2`.`Date` LIKE '1981-12-%' THEN `T2`.`T-CHO` ELSE 0 END)\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T1`.`Birthday` = '1959-02-18'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1187,
    "db_id": "thrombosis_prediction",
    "question": "How many patients who were examined between 1987/7/6 and 1996/1/31 had a GPT level greater than 30 and an ALB level less than 4? List them by their ID.",
    "evidence": "examined between 1987/7/6 and 1996/1/31 refers to Date BETWEEN '1987-07-06' AND '1996-01-31'; GPT level greater than 30 refers to GPT > 30; ALB level less than 4 ALB < 4",
    "SQL": "SELECT DISTINCT\n  `ID`\nFROM `Laboratory`\nWHERE\n  `Date` BETWEEN '1987-07-06' AND '1996-01-31' AND `GPT` > 30 AND `ALB` < 4",
    "difficulty": "moderate"
  },
  {
    "question_id": 1189,
    "db_id": "thrombosis_prediction",
    "question": "What number of patients with a degree of thrombosis level 2 and ANA pattern of only S, have a level of anti-Cardiolip in antibody (IgM) 20% higher than average?",
    "evidence": "thrombosis level 2 refers to Thrombosis = 2; ANA pattern of only S refers to ANA = 'S'; average anti-Cardiolip in antibody (IgM) refers to AVG(`aCL IgM`); calculation = MULTIPLY(AVG + AVG, 0.2)",
    "SQL": "SELECT\n  COUNT(*)\nFROM `Examination`\nWHERE\n  `Thrombosis` = 2\n  AND `ANA Pattern` = 'S'\n  AND `aCL IgM` > (\n    SELECT\n      AVG(`aCL IgM`) * 1.2\n    FROM `Examination`\n    WHERE\n      `Thrombosis` = 2 AND `ANA Pattern` = 'S'\n  )",
    "difficulty": "challenging"
  },
  {
    "question_id": 1192,
    "db_id": "thrombosis_prediction",
    "question": "List all patients who were followed up at the outpatient clinic who underwent a laboratory test in October 1991 and had a total blood bilirubin level within the normal range.",
    "evidence": "followed up at the outpatient clinic refers to Admission = '-'; laboratory test in April 1981 refers to Date like '1991-10%'; blood bilirubin level within the normal range refers to T-BIL < 2.0; ",
    "SQL": "SELECT DISTINCT\n  `T1`.`ID`\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T1`.`Admission` = '-' AND `T2`.`T-BIL` < 2.0 AND `T2`.`Date` LIKE '1991-10-%'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1195,
    "db_id": "thrombosis_prediction",
    "question": "What is the average blood albumin level for female patients with a PLT greater than 400 who have been diagnosed with SLE?",
    "evidence": "average blood albumin level refers to AVG(ALB); female refers to SEX = 'F'; PLT greater than 400 refers to PLT > 400; diagnosed with SLE refers to Diagnosis= 'SLE'",
    "SQL": "SELECT\n  AVG(`T2`.`ALB`)\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T2`.`PLT` > 400 AND `T1`.`Diagnosis` = 'SLE' AND `T1`.`SEX` = 'F'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1198,
    "db_id": "thrombosis_prediction",
    "question": "How many female patients were given an APS diagnosis?",
    "evidence": "female refers to SEX = 'F'; APS diagnosis refers to Diagnosis='APS'",
    "SQL": "SELECT\n  COUNT(`ID`)\nFROM `Patient`\nWHERE\n  `SEX` = 'F' AND `Diagnosis` = 'APS'",
    "difficulty": "simple"
  },
  {
    "question_id": 1201,
    "db_id": "thrombosis_prediction",
    "question": "What percentage of patients who were born in 1980 and were diagnosed with RA are women?",
    "evidence": "born in 1980 refers to YEAR(BIRTHDAY) = '1980'; 'RA' refers to Diagnosis='RA' ; women refers to SEX = 'F'; calculation = DIVIDE(SUM(SEX = 'F'), COUNT(SEX)) * 100",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `SEX` = 'F' THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(`ID`)\nFROM `Patient`\nWHERE\n  `Diagnosis` = 'RA' AND DATE_FORMAT(CAST(`Birthday` AS DATETIME), '%Y') = '1980'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1205,
    "db_id": "thrombosis_prediction",
    "question": "Was the patient with the number 57266's uric acid within a normal range?",
    "evidence": "uric acid within a normal range refers to UA > 8.0 and SEX = 'M'OR UA > 6.5 and SEX = 'F'",
    "SQL": "SELECT\n  CASE\n    WHEN (\n      `T1`.`SEX` = 'F' AND `T2`.`UA` > 6.5\n    )\n    OR (\n      `T1`.`SEX` = 'M' AND `T2`.`UA` > 8.0\n    )\n    THEN TRUE\n    ELSE FALSE\n  END\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T1`.`ID` = 57266",
    "difficulty": "moderate"
  },
  {
    "question_id": 1208,
    "db_id": "thrombosis_prediction",
    "question": "Provide IDs for male patients with ALT glutamic pylvic transaminase (GPT) that have history of ALT glutamic pylvic transaminase (GPT) exceed the normal range.",
    "evidence": "male refers to SEX = 'M'; ALT glutamic pylvic transaminase (GPT) exceed the normal range refers to GPT > = 60",
    "SQL": "SELECT DISTINCT\n  `T1`.`ID`\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T1`.`SEX` = 'M' AND `T2`.`GPT` >= 60",
    "difficulty": "moderate"
  },
  {
    "question_id": 1209,
    "db_id": "thrombosis_prediction",
    "question": "Please provide the diagnosis of patients with ALT glutamic pylvic transaminase beyond the normal range by ascending order of their date of birth.",
    "evidence": "ALT glutamic pylvic transaminase beyond the normal range refers to GPT > 60; The larger the birthday value, the younger the person is, and vice versa; ",
    "SQL": "SELECT `T1`.`Diagnosis`\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T2`.`GPT` > 60\nORDER BY\n  `T1`.`Birthday` ASC",
    "difficulty": "moderate"
  },
  {
    "question_id": 1220,
    "db_id": "thrombosis_prediction",
    "question": "Provide all ID, sex and birthday of patients whose urea nitrogen (UN) just within the borderline of passing?",
    "evidence": "urea nitrogen (UN) just within the borderline of passing refers to UN = 29; ",
    "SQL": "SELECT DISTINCT\n  `T1`.`ID`,\n  `T1`.`SEX`,\n  `T1`.`Birthday`\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T2`.`UN` = 29",
    "difficulty": "simple"
  },
  {
    "question_id": 1225,
    "db_id": "thrombosis_prediction",
    "question": "List and group all patients by sex for total bilirubin (T-BIL) level not within the normal range.",
    "evidence": "total bilirubin (T-BIL) not within normal range refers to T-BIL > = 2.0",
    "SQL": "SELECT T1.ID, T1.SEX FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.`T-BIL` >= 2.0 GROUP BY T1.SEX, T1.ID",
    "difficulty": "moderate"
  },
  {
    "question_id": 1227,
    "db_id": "thrombosis_prediction",
    "question": "What is the average age of the male patient with high cholesterol?",
    "evidence": "average age = DIVIDE(SUM(SUBTRACT(YEAR(NOW()), YEAR(birthday))), COUNT(ID)); male patient refers to sex = 'M'; high cholesterol refers to `T-CHO` > = 250;",
    "SQL": "SELECT AVG(YEAR(CURRENT_DATE) - YEAR(T1.Birthday)) FROM Patient AS T1  INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID  WHERE T2.`T-CHO` >= 250 AND T1.SEX = 'M'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1229,
    "db_id": "thrombosis_prediction",
    "question": "For all patients with triglyceride (TG) level beyond the normal range, how many are age more than 50 years?",
    "evidence": "triglyceride (TG) level beyond the normal range refers to TG > = 200; more than 50 years of age = SUBTRACT(year(current_timestamp), year(Birthday)) > 50; Should consider DISTINCT in the final result;",
    "SQL": "SELECT\n  COUNT(DISTINCT `T1`.`ID`)\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T2`.`TG` >= 200\n  AND DATE_FORMAT(CAST(CURRENT_TIMESTAMP() AS DATETIME), '%Y') - DATE_FORMAT(CAST(`T1`.`Birthday` AS DATETIME), '%Y') > 50",
    "difficulty": "moderate"
  },
  {
    "question_id": 1231,
    "db_id": "thrombosis_prediction",
    "question": "For patient born between 1936-1956, how many male patients have creatinine phosphokinase beyond the normal range?",
    "evidence": "born between 1936-1956 refers to year(Birthday) BETWEEN '1936' AND '1956'; male patients refers to sex = 'M'; creatinine phosphokinase beyond the normal range refers to CPK > = 250; Should consider DISTINCT in the final result;",
    "SQL": "SELECT\n  COUNT(DISTINCT `T1`.`ID`)\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  DATE_FORMAT(CAST(`T1`.`Birthday` AS DATETIME), '%Y') BETWEEN '1936' AND '1956'\n  AND `T1`.`SEX` = 'M'\n  AND `T2`.`CPK` >= 250",
    "difficulty": "challenging"
  },
  {
    "question_id": 1232,
    "db_id": "thrombosis_prediction",
    "question": "Provide ID, sex and age of patient who has blood glucose (GLU) not within normal range but with total cholesterol(T-CHO) within normal range.",
    "evidence": "age = SUBTRACT(year(current_timestamp), year(Birthday)); blood glucose (GLU) not within normal range refers to GLU > = 180; total cholesterol(T-CHO) within normal range refers to `T-CHO` < 250; ",
    "SQL": "SELECT DISTINCT\n  `T1`.`ID`,\n  `T1`.`SEX`,\n  DATE_FORMAT(CAST(CURRENT_TIMESTAMP() AS DATETIME), '%Y') - DATE_FORMAT(CAST(`T1`.`Birthday` AS DATETIME), '%Y')\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T2`.`GLU` >= 180 AND `T2`.`T-CHO` < 250",
    "difficulty": "challenging"
  },
  {
    "question_id": 1235,
    "db_id": "thrombosis_prediction",
    "question": "What are the patient's diagnosis for those who has lower red blood blood cell? State their ID and age.",
    "evidence": "patient's diagnosis refers to Diagnosis; lower red blood cell refers to RBC < 3.5; age = SUBTRACT(year(current_timestamp), year(Birthday)); ",
    "SQL": "SELECT DISTINCT\n  `T1`.`Diagnosis`,\n  `T1`.`ID`,\n  DATE_FORMAT(CAST(CURRENT_TIMESTAMP() AS DATETIME), '%Y') - DATE_FORMAT(CAST(`T1`.`Birthday` AS DATETIME), '%Y')\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T2`.`RBC` < 3.5",
    "difficulty": "moderate"
  },
  {
    "question_id": 1238,
    "db_id": "thrombosis_prediction",
    "question": "Among the patients who were diagnosed with SLE, who is the oldest with normal hemoglobin level. Provide the ID and sex.",
    "evidence": "diagnosed with SLE refers to Diagnosis = 'SLE'; The larger the birthday value, the younger the person is, and vice versa; normal hemoglobin level refers to 10 < HGB < 17;",
    "SQL": "SELECT\n  `T1`.`ID`,\n  `T1`.`SEX`\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T1`.`Diagnosis` = 'SLE' AND `T2`.`HGB` > 10 AND `T2`.`HGB` < 17\nORDER BY\n  `T1`.`Birthday` ASC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1239,
    "db_id": "thrombosis_prediction",
    "question": "Name the ID and age of patient with two or more laboratory examinations which show their hematoclit level exceeded the normal range.",
    "evidence": "age = SUBTRACT(year(current_timestamp), year(Birthday)); patient with two or more laboratory examinations refers to COUNT(ID) > 2; hematoclit level exceeded the normal range refers to HCT > = 52;",
    "SQL": "SELECT DISTINCT\n  `T1`.`ID`,\n  DATE_FORMAT(CAST(CURRENT_TIMESTAMP() AS DATETIME), '%Y') - DATE_FORMAT(CAST(`T1`.`Birthday` AS DATETIME), '%Y')\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T1`.`ID` IN (\n    SELECT\n      `ID`\n    FROM `Laboratory`\n    WHERE\n      `HCT` >= 52\n    GROUP BY\n      `ID`\n    HAVING\n      COUNT(`ID`) >= 2\n  )",
    "difficulty": "challenging"
  },
  {
    "question_id": 1241,
    "db_id": "thrombosis_prediction",
    "question": "For patients with abnormal platelet level, state the number of patients with lower than normal range. How is it compare to the number of patients with higher than normal range?",
    "evidence": "abnormal platelet level refers to PLT <= 100 or PLT >= 400; platelet level lower than normal range refers to PLT < 100; calculation = SUBTRACT(SUM(PLT < 100), SUM(PLT > 400)); platelet level higher than normal range refers to PLT > 400;",
    "SQL": "SELECT\n  SUM(CASE WHEN `T2`.`PLT` <= 100 THEN 1 ELSE 0 END) - SUM(CASE WHEN `T2`.`PLT` >= 400 THEN 1 ELSE 0 END)\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`",
    "difficulty": "challenging"
  },
  {
    "question_id": 1242,
    "db_id": "thrombosis_prediction",
    "question": "For laboratory examinations take in 1984, list all patients below 50 years old with normal platelet level.",
    "evidence": "laboratory examinations take in 1984 refers to YEAR(Date) = '1984'; below 50 years old = SUBTRACT(year(current_timestamp), year(Birthday)) < 50; normal platelet level refers to PLT between 100 and 400; ",
    "SQL": "SELECT DISTINCT\n  `T1`.`ID`\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T2`.`PLT` BETWEEN 100 AND 400\n  AND DATE_FORMAT(CAST(`T2`.`Date` AS DATETIME), '%Y') - DATE_FORMAT(CAST(`T1`.`Birthday` AS DATETIME), '%Y') < 50\n  AND DATE_FORMAT(CAST(`T2`.`Date` AS DATETIME), '%Y') = '1984'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1243,
    "db_id": "thrombosis_prediction",
    "question": "For all patients who are older than 55 years old, what is the percentage of female who has abnormal prothrombin time (PT)?",
    "evidence": "older than 55 years old = SUBTRACT(year(current_timestamp), year(Birthday)) > 55; abnormal prothrombin time (PT) refers to PT > = 14; percentage = DIVIDE(SUM(PT > = 14 AND SEX = 'F'), SUM(PT > = 14)) * 100; female refers to sex = 'F'; ",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `T2`.`PT` >= 14 AND `T1`.`SEX` = 'F' THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(CASE WHEN `T2`.`PT` >= 14 THEN 1 ELSE 0 END)\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  DATE_FORMAT(CAST(CURRENT_TIMESTAMP() AS DATETIME), '%Y') - DATE_FORMAT(CAST(`T1`.`Birthday` AS DATETIME), '%Y') > 55",
    "difficulty": "challenging"
  },
  {
    "question_id": 1247,
    "db_id": "thrombosis_prediction",
    "question": "Among the male patients who have a normal level of white blood cells, how many of them have an abnormal fibrinogen level?",
    "evidence": "male patients refers to Sex = 'M'; normal level of white blood cells refers to WBC > 3.5 and WBC <9.0; abnormal fibrinogen level refers to FG < = 150 or FG > = 450; Don't compute repetitive ones.",
    "SQL": "SELECT\n  COUNT(DISTINCT `T1`.`ID`)\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T2`.`FG` <= 150\n  OR `T2`.`FG` >= 450\n  AND `T2`.`WBC` > 3.5\n  AND `T2`.`WBC` < 9.0\n  AND `T1`.`SEX` = 'M'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1251,
    "db_id": "thrombosis_prediction",
    "question": "How many patients with an Ig G higher than normal?",
    "evidence": "Ig G higher than normal refers to IGG >= 2000; Should consider DISTINCT in the final result;",
    "SQL": "SELECT\n  COUNT(DISTINCT `T1`.`ID`)\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nINNER JOIN `Examination` AS `T3`\n  ON `T3`.`ID` = `T2`.`ID`\nWHERE\n  `T2`.`IGG` >= 2000",
    "difficulty": "simple"
  },
  {
    "question_id": 1252,
    "db_id": "thrombosis_prediction",
    "question": "Among the patients with a normal Ig G level, how many of them have symptoms?",
    "evidence": "normal Ig G level refers to IGG > 900 and IGG < 2000; have symptoms refers to Symptoms IS NOT NULL;",
    "SQL": "SELECT\n  COUNT(`T1`.`ID`)\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nINNER JOIN `Examination` AS `T3`\n  ON `T3`.`ID` = `T2`.`ID`\nWHERE\n  `T2`.`IGG` BETWEEN 900 AND 2000 AND NOT `T3`.`Symptoms` IS NULL",
    "difficulty": "moderate"
  },
  {
    "question_id": 1254,
    "db_id": "thrombosis_prediction",
    "question": "How many patients with a normal Ig A level came to the hospital after 1990/1/1?",
    "evidence": "normal Ig A level refers to IGA > 80 AND IGA < 500; came to the hospital after 1990/1/1 refers to YEAR(`First Date`) > = 1990;",
    "SQL": "SELECT\n  COUNT(`T1`.`ID`)\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T2`.`IGA` BETWEEN 80 AND 500\n  AND DATE_FORMAT(CAST(`T1`.`First Date` AS DATETIME), '%Y') > '1990'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1255,
    "db_id": "thrombosis_prediction",
    "question": "For the patients with an abnormal Ig M level, what is the most common disease they are diagnosed with?",
    "evidence": "abnormal Ig M level refers to IGM <=40 OR IGM >= 400; most common disease refers to MAX(COUNT(Diagnosis));",
    "SQL": "SELECT\n  `T1`.`Diagnosis`\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  NOT `T2`.`IGM` BETWEEN 40 AND 400\nGROUP BY\n  `T1`.`Diagnosis`\nORDER BY\n  COUNT(`T1`.`Diagnosis`) DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1256,
    "db_id": "thrombosis_prediction",
    "question": "How many patients with a abnormal C-reactive protein don't have their data recorded?",
    "evidence": "abnormal C-reactive protein refers to CRP ='+'; don't have data recorded refers to Description IS NULL;",
    "SQL": "SELECT COUNT(T1.ID) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE (T2.CRP = '+' ) AND T1.Description IS NULL;",
    "difficulty": "moderate"
  },
  {
    "question_id": 1257,
    "db_id": "thrombosis_prediction",
    "question": "Among the patients whose creatinine level is abnormal, how many of them aren't 70 yet?",
    "evidence": "creatinine level is abnormal refers to CRE >= 1.5; aren't 70 yet refers to SUBTRACT((YEAR(CURDATE()), YEAR(Birthday))) < 70; ",
    "SQL": "SELECT COUNT(DISTINCT T1.ID) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.CRE >= 1.5 AND YEAR(CURDATE()) - YEAR(T1.Birthday) < 70",
    "difficulty": "challenging"
  },
  {
    "question_id": 1265,
    "db_id": "thrombosis_prediction",
    "question": "How many patients have a normal level of anti-ribonuclear protein and have been admitted to the hospital?",
    "evidence": "normal level of anti-ribonuclear protein refers to RNP = '-', '+-'; And'-' means 'negative'; '+-' refers to '0'; admitted to the hospital refers to Admission = '+'; Should consider DISTINCT in the final result;",
    "SQL": "SELECT\n  COUNT(DISTINCT `T1`.`ID`)\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T2`.`RNP` = 'negative' OR `T2`.`RNP` = '0' AND `T1`.`Admission` = '+'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1267,
    "db_id": "thrombosis_prediction",
    "question": "Among the patients with normal anti-SM, how many of them does not have thrombosis?",
    "evidence": "normal anti-SM refers to SM IN('-', '+-'); SM = 'negative' means '-'; SM = '0' means '+-'; SM = '1' means '+'; does not have thrombosis refers to Thrombosis = 0;",
    "SQL": "SELECT\n  COUNT(`T1`.`ID`)\nFROM `Examination` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T2`.`SM` IN ('negative', '0') AND `T1`.`Thrombosis` = 0",
    "difficulty": "moderate"
  },
  {
    "question_id": 1270,
    "db_id": "thrombosis_prediction",
    "question": "Among the patients who has a normal anti-scl70, how many of them are female and does not have any symptom?",
    "evidence": "normal anti-scl70 refers to SC170 IN('negative', '0'); female refers to Sex = 'F'; does not have any symptom refers to symptoms IS NULL; Should consider DISTINCT in the final result;",
    "SQL": "SELECT\n  COUNT(DISTINCT `T1`.`ID`)\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nINNER JOIN `Examination` AS `T3`\n  ON `T3`.`ID` = `T2`.`ID`\nWHERE\n  (\n    `T2`.`SC170` = 'negative' OR `T2`.`SC170` = '0'\n  )\n  AND `T1`.`SEX` = 'F'\n  AND `T3`.`Symptoms` IS NULL",
    "difficulty": "challenging"
  },
  {
    "question_id": 1275,
    "db_id": "thrombosis_prediction",
    "question": "Among the patients who has a normal level of anti-centromere and a normal level of anti-SSB, how many of them are male?",
    "evidence": "normal level of anti-centromere refers to CENTROMEA IN('-', '+-'); normal level of anti-SSB refers to SSB IN('-', '+-'); male refers to Sex = 'M'; Should consider DISTINCT in the final result;",
    "SQL": "SELECT\n  COUNT(DISTINCT `T1`.`ID`)\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T2`.`CENTROMEA` IN ('negative', '0')\n  AND `T2`.`SSB` IN ('negative', '0')\n  AND `T1`.`SEX` = 'M'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1281,
    "db_id": "thrombosis_prediction",
    "question": "Among the patients who have an abnormal level of glutamic oxaloacetic transaminase, when was the youngest of them born?",
    "evidence": "abnormal level of glutamic oxaloacetic transaminase refers to GOT > = 60; The larger the birthday value, the younger the person is, and vice versa;",
    "SQL": "SELECT\n  `T1`.`Birthday`\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nWHERE\n  `T2`.`GOT` >= 60\nORDER BY\n  `T1`.`Birthday` DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1302,
    "db_id": "thrombosis_prediction",
    "question": "For the patients with a normal range of creatinine phosphokinase, how many of them have a positive measure of degree of coagulation?",
    "evidence": "normal range of creatinine phosphokinase refers to CPK < 250; positive measure of degree of coagulation refers to KCT = '+' or RVVT = '+' or LAC = '+' ;",
    "SQL": "SELECT\n  COUNT(`T1`.`ID`)\nFROM `Patient` AS `T1`\nINNER JOIN `Laboratory` AS `T2`\n  ON `T1`.`ID` = `T2`.`ID`\nINNER JOIN `Examination` AS `T3`\n  ON `T1`.`ID` = `T3`.`ID`\nWHERE\n  `T2`.`CPK` < 250\n  AND (\n    `T3`.`KCT` = '+' OR `T3`.`RVVT` = '+' OR `T3`.`LAC` = '+'\n  )",
    "difficulty": "challenging"
  },
  {
    "question_id": 1025,
    "db_id": "european_football_2",
    "question": "Give the name of the league had the most goals in the 2016 season?",
    "evidence": "league that had the most goals refers to MAX(SUM(home_team_goal, away_team_goal)); 2016 season refers to season = '2015/2016';",
    "SQL": "SELECT  `t2`.`name` FROM `Match` AS `t1` INNER JOIN `League` AS `t2`\n  ON `t1`.`league_id` = `t2`.`id`\nWHERE\n  `t1`.`season` = '2015/2016'\nGROUP BY\n  `t2`.`name`\nORDER BY\n  SUM(`t1`.`home_team_goal` + `t1`.`away_team_goal`) DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1028,
    "db_id": "european_football_2",
    "question": "In Scotland Premier League, which away team won the most during the 2010 season?",
    "evidence": "Final result should return the Team.team_long_name; Scotland Premier League refers to League.name = 'Scotland Premier League'; away team refers to away_team_api_id; away team that won the most refers to MAX(SUBTRACT(away_team_goal, home_team_goal) > 0); 2010 season refers to season = '2009/2010'; won the most refers to MAX(COUNT(*));",
    "SQL": "SELECT\n  `teamInfo`.`team_long_name`\nFROM `League` AS `leagueData`\nINNER JOIN `Match` AS `matchData`\n  ON `leagueData`.`id` = `matchData`.`league_id`\nINNER JOIN `Team` AS `teamInfo`\n  ON `matchData`.`away_team_api_id` = `teamInfo`.`team_api_id`\nWHERE\n  `leagueData`.`name` = 'Scotland Premier League'\n  AND `matchData`.`season` = '2009/2010'\n  AND `matchData`.`away_team_goal` - `matchData`.`home_team_goal` > 0\nGROUP BY\n  `matchData`.`away_team_api_id`\nORDER BY\n  COUNT(*) DESC\nLIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 1029,
    "db_id": "european_football_2",
    "question": "What are the speed in which attacks are put together of the top 4 teams with the highest build Up Play Speed?",
    "evidence": "speed in which attacks are put together refers to buildUpPlaySpeed;highest build up play speed refers to MAX(buildUpPlaySpeed)",
    "SQL": "SELECT\n  `t1`.`buildUpPlaySpeed`\nFROM `Team_Attributes` AS `t1`\nINNER JOIN `Team` AS `t2`\n  ON `t1`.`team_api_id` = `t2`.`team_api_id`\nORDER BY\n  `t1`.`buildUpPlaySpeed` ASC\nLIMIT 4",
    "difficulty": "moderate"
  },
  {
    "question_id": 1030,
    "db_id": "european_football_2",
    "question": "Give the name of the league had the most matches end as draw in the 2016 season?",
    "evidence": "most matches end as draw refers to MAX(SUM(home_team_goal = away_team_goal)); 2016 season refers to season = '2015/2016';",
    "SQL": "SELECT\n  `t2`.`name`\nFROM `Match` AS `t1`\nINNER JOIN `League` AS `t2`\n  ON `t1`.`league_id` = `t2`.`id`\nWHERE\n  `t1`.`season` = '2015/2016' AND `t1`.`home_team_goal` = `t1`.`away_team_goal`\nGROUP BY\n  `t2`.`name`\nORDER BY\n  COUNT(`t1`.`id`) DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1031,
    "db_id": "european_football_2",
    "question": "At present, calculate for the player's age who have a sprint speed of no less than 97 between 2013 to 2015.",
    "evidence": "players age at present = SUBTRACT((DATETIME(), birthday)); sprint speed of no less than 97 refers to sprint_speed > = 97; between 2013 to 2015 refers to YEAR(date) > = '2013' AND YEAR(date) < = '2015'; ",
    "SQL": "SELECT DISTINCT YEAR(CURDATE()) - YEAR(t2.birthday) AS age FROM Player_Attributes AS t1 INNER JOIN Player AS t2 ON t1.player_api_id = t2.player_api_id WHERE YEAR(t1.`date`) BETWEEN 2013 AND 2015 AND t1.sprint_speed >= 97",
    "difficulty": "challenging"
  },
  {
    "question_id": 1032,
    "db_id": "european_football_2",
    "question": "Give the name of the league with the highest matches of all time and how many matches were played in the said league.",
    "evidence": " league with highest matches of all time refers to MAX(COUNT(league_id));",
    "SQL": "SELECT\n  `t2`.`name`,\n  COUNT(`t1`.`id`)\nFROM `Match` AS `t1`\nINNER JOIN `League` AS `t2`\n  ON `t1`.`league_id` = `t2`.`id`\nGROUP BY\n  `t2`.`name`\nORDER BY\n  COUNT(`t1`.`id`) DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1035,
    "db_id": "european_football_2",
    "question": "Give the team_fifa_api_id of teams with more than 50 but less than 60 build-up play speed.",
    "evidence": "teams with more than 50 but less than 60 build-up play speed refers to buildUpPlaySpeed >50 AND buildUpPlaySpeed <60; ",
    "SQL": "SELECT DISTINCT\n  `team_fifa_api_id`\nFROM `Team_Attributes`\nWHERE\n  `buildUpPlaySpeed` > 50 AND `buildUpPlaySpeed` < 60",
    "difficulty": "simple"
  },
  {
    "question_id": 1036,
    "db_id": "european_football_2",
    "question": "List the long name of teams with above-average build-up play passing in 2012.",
    "evidence": "long name of teams refers to team_long_name; build-up play passing refers to buildUpPlayPassing; above-average build-up play passing = buildUpPlayPassing > DIVIDE(SUM(buildUpPlayPassing), COUNT(team_long_name) WHERE buildUpPlayPassing IS NOT NULL); in 2012 refers to strftime('%Y', date) = '2012'; ",
    "SQL": "SELECT DISTINCT\n  `t4`.`team_long_name`\nFROM `Team_Attributes` AS `t3`\nINNER JOIN `Team` AS `t4`\n  ON `t3`.`team_api_id` = `t4`.`team_api_id`\nWHERE\n  SUBSTR(`t3`.`date`, 1, 4) = '2012'\n  AND `t3`.`buildUpPlayPassing` > (\n    SELECT\n      CAST(SUM(`t2`.`buildUpPlayPassing`) AS DOUBLE) / COUNT(`t1`.`id`)\n    FROM `Team` AS `t1`\n    INNER JOIN `Team_Attributes` AS `t2`\n      ON `t1`.`team_api_id` = `t2`.`team_api_id`\n    WHERE\n      DATE_FORMAT(CAST(`t2`.`date` AS DATETIME), '%Y') = '2012'\n  )",
    "difficulty": "challenging"
  },
  {
    "question_id": 1037,
    "db_id": "european_football_2",
    "question": "Calculate the percentage of players who prefer left foot, who were born between 1987 and 1992.",
    "evidence": "players who prefer left foot refers to preferred_foot = 'left'; percentage of players who prefer left foot = DIVIDE(MULTIPLY((SUM(preferred_foot = 'left'), 100)), COUNT(player_fifa_api_id)); born between 1987 and 1992 refers to YEAR(birthday) BETWEEN '1987' AND '1992';",
    "SQL": "SELECT\n  CAST(COUNT(CASE WHEN `t2`.`preferred_foot` = 'left' THEN `t1`.`id` ELSE NULL END) AS DOUBLE) * 100 / COUNT(`t1`.`id`) AS `percent`\nFROM `Player` AS `t1`\nINNER JOIN `Player_Attributes` AS `t2`\n  ON `t1`.`player_api_id` = `t2`.`player_api_id`\nWHERE\n  SUBSTR(`t1`.`birthday`, 1, 4) BETWEEN '1987' AND '1992'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1039,
    "db_id": "european_football_2",
    "question": "Find the average number of long-shot done by Ahmed Samir Farag.",
    "evidence": "average number of long shot = DIVIDE(SUM(long_shots), COUNT(player_fifa_api_id));",
    "SQL": "SELECT\n  CAST(SUM(`t2`.`long_shots`) AS DOUBLE) / COUNT(`t2`.`date`)\nFROM `Player` AS `t1`\nINNER JOIN `Player_Attributes` AS `t2`\n  ON `t1`.`player_api_id` = `t2`.`player_api_id`\nWHERE\n  `t1`.`player_name` = 'Ahmed Samir Farag'",
    "difficulty": "simple"
  },
  {
    "question_id": 1040,
    "db_id": "european_football_2",
    "question": "List the top 10 players' names whose heights are above 180 in descending order of average heading accuracy.",
    "evidence": "heights are above 180 refers to Player.height > 180; average heading accuracy = DIVIDE(SUM(heading_accuracy), COUNT(player_fifa_api_id));",
    "SQL": "SELECT\n  `t1`.`player_name`\nFROM `Player` AS `t1`\nINNER JOIN `Player_Attributes` AS `t2`\n  ON `t1`.`player_api_id` = `t2`.`player_api_id`\nWHERE\n  `t1`.`height` > 180\nGROUP BY\n  `t1`.`id`\nORDER BY\n  CAST(SUM(`t2`.`heading_accuracy`) AS DOUBLE) / COUNT(`t2`.`player_fifa_api_id`) DESC\nLIMIT 10",
    "difficulty": "moderate"
  },
  {
    "question_id": 1042,
    "db_id": "european_football_2",
    "question": "List the name of leagues in which the average goals by the home team is higher than the away team in the 2009/2010 season.",
    "evidence": "name of league refers to League.name; average goals by the home team is higher than the away team = AVG(home_team_goal) > AVG(away_team_goal); AVG(xx_goal) = SUM(xx_goal) / COUNT(DISTINCT Match.id); 2009/2010 season refers to season = '2009/2010'",
    "SQL": "SELECT\n  `t1`.`name`\nFROM `League` AS `t1`\nINNER JOIN `Match` AS `t2`\n  ON `t1`.`id` = `t2`.`league_id`\nWHERE\n  `t2`.`season` = '2009/2010'\nGROUP BY\n  `t1`.`name`\nHAVING\n  (\n    CAST(SUM(`t2`.`home_team_goal`) AS DOUBLE) / COUNT(DISTINCT `t2`.`id`)\n  ) - (\n    CAST(SUM(`t2`.`away_team_goal`) AS DOUBLE) / COUNT(DISTINCT `t2`.`id`)\n  ) > 0",
    "difficulty": "challenging"
  },
  {
    "question_id": 1044,
    "db_id": "european_football_2",
    "question": "List the football players with a birthyear of 1970 and a birthmonth of October.",
    "evidence": "players with a birthyear of 1970 and a birthmonth of October refers to substr(birthday,1,7) AS 'year-month',WHERE year = '1970' AND month = '10';",
    "SQL": "SELECT\n  `player_name`\nFROM `Player`\nWHERE\n  SUBSTR(`birthday`, 1, 7) = '1970-10'",
    "difficulty": "simple"
  },
  {
    "question_id": 1048,
    "db_id": "european_football_2",
    "question": "What is the overall rating of the football player Gabriel Tamas in year 2011?",
    "evidence": "in year 2011 refers to strftime('%Y', date) = '2011';",
    "SQL": "SELECT  `t2`.`overall_rating` FROM `Player` AS `t1`\nINNER JOIN `Player_Attributes` AS `t2`\n  ON `t1`.`player_api_id` = `t2`.`player_api_id` WHERE  `t1`.`player_name` = 'Gabriel Tamas'  AND DATE_FORMAT(CAST(`t2`.`date` AS DATETIME), '%Y') = '2011'",
    "difficulty": "simple"
  },
  {
    "question_id": 1057,
    "db_id": "european_football_2",
    "question": "Calculate the average home team goal in the 2010/2011 season in the country of Poland.",
    "evidence": "average home team goal = AVG(home_team_goal)= SUM(home_team_goal) / COUNT(DISTINCT Match.id) WHERE name = 'Poland' and season = '2010/2011';",
    "SQL": "SELECT\n  CAST(SUM(`t2`.`home_team_goal`) AS DOUBLE) / COUNT(`t2`.`id`)\nFROM `Country` AS `t1`\nINNER JOIN `Match` AS `t2`\n  ON `t1`.`id` = `t2`.`country_id`\nWHERE\n  `t1`.`name` = 'Poland' AND `t2`.`season` = '2010/2011'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1058,
    "db_id": "european_football_2",
    "question": "Who has the highest average finishing rate between the highest and shortest football player?",
    "evidence": "finishing rate refers to finishing; highest average finishing rate = MAX(AVG(finishing)); highest football player refers to MAX(height); shortest football player refers to MIN(height);",
    "SQL": "SELECT `A` FROM (SELECT AVG(`finishing`) AS `result`, 'Max' AS `A` FROM `Player` AS `T1` INNER JOIN `Player_Attributes` AS `T2` ON `T1`.`player_api_id` = `T2`.`player_api_id` WHERE `T1`.`height` = (SELECT MAX(`height`) FROM `Player`) UNION SELECT AVG(`finishing`) AS `result`, 'Min' AS `A` FROM `Player` AS `T1` INNER JOIN `Player_Attributes` AS `T2` ON `T1`.`player_api_id` = `T2`.`player_api_id` WHERE `T1`.`height` = (SELECT MIN(`height`) FROM `Player`)) AS `derived` ORDER BY `result` DESC LIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 1068,
    "db_id": "european_football_2",
    "question": "From 2010 to 2015, what was the average overall rating of players who are higher than 170?",
    "evidence": "from 2010 to 2015 refers to strftime('%Y', date) >= '2010' AND <= '2015'; average overall rating = SUM(t2.overall_rating)/ COUNT(t2.id); higher than 170 refers to Player.height > 170;",
    "SQL": "SELECT\n  CAST(SUM(`t2`.`overall_rating`) AS DOUBLE) / COUNT(`t2`.`id`)\nFROM `Player` AS `t1`\nINNER JOIN `Player_Attributes` AS `t2`\n  ON `t1`.`player_api_id` = `t2`.`player_api_id`\nWHERE\n  `t1`.`height` > 170\n  AND DATE_FORMAT(CAST(`t2`.`date` AS DATETIME), '%Y') >= '2010'\n  AND DATE_FORMAT(CAST(`t2`.`date` AS DATETIME), '%Y') <= '2015'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1076,
    "db_id": "european_football_2",
    "question": "What is the difference of the average ball control score between Abdou Diallo and Aaron Appindangoye\n?",
    "evidence": "difference of the average ball control = SUBTRACT(AVG(ball_control WHERE player_name = 'Abdou Diallo'), AVG(ball_control WHERE player_name = 'Aaron Appindangoye')); AVG(ball_control WHERE player_name = 'XX XX') = SUM(CASE WHEN player_name = 'XX XX' THEN ball_control ELSE 0 END) / COUNT(CASE WHEN player_name = 'XX XX' THEN id ELSE NULL END)",
    "SQL": "SELECT\n  CAST(SUM(\n    CASE WHEN `t1`.`player_name` = 'Abdou Diallo' THEN `t2`.`ball_control` ELSE 0 END\n  ) AS DOUBLE) / COUNT(CASE WHEN `t1`.`player_name` = 'Abdou Diallo' THEN `t2`.`id` ELSE NULL END) - CAST(SUM(\n    CASE\n      WHEN `t1`.`player_name` = 'Aaron Appindangoye'\n      THEN `t2`.`ball_control`\n      ELSE 0\n    END\n  ) AS DOUBLE) / COUNT(CASE WHEN `t1`.`player_name` = 'Aaron Appindangoye' THEN `t2`.`id` ELSE NULL END)\nFROM `Player` AS `t1`\nINNER JOIN `Player_Attributes` AS `t2`\n  ON `t1`.`player_api_id` = `t2`.`player_api_id`",
    "difficulty": "challenging"
  },
  {
    "question_id": 1078,
    "db_id": "european_football_2",
    "question": "Which player is older, Aaron Lennon or Abdelaziz Barrada?",
    "evidence": "The larger the birthday value, the younger the person is, and vice versa;",
    "SQL": "SELECT\n  `player_name`\nFROM `Player`\nWHERE\n  `player_name` IN ('Aaron Lennon', 'Abdelaziz Barrada')\nORDER BY\n  `birthday` ASC\nLIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 1079,
    "db_id": "european_football_2",
    "question": "Which player is the tallest?",
    "evidence": "tallest player refers to MAX(height);",
    "SQL": "SELECT\n  `player_name`\nFROM `Player`\nORDER BY\n  `height` DESC\nLIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 1080,
    "db_id": "european_football_2",
    "question": "Among the players whose preferred foot was the left foot when attacking, how many of them would remain in his position when the team attacked?",
    "evidence": "preferred foot when attacking was the left refers to preferred_foot = 'left'; players who would remain in his position when the team attacked refers to attacking_work_rate = 'low';",
    "SQL": "SELECT\n  COUNT(`player_api_id`)\nFROM `Player_Attributes`\nWHERE\n  `preferred_foot` = 'left' AND `attacking_work_rate` = 'low'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1084,
    "db_id": "european_football_2",
    "question": "Among the players born before the year 1986, how many of them would remain in his position and defense while the team attacked?",
    "evidence": "players born before the year 1986 refers to strftime('%Y', birthday)<'1986'; players who would remain in his position and defense while the team attacked refers to defensive_work_rate = 'high'; Should consider DISTINCT in the final result;",
    "SQL": "SELECT\n  COUNT(DISTINCT `t1`.`player_name`)\nFROM `Player` AS `t1`\nINNER JOIN `Player_Attributes` AS `t2`\n  ON `t1`.`player_api_id` = `t2`.`player_api_id`\nWHERE\n  DATE_FORMAT(CAST(`t1`.`birthday` AS DATETIME), '%Y') < '1986'\n  AND `t2`.`defensive_work_rate` = 'high'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1088,
    "db_id": "european_football_2",
    "question": "Please list the names of the players whose volley score and dribbling score are over 70.",
    "evidence": "volley score are over 70 refers to volleys > 70; dribbling score refers to dribbling are over 70 refers to dribbling > 70;",
    "SQL": "SELECT DISTINCT\n  `t1`.`player_name`\nFROM `Player` AS `t1`\nINNER JOIN `Player_Attributes` AS `t2`\n  ON `t1`.`player_api_id` = `t2`.`player_api_id`\nWHERE\n  `t2`.`volleys` > 70 AND `t2`.`dribbling` > 70",
    "difficulty": "moderate"
  },
  {
    "question_id": 1091,
    "db_id": "european_football_2",
    "question": "How many matches were held in the Belgium Jupiler League in April, 2009?",
    "evidence": "Belgium Jupiler League refers to League.name = 'Belgium Jupiler League'; in April, 2009 refers to SUBSTR(`date`, 1, 7);",
    "SQL": "SELECT\n  COUNT(`t2`.`id`)\nFROM `League` AS `t1`\nINNER JOIN `Match` AS `t2`\n  ON `t1`.`id` = `t2`.`league_id`\nWHERE\n  `t1`.`name` = 'Belgium Jupiler League' AND SUBSTR(`t2`.`date`, 1, 7) = '2009-04'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1092,
    "db_id": "european_football_2",
    "question": "Give the name of the league had the most matches in the 2008/2009 season?",
    "evidence": "league that had the most matches in the 2008/2009 season refers to MAX(league_name WHERE season = '2008/2009');",
    "SQL": "SELECT `t1`.`name` FROM `League` AS `t1`JOIN `Match` AS `t2` ON `t1`.`id` = `t2`.`league_id` WHERE  `t2`.`season` = '2008/2009' GROUP BY  `t1`.`name` HAVING COUNT(`t2`.`id`) = (SELECT MAX(match_count) FROM (SELECT COUNT(`t2`.`id`) AS match_count FROM `Match` AS `t2` WHERE `t2`.`season` = '2008/2009' GROUP BY  `t2`.`league_id`) AS subquery)",
    "difficulty": "simple"
  },
  {
    "question_id": 1094,
    "db_id": "european_football_2",
    "question": "How much higher in percentage is Ariel Borysiuk's overall rating than that of Paulin Puel?",
    "evidence": "how much higher in percentage = MULTIPLY(DIVIDE(SUBTRACT(overall_rating WHERE player_name = 'Ariel Borysiuk', overall_rating WHERE player_name = 'Paulin Puel'), overall_rating WHERE player_name = 'Paulin Puel'), 100);",
    "SQL": "SELECT\n  (\n    SUM(\n      CASE\n        WHEN `t1`.`player_name` = 'Ariel Borysiuk'\n        THEN `t2`.`overall_rating`\n        ELSE 0\n      END\n    ) * 1.0 - SUM(\n      CASE WHEN `t1`.`player_name` = 'Paulin Puel' THEN `t2`.`overall_rating` ELSE 0 END\n    )\n  ) * 100 / SUM(\n    CASE WHEN `t1`.`player_name` = 'Paulin Puel' THEN `t2`.`overall_rating` ELSE 0 END\n  )\nFROM `Player` AS `t1`\nINNER JOIN `Player_Attributes` AS `t2`\n  ON `t1`.`player_api_id` = `t2`.`player_api_id`",
    "difficulty": "challenging"
  },
  {
    "question_id": 1096,
    "db_id": "european_football_2",
    "question": "Calculate the average overall rating of Pietro Marino.",
    "evidence": "Pietro Marino refers to player_name = 'Pietro Marino'; average overall rating AVG(T1.overall_rating)",
    "SQL": "SELECT\n  CAST(SUM(`t2`.`overall_rating`) AS DOUBLE) / COUNT(`t2`.`id`)\nFROM `Player` AS `t1`\nINNER JOIN `Player_Attributes` AS `t2`\n  ON `t1`.`player_api_id` = `t2`.`player_api_id`\nWHERE\n  `t1`.`player_name` = 'Pietro Marino'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1098,
    "db_id": "european_football_2",
    "question": "What is Ajax's highest chance creation passing score and what is it classified as?",
    "evidence": "Ajax's refers to team_long_name = 'Ajax'; chance creation passing score refers to MAX(chanceCreationPassing); classified refer to chanceCreationPassingClass",
    "SQL": "SELECT\n  `t2`.`chanceCreationPassing`,\n  `t2`.`chanceCreationPassingClass`\nFROM `Team` AS `t1`\nINNER JOIN `Team_Attributes` AS `t2`\n  ON `t1`.`team_api_id` = `t2`.`team_api_id`\nWHERE\n  `t1`.`team_long_name` = 'Ajax'\nORDER BY\n  `t2`.`chanceCreationPassing` DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1102,
    "db_id": "european_football_2",
    "question": "For the players who had a 77 points overall rating on 2016/6/23, who was the oldest? Give the name of the player.",
    "evidence": "77 points overall rating refers to overall_rating = 77; on 2016/6/23 refers to date LIKE '2016-06-23%'; The larger the birthday value, the younger the person is, and vice versa;",
    "SQL": "SELECT\n  `t1`.`player_name`\nFROM `Player` AS `t1`\nINNER JOIN `Player_Attributes` AS `t2`\n  ON `t1`.`player_api_id` = `t2`.`player_api_id`\nWHERE\n  SUBSTR(`t2`.`date`, 1, 10) = '2016-06-23' AND `t2`.`overall_rating` = 77\nORDER BY\n  `t1`.`birthday` ASC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1103,
    "db_id": "european_football_2",
    "question": "What was the overall rating for Aaron Mooy on 2016/2/4?",
    "evidence": "Aaron Mooy refers to player_name = 'Aaron Mooy'; on 2016/2/4 refers to date LIKE '2016-02-04%';",
    "SQL": "SELECT\n  `t2`.`overall_rating`\nFROM `Player` AS `t1`\nINNER JOIN `Player_Attributes` AS `t2`\n  ON `t1`.`player_api_id` = `t2`.`player_api_id`\nWHERE\n  SUBSTR(`t2`.`date`, 1, 10) = '2016-02-04' AND `t1`.`player_name` = 'Aaron Mooy'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1105,
    "db_id": "european_football_2",
    "question": "How was Francesco Migliore's attacking work rate on 2015/5/1?",
    "evidence": "Francesco Migliore refers to player_name = 'Francesco Migliore'; on 2015/5/1 refers to date LIKE '2015-05-01%';",
    "SQL": "SELECT\n  `t2`.`attacking_work_rate`\nFROM `Player` AS `t1`\nINNER JOIN `Player_Attributes` AS `t2`\n  ON `t1`.`player_api_id` = `t2`.`player_api_id`\nWHERE\n  `t2`.`date` LIKE '2015-05-01%' AND `t1`.`player_name` = 'Francesco Migliore'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1107,
    "db_id": "european_football_2",
    "question": "When was the first time did Kevin Constant have his highest crossing score? Give the date.",
    "evidence": "Kevin Constant refers to player_name = 'Kevin Constant'; highest crossing score refers to MAX(crossing)",
    "SQL": "SELECT `date` FROM (SELECT `t2`.`crossing`, `t2`.`date` FROM `Player` AS `t1` INNER JOIN `Player_Attributes` AS `t2` ON `t1`.`player_fifa_api_id` = `t2`.`player_fifa_api_id` WHERE `t1`.`player_name` = 'Kevin Constant' ORDER BY `t2`.`crossing` DESC) AS `subquery` ORDER BY `date` DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1110,
    "db_id": "european_football_2",
    "question": "Tell the build Up play passing class for \"FC Lorient\" on 2010/2/22.",
    "evidence": "\"FC Lorient\" refers to team_long_name = 'FC Lorient'; on 2010/2/22 refers to date LIKE '2010-02-22%';",
    "SQL": "SELECT\n  `t2`.`buildUpPlayPassingClass`\nFROM `Team` AS `t1`\nINNER JOIN `Team_Attributes` AS `t2`\n  ON `t1`.`team_api_id` = `t2`.`team_api_id`\nWHERE\n  `t1`.`team_long_name` = 'FC Lorient' AND `t2`.`date` LIKE '2010-02-22%'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1113,
    "db_id": "european_football_2",
    "question": "For the team \"Hannover 96\", what was its defence aggression class on 2015/9/10?",
    "evidence": "\"Hannover 96\" refers to team_long_name = 'Hannover 96'; on 2015/9/10 refers to date LIKE '2015-09-10%';",
    "SQL": "SELECT\n  `t2`.`defenceAggressionClass`\nFROM `Team` AS `t1`\nINNER JOIN `Team_Attributes` AS `t2`\n  ON `t1`.`team_api_id` = `t2`.`team_api_id`\nWHERE\n  `t1`.`team_long_name` = 'Hannover 96' AND `t2`.`date` LIKE '2015-09-10%'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1114,
    "db_id": "european_football_2",
    "question": "What was the average overall rating for Marko Arnautovic from 2007/2/22 to 2016/4/21?",
    "evidence": "average overall rating refers to avg(overall_rating); Marko Arnautovic refers to player_name = 'Marko Arnautovic'; from 2007/2/22 to 2016/4/21 refers to the first 10 characters of date BETWEEN '2007-02-22' and '2016-04-21'",
    "SQL": "SELECT\n  CAST(SUM(`t2`.`overall_rating`) AS DOUBLE) / COUNT(`t2`.`id`)\nFROM `Player` AS `t1`\nINNER JOIN `Player_Attributes` AS `t2`\n  ON `t1`.`player_fifa_api_id` = `t2`.`player_fifa_api_id`\nWHERE\n  `t1`.`player_name` = 'Marko Arnautovic'\n  AND SUBSTR(`t2`.`date`, 1, 10) BETWEEN '2007-02-22' AND '2016-04-21'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1115,
    "db_id": "european_football_2",
    "question": "What percentage is Landon Donovan's overall rating higher than Jordan Bowery on 2013/7/12?",
    "evidence": "Landon Donovan's refers to player_name = 'Landon Donovan'; Jordan Bowery refers to player_name = 'Jordan Bowery'; percentage refers to DIVIDE(SUBTRACT(player_name = 'Landon Donovan' overall_rating; player_name = 'Jordan Bowery' overall_rating), player_name = 'Landon Donovan' overall_rating)*100",
    "SQL": "SELECT\n  (\n    SUM(\n      CASE\n        WHEN `t1`.`player_name` = 'Landon Donovan'\n        THEN `t2`.`overall_rating`\n        ELSE 0\n      END\n    ) * 1.0 - SUM(\n      CASE WHEN `t1`.`player_name` = 'Jordan Bowery' THEN `t2`.`overall_rating` ELSE 0 END\n    )\n  ) * 100 / SUM(\n    CASE\n      WHEN `t1`.`player_name` = 'Landon Donovan'\n      THEN `t2`.`overall_rating`\n      ELSE 0\n    END\n  ) AS `LvsJ_percent`\nFROM `Player` AS `t1`\nINNER JOIN `Player_Attributes` AS `t2`\n  ON `t1`.`player_fifa_api_id` = `t2`.`player_fifa_api_id`\nWHERE\n  SUBSTR(`t2`.`date`, 1, 10) = '2013-07-12'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1116,
    "db_id": "european_football_2",
    "question": "List down most tallest players' name.",
    "evidence": "tallest refers to rank based on the height in descending order; Most tallest players refers to rank = 1 ",
    "SQL": "SELECT player_name FROM (SELECT player_name, height, DENSE_RANK() OVER (ORDER BY height DESC) as `rank`FROM Player) AS ranked_players WHERE `rank` = 1",
    "difficulty": "simple"
  },
  {
    "question_id": 1122,
    "db_id": "european_football_2",
    "question": "State the name of the most strongest player.",
    "evidence": "strongest players refers to player has MAX(overall_rating)",
    "SQL": "SELECT DISTINCT t1.player_name FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_api_id = t2.player_api_id WHERE t2.overall_rating = (SELECT MAX(overall_rating) FROM Player_Attributes)",
    "difficulty": "simple"
  },
  {
    "question_id": 1124,
    "db_id": "european_football_2",
    "question": "Who are the players that tend to be attacking when their mates were doing attack moves? List down their name.",
    "evidence": "tend to be attacking when their mates were doing attack moves refers to attacking_work_rate = 'high';",
    "SQL": "SELECT DISTINCT\n  `t1`.`player_name`\nFROM `Player` AS `t1`\nINNER JOIN `Player_Attributes` AS `t2`\n  ON `t1`.`player_api_id` = `t2`.`player_api_id`\nWHERE\n  `t2`.`attacking_work_rate` = 'high'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1130,
    "db_id": "european_football_2",
    "question": "What are the short name of team who played safe while creating chance of passing?",
    "evidence": "played safe while creating chance of passing refers to chanceCreationPassingClass = 'Safe'; short name of team refers to team_short_name",
    "SQL": "SELECT DISTINCT\n  `t1`.`team_short_name`\nFROM `Team` AS `t1`\nINNER JOIN `Team_Attributes` AS `t2`\n  ON `t1`.`team_api_id` = `t2`.`team_api_id`\nWHERE\n  `t2`.`chanceCreationPassingClass` = 'Safe'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1133,
    "db_id": "european_football_2",
    "question": "How many football players born after the 1990s have the first name \"Aaron\"?",
    "evidence": "first name \"Aaron\" refers to player_name LIKE 'Aaron%'; born after the 1990s refers to birthday > '1990'",
    "SQL": "SELECT\n  COUNT(`id`)\nFROM `Player`\nWHERE\n  `birthday` > '1990' AND `player_name` LIKE 'Aaron%'",
    "difficulty": "simple"
  },
  {
    "question_id": 1134,
    "db_id": "european_football_2",
    "question": "What is the difference between players 6 and 23's jumping scores?",
    "evidence": "difference between players 6 and 23's jumping scores refers to SUBTRACT(jumping AND id = 6,jumping AND id = 23)",
    "SQL": "SELECT\n  SUM(CASE WHEN `t1`.`id` = 6 THEN `t1`.`jumping` ELSE 0 END) - SUM(CASE WHEN `t1`.`id` = 23 THEN `t1`.`jumping` ELSE 0 END)\nFROM `Player_Attributes` AS `t1`",
    "difficulty": "simple"
  },
  {
    "question_id": 1135,
    "db_id": "european_football_2",
    "question": "Please provide top four football players' IDs who are among the lowest potential players and prefer to use the right foot when attacking.",
    "evidence": "lowest potential players refers to MIN(potential); prefer to use the right foot when attacking refers to preferred_foot = 'right'",
    "SQL": "SELECT\n  `id`\nFROM `Player_Attributes`\nWHERE\n  `preferred_foot` = 'right'\nORDER BY\n  `potential` ASC\nLIMIT 4",
    "difficulty": "moderate"
  },
  {
    "question_id": 1136,
    "db_id": "european_football_2",
    "question": "How many players had the highest potential score for crossing that preferred to use their left foots while attacking?",
    "evidence": "highest potential score for crossing refers to MAX(crossing); preferred to use their left foots while attacking refers to preferred_foot = 'left'",
    "SQL": "SELECT\n  COUNT(`t1`.`id`)\nFROM `Player_Attributes` AS `t1`\nWHERE\n  `t1`.`preferred_foot` = 'left'\n  AND `t1`.`crossing` = (\n    SELECT\n      MAX(`crossing`)\n    FROM `Player_Attributes`\n  )",
    "difficulty": "moderate"
  },
  {
    "question_id": 1139,
    "db_id": "european_football_2",
    "question": "What was the final score for the match on September 24, 2008, in the Belgian Jupiler League between the home team and the away team?",
    "evidence": "September 24, 2008 refers to date like '2008-09-24%'; in the Belgian Jupiler League refers to League.name = 'Belgium Jupiler League'; final score for home team refers to home_team_goal; final score for away team refers to away_team_goal",
    "SQL": "SELECT\n  `t2`.`home_team_goal`,\n  `t2`.`away_team_goal`\nFROM `League` AS `t1`\nINNER JOIN `Match` AS `t2`\n  ON `t1`.`id` = `t2`.`league_id`\nWHERE\n  `t1`.`name` = 'Belgium Jupiler League' AND `t2`.`date` LIKE '2008-09-24%'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1141,
    "db_id": "european_football_2",
    "question": "Does the KSV Cercle Brugge team have a slow, balanced or fast speed class?",
    "evidence": "KSV Cercle Brugge refers to team_long_name = 'KSV Cercle Brugge'; speed class refers to buildUpPlaySpeedClass",
    "SQL": "SELECT DISTINCT\n  `t1`.`buildUpPlaySpeedClass`\nFROM `Team_Attributes` AS `t1`\nINNER JOIN `Team` AS `t2`\n  ON `t1`.`team_api_id` = `t2`.`team_api_id`\nWHERE\n  `t2`.`team_long_name` = 'KSV Cercle Brugge'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1144,
    "db_id": "european_football_2",
    "question": "Please state the finishing rate and curve score of the player who has the heaviest weight.",
    "evidence": "finishing rate refer to finishing; curve score refer to curve; heaviest weight refers to MAX(weight)",
    "SQL": "SELECT\n  `id`,\n  `finishing`,\n  `curve`\nFROM `Player_Attributes`\nWHERE\n  `player_api_id` = (\n    SELECT\n      `player_api_id`\n    FROM `Player`\n    ORDER BY\n      `weight` DESC\n    LIMIT 1\n  )\nLIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 1145,
    "db_id": "european_football_2",
    "question": "Which top 4 leagues had the most games in the 2015-2016 season?",
    "evidence": "in the 2015-2016 season refers to season = '2015/2016'; league with most games refers to League.name where MAX(COUNT(id)); ",
    "SQL": "SELECT\n  `t1`.`name`\nFROM `League` AS `t1`\nINNER JOIN `Match` AS `t2`\n  ON `t1`.`id` = `t2`.`league_id`\nWHERE\n  `t2`.`season` = '2015/2016'\nGROUP BY\n  `t1`.`name`\nORDER BY\n  COUNT(`t2`.`id`) DESC\nLIMIT 4",
    "difficulty": "simple"
  },
  {
    "question_id": 1146,
    "db_id": "european_football_2",
    "question": "Please provide the full name of the away team that scored the most goals.",
    "evidence": "full name refers to team_long_name; away team refers to away_team_api_id; scored the most goals refers to MAX(away_team_goal)",
    "SQL": "SELECT\n  `t2`.`team_long_name`\nFROM `Match` AS `t1`\nINNER JOIN `Team` AS `t2`\n  ON `t1`.`away_team_api_id` = `t2`.`team_api_id`\nORDER BY\n  `t1`.`away_team_goal` DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1147,
    "db_id": "european_football_2",
    "question": "Please name one player whose overall strength is the greatest.",
    "evidence": "overall strength is the greatest refers to MAX(overall_rating)",
    "SQL": "SELECT DISTINCT\n  `t1`.`player_name`\nFROM `Player` AS `t1`\nINNER JOIN `Player_Attributes` AS `t2`\n  ON `t1`.`player_api_id` = `t2`.`player_api_id`\nWHERE\n  `t2`.`overall_rating` = (\n    SELECT\n      MAX(`overall_rating`)\n    FROM `Player_Attributes`\n  )",
    "difficulty": "simple"
  },
  {
    "question_id": 1148,
    "db_id": "european_football_2",
    "question": "What is the percentage of players that are under 180 cm who have an overall strength of more than 70?",
    "evidence": "percentage refers to DIVIDE(COUNT(height < 180 AND overall_rating > 70),COUNT(id)) * 100",
    "SQL": "SELECT\n  CAST(COUNT(CASE WHEN `t2`.`overall_rating` > 70 AND `t1`.`height` < 180 THEN `t1`.`id` ELSE NULL END) AS DOUBLE) * 100 / COUNT(`t1`.`id`) AS `percent`\nFROM `Player` AS `t1`\nINNER JOIN `Player_Attributes` AS `t2`\n  ON `t1`.`player_api_id` = `t2`.`player_api_id`",
    "difficulty": "moderate"
  },
  {
    "question_id": 846,
    "db_id": "formula_1",
    "question": "Please list the reference names of the drivers who are eliminated in the first period in race number 20.",
    "evidence": "driver reference name refers to driverRef; first qualifying period refers to q1; drivers who are eliminated in the first qualifying period refers to 5 drivers with MAX(q1); race number refers to raceId;",
    "SQL": "SELECT\n  `T2`.`driverRef`\nFROM `qualifying` AS `T1`\nINNER JOIN `drivers` AS `T2`\n  ON `T2`.`driverId` = `T1`.`driverId`\nWHERE\n  `T1`.`raceId` = 20\nORDER BY\n  `T1`.`q1` DESC\nLIMIT 5",
    "difficulty": "moderate"
  },
  {
    "question_id": 847,
    "db_id": "formula_1",
    "question": "What is the surname of the driver with the best lap time in race number 19 in the second qualifying period?",
    "evidence": "race number refers to raceId; second qualifying period refers to q2; best lap time refers to MIN(q2);",
    "SQL": "SELECT\n  `T2`.`surname`\nFROM `qualifying` AS `T1`\nINNER JOIN `drivers` AS `T2`\n  ON `T2`.`driverId` = `T1`.`driverId`\nWHERE\n  `T1`.`raceId` = 19\nORDER BY\n  `T1`.`q2` ASC\nLIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 850,
    "db_id": "formula_1",
    "question": "Please give the name of the race held on the circuits in Germany.",
    "evidence": "Germany is a name of country;",
    "SQL": "SELECT DISTINCT\n  `T2`.`name`\nFROM `circuits` AS `T1`\nINNER JOIN `races` AS `T2`\n  ON `T2`.`circuitID` = `T1`.`circuitId`\nWHERE\n  `T1`.`country` = 'Germany'",
    "difficulty": "simple"
  },
  {
    "question_id": 854,
    "db_id": "formula_1",
    "question": "What is the coordinates location of the circuits for Australian grand prix?",
    "evidence": "coordinate position/location refers to lat, lng; circuits for Australian grand prix refers to races.name = 'Australian Grand Prix'",
    "SQL": "SELECT DISTINCT\n  `T1`.`lat`,\n  `T1`.`lng`\nFROM `circuits` AS `T1`\nINNER JOIN `races` AS `T2`\n  ON `T2`.`circuitID` = `T1`.`circuitId`\nWHERE\n  `T2`.`name` = 'Australian Grand Prix'",
    "difficulty": "simple"
  },
  {
    "question_id": 857,
    "db_id": "formula_1",
    "question": "Give the coordinate position for Abu Dhabi Grand Prix.",
    "evidence": "coordinate position/location refers to lat, lng; Abu Dhabi Grand Prix refers to races.name = 'Abu Dhabi Grand Prix'",
    "SQL": "SELECT DISTINCT\n  `T1`.`lat`,\n  `T1`.`lng`\nFROM `circuits` AS `T1`\nINNER JOIN `races` AS `T2`\n  ON `T2`.`circuitID` = `T1`.`circuitId`\nWHERE\n  `T2`.`name` = 'Abu Dhabi Grand Prix'",
    "difficulty": "simple"
  },
  {
    "question_id": 859,
    "db_id": "formula_1",
    "question": "What's Bruno Senna's Q1 result in the qualifying race No. 354?",
    "evidence": "race number refers to raceId; Bruno Senna refers to the full name of the driver; Full name of the driver refers to drivers.forename and drivers.surname;",
    "SQL": "SELECT\n  `T1`.`q1`\nFROM `qualifying` AS `T1`\nINNER JOIN `drivers` AS `T2`\n  ON `T2`.`driverId` = `T1`.`driverId`\nWHERE\n  `T1`.`raceId` = 354 AND `T2`.`forename` = 'Bruno' AND `T2`.`surname` = 'Senna'",
    "difficulty": "simple"
  },
  {
    "question_id": 861,
    "db_id": "formula_1",
    "question": "What is his number of the driver who finished 0:01:54 in the Q3 of qualifying race No.903?",
    "evidence": "race number refers to raceId; finished 0:0M:SS in the Q3 refers to q3 LIKE 'M:SS%'",
    "SQL": "SELECT\n  `T2`.`number`\nFROM `qualifying` AS `T1`\nINNER JOIN `drivers` AS `T2`\n  ON `T2`.`driverId` = `T1`.`driverId`\nWHERE\n  `T1`.`raceId` = 903 AND `T1`.`q3` LIKE '1:54%'",
    "difficulty": "simple"
  },
  {
    "question_id": 862,
    "db_id": "formula_1",
    "question": "For the Bahrain Grand Prix in 2007, how many drivers not finished the game?",
    "evidence": "Bahrain Grand Prix refers to races.name = 'Bahrain Grand Prix'; drivers who finished the race refers to time is not empty (i.e. time IS NOT NULL);",
    "SQL": "SELECT\n  COUNT(`T3`.`driverId`)\nFROM `races` AS `T1`\nINNER JOIN `results` AS `T2`\n  ON `T2`.`raceId` = `T1`.`raceId`\nINNER JOIN `drivers` AS `T3`\n  ON `T3`.`driverId` = `T2`.`driverId`\nWHERE\n  `T1`.`year` = 2007 AND `T1`.`name` = 'Bahrain Grand Prix' AND `T2`.`time` IS NULL",
    "difficulty": "simple"
  },
  {
    "question_id": 865,
    "db_id": "formula_1",
    "question": "For all the drivers who finished the game in race No. 592, who is the oldest?",
    "evidence": "drivers who finished the race refers to time is not empty (i.e. time IS NOT NULL); race number refers to raceId; date of birth refers to drivers.dob; The larger the birthday value, the younger the person is, and vice versa;",
    "SQL": "SELECT\n  `T1`.`forename`,\n  `T1`.`surname`\nFROM `drivers` AS `T1`\nINNER JOIN `results` AS `T2`\n  ON `T2`.`driverId` = `T1`.`driverId`\nWHERE\n  `T2`.`raceId` = 592 AND NOT `T2`.`time` IS NULL AND NOT `T1`.`dob` IS NULL\nORDER BY\n  `T1`.`dob` ASC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 866,
    "db_id": "formula_1",
    "question": "Who was the player that got the lap time of 0:01:27 in the race No. 161? Show his introduction website.",
    "evidence": "player and driver are synonyms; the lap time of 0:0M:SS refers to lapTime.time LIKE 'M:SS%';race number refers to raceId; introduction website of the drivers refers to url;",
    "SQL": "SELECT DISTINCT\n  `T2`.`forename`,\n  `T2`.`surname`,\n  `T2`.`url`\nFROM `lapTimes` AS `T1`\nINNER JOIN `drivers` AS `T2`\n  ON `T2`.`driverId` = `T1`.`driverId`\nWHERE\n  `T1`.`raceId` = 161 AND `T1`.`time` LIKE '1:27%'",
    "difficulty": "moderate"
  },
  {
    "question_id": 868,
    "db_id": "formula_1",
    "question": "Where is Malaysian Grand Prix held? Give the location coordinates.",
    "evidence": "location coordinates refers to (lat, lng); Malaysian Grand Prix refers to races.name = 'Malaysian Grand Prix'",
    "SQL": "SELECT DISTINCT\n  `T1`.`lat`,\n  `T1`.`lng`\nFROM `circuits` AS `T1`\nINNER JOIN `races` AS `T2`\n  ON `T2`.`circuitID` = `T1`.`circuitId`\nWHERE\n  `T2`.`name` = 'Malaysian Grand Prix'",
    "difficulty": "simple"
  },
  {
    "question_id": 869,
    "db_id": "formula_1",
    "question": "For the constructor which got the highest point in the race No. 9 , what is its introduction website?",
    "evidence": "race number refers to raceId; constructor which got the highest point refers to MAX(constructorResults.points); introduction website of the constructor refers to url;",
    "SQL": "SELECT\n  `T2`.`url`\nFROM `constructorResults` AS `T1`\nINNER JOIN `constructors` AS `T2`\n  ON `T2`.`constructorId` = `T1`.`constructorId`\nWHERE\n  `T1`.`raceId` = 9\nORDER BY\n  `T1`.`points` DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 872,
    "db_id": "formula_1",
    "question": "In the race No. 45, for the driver who had the Q3 time as 0:01:33, what is his abbreviated code?",
    "evidence": "race number refers to raceId; had the Q3 time as 0:0M:SS refers to q3 LIKE 'M:SS%'",
    "SQL": "SELECT\n  `T2`.`code`\nFROM `qualifying` AS `T1`\nINNER JOIN `drivers` AS `T2`\n  ON `T2`.`driverId` = `T1`.`driverId`\nWHERE\n  `T1`.`raceId` = 45 AND `T1`.`q3` LIKE '1:33%'",
    "difficulty": "simple"
  },
  {
    "question_id": 875,
    "db_id": "formula_1",
    "question": "Show me the season page of year when the race No. 901 took place.",
    "evidence": "the season page refers to url; race number refers to raceId;",
    "SQL": "SELECT\n  `T2`.`url`\nFROM `races` AS `T1`\nINNER JOIN `seasons` AS `T2`\n  ON `T2`.`year` = `T1`.`year`\nWHERE\n  `T1`.`raceId` = 901",
    "difficulty": "simple"
  },
  {
    "question_id": 877,
    "db_id": "formula_1",
    "question": "For all the drivers who finished the game in race No. 872, who is the youngest?",
    "evidence": "race number refers to raceId; drivers who finished the race refers to time has value; the youngest is a driver where MAX(dob);",
    "SQL": "SELECT\n  `T1`.`forename`,\n  `T1`.`surname`\nFROM `drivers` AS `T1`\nINNER JOIN `results` AS `T2`\n  ON `T2`.`driverId` = `T1`.`driverId`\nWHERE\n  `T2`.`raceId` = 872 AND NOT `T2`.`time` IS NULL\nORDER BY\n  `T1`.`dob` DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 879,
    "db_id": "formula_1",
    "question": "For the driver who set the fastest lap speed, what is his nationality?",
    "evidence": "the fastest lap speed refers to (MAX) fastestLapSpeed;",
    "SQL": "SELECT\n  `T1`.`nationality`\nFROM `drivers` AS `T1`\nINNER JOIN `results` AS `T2`\n  ON `T2`.`driverId` = `T1`.`driverId`\nORDER BY\n  `T2`.`fastestLapSpeed` DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 880,
    "db_id": "formula_1",
    "question": "Paul di Resta was in the No. 853 race, what percent faster did he finish in the 853rd race than the next race for the fastest lap speed?",
    "evidence": "Paul di Resta refers to the full name of the driver; Full name of the driver refers to drivers.forename ='Paul' and drivers.surname = 'di Resta'; race number refers to raceId; percentage = DIVIDE(SUBTRACT(fastestLapSpeed(raceId = 853), (fastestLapSpeed (raceId = 854)) * 100 , (fastestLapSpeed(raceId = 853))",
    "SQL": "SELECT\n  (\n    SUM(CASE WHEN `T2`.`raceId` = 853 THEN `T2`.`fastestLapSpeed` ELSE 0 END) - SUM(CASE WHEN `T2`.`raceId` = 854 THEN `T2`.`fastestLapSpeed` ELSE 0 END)\n  ) * 100 / SUM(CASE WHEN `T2`.`raceId` = 853 THEN `T2`.`fastestLapSpeed` ELSE 0 END)\nFROM `drivers` AS `T1`\nINNER JOIN `results` AS `T2`\n  ON `T2`.`driverId` = `T1`.`driverId`\nWHERE\n  `T1`.`forename` = 'Paul' AND `T1`.`surname` = 'di Resta'",
    "difficulty": "challenging"
  },
  {
    "question_id": 881,
    "db_id": "formula_1",
    "question": "For the drivers who took part in the race in 1983/7/16, what's their race completion rate?",
    "evidence": "DIVIDE(COUNT(driverid when time has value ), (COUNT(driverid )) as percentage; in 1983/7/16 refers to when date = '1983-07-16'",
    "SQL": "SELECT\n  CAST(COUNT(CASE WHEN NOT `T2`.`time` IS NULL THEN `T2`.`driverId` END) AS DOUBLE) * 100 / COUNT(`T2`.`driverId`)\nFROM `races` AS `T1`\nINNER JOIN `results` AS `T2`\n  ON `T2`.`raceId` = `T1`.`raceId`\nWHERE\n  `T1`.`date` = '1983-07-16'",
    "difficulty": "moderate"
  },
  {
    "question_id": 884,
    "db_id": "formula_1",
    "question": "List the names of all races that occurred in the earliest recorded year and month.",
    "evidence": "earliest recorded year and month refers to year = year(min(date)) and month = month(min(date));",
    "SQL": "SELECT\n  `name`\nFROM `races`\nWHERE\n  DATE_FORMAT(CAST(`date` AS DATETIME), '%Y') = (\n    SELECT\n      DATE_FORMAT(CAST(`date` AS DATETIME), '%Y')\n    FROM `races`\n    ORDER BY\n      `date` ASC\n    LIMIT 1\n  )\n  AND DATE_FORMAT(CAST(`date` AS DATETIME), '%m') = (\n    SELECT\n      DATE_FORMAT(CAST(`date` AS DATETIME), '%m')\n    FROM `races`\n    ORDER BY\n      `date` ASC\n    LIMIT 1\n  )",
    "difficulty": "moderate"
  },
  {
    "question_id": 892,
    "db_id": "formula_1",
    "question": "State the driver with the most points scored. Find his full name with that points.",
    "evidence": "the most points scored refers to max(points); Full name of the driver refers to drivers.forename and drivers.surname;",
    "SQL": "SELECT\n  `T3`.`forename`,\n  `T3`.`surname`,\n  `T2`.`points`\nFROM `races` AS `T1`\nINNER JOIN `driverStandings` AS `T2`\n  ON `T2`.`raceId` = `T1`.`raceId`\nINNER JOIN `drivers` AS `T3`\n  ON `T3`.`driverId` = `T2`.`driverId`\nORDER BY\n  `T2`.`points` DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 894,
    "db_id": "formula_1",
    "question": "What is the best lap time recorded? List the driver and race with such recorded lap time.",
    "evidence": "the best lap time refers to min(milliseconds); List the driver refers to drivers.forename and drivers.surname; List the race refers to races.name",
    "SQL": "SELECT\n  `T2`.`milliseconds`,\n  `T1`.`forename`,\n  `T1`.`surname`,\n  `T3`.`name`\nFROM `drivers` AS `T1`\nINNER JOIN `lapTimes` AS `T2`\n  ON `T1`.`driverId` = `T2`.`driverId`\nINNER JOIN `races` AS `T3`\n  ON `T2`.`raceId` = `T3`.`raceId`\nORDER BY\n  `T2`.`milliseconds` ASC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 895,
    "db_id": "formula_1",
    "question": "What is the average lap time for Lewis Hamilton in the 2009 Malaysian Grand Prix?",
    "evidence": "average lap time = AVG(milliseconds); 'Lewis Hamilton' refers to the full name of the driver; Full name of the driver refers to drivers.forename and drivers.surname; 'Malaysian Grand Prix' refers to races.name = 'Malaysian Grand Prix'",
    "SQL": "SELECT\n  AVG(`T2`.`milliseconds`)\nFROM `races` AS `T1`\nINNER JOIN `lapTimes` AS `T2`\n  ON `T2`.`raceId` = `T1`.`raceId`\nINNER JOIN `drivers` AS `T3`\n  ON `T3`.`driverId` = `T2`.`driverId`\nWHERE\n  `T3`.`forename` = 'Lewis'\n  AND `T3`.`surname` = 'Hamilton'\n  AND `T1`.`year` = 2009\n  AND `T1`.`name` = 'Malaysian Grand Prix'",
    "difficulty": "moderate"
  },
  {
    "question_id": 896,
    "db_id": "formula_1",
    "question": "Calculate the percentage whereby Hamilton was not at the 1st track of the the f1 circuit since 2010.",
    "evidence": "percentage = DIVIDE(COUNT(raceId) where surname = 'Hamilton' and position>1), (COUNT(raceId) where surname = 'Hamilton'); since 2010 refers to year >= 2010",
    "SQL": "SELECT\n  CAST(COUNT(CASE WHEN `T2`.`position` <> 1 THEN `T2`.`position` END) AS DOUBLE) * 100 / COUNT(`T2`.`driverStandingsId`)\nFROM `races` AS `T1`\nINNER JOIN `driverStandings` AS `T2`\n  ON `T2`.`raceId` = `T1`.`raceId`\nINNER JOIN `drivers` AS `T3`\n  ON `T3`.`driverId` = `T2`.`driverId`\nWHERE\n  `T3`.`surname` = 'Hamilton' AND `T1`.`year` >= 2010",
    "difficulty": "challenging"
  },
  {
    "question_id": 897,
    "db_id": "formula_1",
    "question": "Name the driver with the most winning. Mention his nationality and what is his maximum point scores.",
    "evidence": "Full name of the driver refers to drivers.forename and drivers.surname; the most winning refers to MAX(COUNT(wins)); average point scores refers to MAX(points);",
    "SQL": "SELECT\n  `T1`.`forename`,\n  `T1`.`surname`,\n  `T1`.`nationality`,\n  MAX(`T2`.`points`)\nFROM `drivers` AS `T1`\nINNER JOIN `driverStandings` AS `T2`\n  ON `T2`.`driverId` = `T1`.`driverId`\nWHERE\n  `T2`.`wins` >= 1\nGROUP BY\n  `T1`.`forename`,\n  `T1`.`surname`,\n  `T1`.`nationality`\nORDER BY\n  COUNT(`T2`.`wins`) DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 898,
    "db_id": "formula_1",
    "question": "How old is the youngest Japanese driver? What is his name?",
    "evidence": "date of birth refers to drivers.dob; The larger the birthday value, the younger the person is, and vice versa; Japanese refers to nationality = 'Japanese'; age = YEAR(CURRENT_TIMESTAMP) - YEAR(dob);",
    "SQL": "SELECT\n  DATE_FORMAT(CAST(CURRENT_TIMESTAMP() AS DATETIME), '%Y') - DATE_FORMAT(CAST(`dob` AS DATETIME), '%Y'),\n  `forename`,\n  `surname`\nFROM `drivers`\nWHERE\n  `nationality` = 'Japanese'\nORDER BY\n  `dob` DESC\nLIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 901,
    "db_id": "formula_1",
    "question": "Name the races along with its circuit name and location for f1 races hosted in September 2005.",
    "evidence": "in September 2005 refers to MONTH(date) = 9 and YEAR(date) = 2005",
    "SQL": "SELECT DISTINCT\n  `T2`.`name`,\n  `T1`.`name`,\n  `T1`.`location`\nFROM `circuits` AS `T1`\nINNER JOIN `races` AS `T2`\n  ON `T2`.`circuitID` = `T1`.`circuitId`\nWHERE\n  `T2`.`year` = 2005 AND DATE_FORMAT(CAST(`T2`.`date` AS DATETIME), '%m') = '09'",
    "difficulty": "simple"
  },
  {
    "question_id": 902,
    "db_id": "formula_1",
    "question": "Which race was Alex Yoong in when he was in track number less than 20?",
    "evidence": "Alex Yoong refers to the full name of the driver; Full name of the driver refers to drivers.forename and drivers.surname;track number less than 10 refers to position < 20",
    "SQL": "SELECT\n  `T1`.`name`\nFROM `races` AS `T1`\nINNER JOIN `driverStandings` AS `T2`\n  ON `T2`.`raceId` = `T1`.`raceId`\nINNER JOIN `drivers` AS `T3`\n  ON `T3`.`driverId` = `T2`.`driverId`\nWHERE\n  `T3`.`forename` = 'Alex' AND `T3`.`surname` = 'Yoong' AND `T2`.`position` < 20",
    "difficulty": "simple"
  },
  {
    "question_id": 904,
    "db_id": "formula_1",
    "question": "State the race and year of race in which Michael Schumacher had his fastest lap.",
    "evidence": "fastest lap refers to min(milliseconds); Alex Yoong refers to the full name of the driver; Full name of the driver refers to drivers.forename and drivers.surname;",
    "SQL": "SELECT\n  `T1`.`name`,\n  `T1`.`year`\nFROM `races` AS `T1`\nINNER JOIN `lapTimes` AS `T2`\n  ON `T2`.`raceId` = `T1`.`raceId`\nINNER JOIN `drivers` AS `T3`\n  ON `T3`.`driverId` = `T2`.`driverId`\nWHERE\n  `T3`.`forename` = 'Michael' AND `T3`.`surname` = 'Schumacher'\nORDER BY\n  `T2`.`milliseconds` ASC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 906,
    "db_id": "formula_1",
    "question": "Which was Lewis Hamilton first race? What was his points recorded for his first race event?",
    "evidence": "first race refers to min(Year); Lewis Hamiltonrefers to the full name of the driver; Full name of the driver refers to drivers.forename and drivers.surname;",
    "SQL": "SELECT\n  `T1`.`name`,\n  `T2`.`points`\nFROM `races` AS `T1`\nINNER JOIN `driverStandings` AS `T2`\n  ON `T2`.`raceId` = `T1`.`raceId`\nINNER JOIN `drivers` AS `T3`\n  ON `T3`.`driverId` = `T2`.`driverId`\nWHERE\n  `T3`.`forename` = 'Lewis' AND `T3`.`surname` = 'Hamilton'\nORDER BY\n  `T1`.`year` ASC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 909,
    "db_id": "formula_1",
    "question": "Among all European Grand Prix races, what is the percentage of the races were hosted in Germany?",
    "evidence": "European Grand Prix races refers to races.name = 'European Grand Prix';percentage = divide(COUNT(races where country = Germany and name = 'Europearn Grand Prix'),COUNT(races where name = 'Europearn Grand Prix'))*100",
    "SQL": "SELECT\n  CAST(COUNT(CASE WHEN `T1`.`country` = 'Germany' THEN `T2`.`circuitID` END) AS DOUBLE) * 100 / COUNT(`T2`.`circuitId`)\nFROM `circuits` AS `T1`\nINNER JOIN `races` AS `T2`\n  ON `T2`.`circuitID` = `T1`.`circuitId`\nWHERE\n  `T2`.`name` = 'European Grand Prix'",
    "difficulty": "moderate"
  },
  {
    "question_id": 910,
    "db_id": "formula_1",
    "question": "What's the location coordinates of Silverstone Circuit?",
    "evidence": "location coordinates refers to (lat, lng); Silverstone Circuit refers to circuits.name = 'Silverstone Circuit'",
    "SQL": "SELECT\n  `lat`,\n  `lng`\nFROM `circuits`\nWHERE\n  `name` = 'Silverstone Circuit'",
    "difficulty": "simple"
  },
  {
    "question_id": 912,
    "db_id": "formula_1",
    "question": "What's the reference name of Marina Bay Street Circuit?",
    "evidence": "reference name refers to circuitRef; Marina Bay Street Circuit refers to circuits.name = 'Marina Bay Street Circuit'",
    "SQL": "SELECT\n  `circuitRef`\nFROM `circuits`\nWHERE\n  `name` = 'Marina Bay Street Circuit'",
    "difficulty": "simple"
  },
  {
    "question_id": 915,
    "db_id": "formula_1",
    "question": "Which country is the oldest driver from?",
    "evidence": "date of birth refers to drivers.dob; The larger the birthday value, the younger the person is, and vice versa;",
    "SQL": "SELECT\n  `nationality`\nFROM `drivers`\nWHERE\n  NOT `dob` IS NULL\nORDER BY\n  `dob` ASC\nLIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 928,
    "db_id": "formula_1",
    "question": "Which driver ranked the first in the Canadian Grand Prix in 2007? Please give his reference name.",
    "evidence": "reference name refers to driverRef; Canadian Grand Prix refers to races.name = 'Canadian Grand Prix';",
    "SQL": "SELECT\n  `T3`.`forename`,\n  `T3`.`surname`,\n  `T3`.`driverRef`\nFROM `races` AS `T1`\nINNER JOIN `results` AS `T2`\n  ON `T2`.`raceId` = `T1`.`raceId`\nINNER JOIN `drivers` AS `T3`\n  ON `T3`.`driverId` = `T2`.`driverId`\nWHERE\n  `T1`.`name` = 'Canadian Grand Prix' AND `T2`.`rank` = 1 AND `T1`.`year` = 2007",
    "difficulty": "moderate"
  },
  {
    "question_id": 930,
    "db_id": "formula_1",
    "question": "In which Formula_1 race did Lewis Hamilton rank the highest?",
    "evidence": "rank the highest refers to min(rank); Lewis Hamilton refers to the full name of the driver; Full name of the driver refers to drivers.forename and drivers.surname;",
    "SQL": "SELECT\n  `name`\nFROM `races`\nWHERE\n  `raceId` IN (\n    SELECT\n      `raceId`\n    FROM `results`\n    WHERE\n      `rank` = 1\n      AND `driverId` = (\n        SELECT\n          `driverId`\n        FROM `drivers`\n        WHERE\n          `forename` = 'Lewis' AND `surname` = 'Hamilton'\n      )\n  )",
    "difficulty": "simple"
  },
  {
    "question_id": 931,
    "db_id": "formula_1",
    "question": "What was the fastest lap speed among all drivers in the 2009 Spanish Grand Prix?",
    "evidence": "the fastest lap speed among all refers to max(fastestLapSpeed); Spanish Grand Prix refers to races.name = 'Spanish Grand Prix';",
    "SQL": "SELECT\n  `T2`.`fastestLapSpeed`\nFROM `races` AS `T1`\nINNER JOIN `results` AS `T2`\n  ON `T2`.`raceId` = `T1`.`raceId`\nWHERE\n  `T1`.`name` = 'Spanish Grand Prix'\n  AND `T1`.`year` = 2009\n  AND NOT `T2`.`fastestLapSpeed` IS NULL\nORDER BY\n  `T2`.`fastestLapSpeed` DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 933,
    "db_id": "formula_1",
    "question": "What was Lewis Hamilton's final rank in the 2008 Chinese Grand Prix?",
    "evidence": "Lewis Hamilton refers to the full name of the driver; Full name of the driver refers to drivers.forename and drivers.surname; final rank refers to positionOrder; Chinese Grand Prix refers to races.name = 'Chinese Grand Prix';",
    "SQL": "SELECT\n  `T2`.`positionOrder`\nFROM `races` AS `T1`\nINNER JOIN `results` AS `T2`\n  ON `T2`.`raceId` = `T1`.`raceId`\nINNER JOIN `drivers` AS `T3`\n  ON `T3`.`driverId` = `T2`.`driverId`\nWHERE\n  `T3`.`forename` = 'Lewis'\n  AND `T3`.`surname` = 'Hamilton'\n  AND `T1`.`name` = 'Chinese Grand Prix'\n  AND `T1`.`year` = 2008",
    "difficulty": "moderate"
  },
  {
    "question_id": 937,
    "db_id": "formula_1",
    "question": "What's the finish time for the driver who ranked second in 2008's Chinese Grand Prix?",
    "evidence": "finish time refers to time; Chinese Grand Prix refers to races.name = 'Chinese Grand Prix';",
    "SQL": "SELECT\n  `T1`.`time`\nFROM `results` AS `T1`\nINNER JOIN `races` AS `T2`\n  ON `T1`.`raceId` = `T2`.`raceId`\nWHERE\n  `T1`.`rank` = 2 AND `T2`.`name` = 'Chinese Grand Prix' AND `T2`.`year` = 2008",
    "difficulty": "simple"
  },
  {
    "question_id": 940,
    "db_id": "formula_1",
    "question": "Among the drivers that finished the race in the 2008 Chinese Grand Prix, how many of them have participated in Formula_1 races?",
    "evidence": "COUNT(raceID) > 0 reveals that this driver participated in races; drivers who finished the race refers to time has value.",
    "SQL": "SELECT COUNT(*) FROM (SELECT `T1`.`driverId` FROM `results` AS `T1` INNER JOIN `races` AS `T2` ON `T1`.`raceId` = `T2`.`raceId` WHERE `T2`.`name` = 'Chinese Grand Prix' AND `T2`.`year` = 2008 AND `T1`.`time` IS NOT NULL GROUP BY `T1`.`driverId` HAVING COUNT(`T2`.`raceId`) > 0) AS derived_table",
    "difficulty": "moderate"
  },
  {
    "question_id": 944,
    "db_id": "formula_1",
    "question": "How much faster in percentage is the champion than the driver who finished the race last in the 2008 Australian Grand Prix?",
    "evidence": "how much faster in percentage = divide(subtract(incremental time, champion time), last_driver time) * 100; last driver finished time = incremental time + champion time; only champion's finished time is represented by 'HH:MM:SS.mmm'; finished the game refers to time is not null",
    "SQL": "WITH `time_in_seconds` AS (\n  SELECT\n    `T1`.`positionOrder`,\n    CASE\n      WHEN `T1`.`positionOrder` = 1\n      THEN (\n        CAST(SUBSTR(`T1`.`time`, 1, 1) AS DOUBLE) * 3600\n      ) + (\n        CAST(SUBSTR(`T1`.`time`, 3, 2) AS DOUBLE) * 60\n      ) + CAST(SUBSTR(`T1`.`time`, 6) AS DOUBLE)\n      ELSE CAST(SUBSTR(`T1`.`time`, 2) AS DOUBLE)\n    END AS `time_seconds`\n  FROM `results` AS `T1`\n  INNER JOIN `races` AS `T2`\n    ON `T1`.`raceId` = `T2`.`raceId`\n  WHERE\n    `T2`.`name` = 'Australian Grand Prix'\n    AND NOT `T1`.`time` IS NULL\n    AND `T2`.`year` = 2008\n), `champion_time` AS (\n  SELECT\n    `time_seconds`\n  FROM `time_in_seconds`\n  WHERE\n    `positionOrder` = 1\n), `last_driver_incremental` AS (\n  SELECT\n    `time_seconds`\n  FROM `time_in_seconds`\n  WHERE\n    `positionOrder` = (\n      SELECT\n        MAX(`positionOrder`)\n      FROM `time_in_seconds`\n    )\n)\nSELECT\n  (\n    CAST((\n      SELECT\n        `time_seconds`\n      FROM `last_driver_incremental`\n    ) AS DOUBLE) * 100\n  ) / (\n    SELECT\n      `time_seconds` + (\n        SELECT\n          `time_seconds`\n        FROM `last_driver_incremental`\n      )\n    FROM `champion_time`\n  )",
    "difficulty": "challenging"
  },
  {
    "question_id": 945,
    "db_id": "formula_1",
    "question": "How many circuits are there in Adelaide, Australia?",
    "evidence": "Australia is the country; Melbourne is the location of circuit;",
    "SQL": "SELECT\n  COUNT(`circuitId`)\nFROM `circuits`\nWHERE\n  `location` = 'Adelaide' AND `country` = 'Australia'",
    "difficulty": "simple"
  },
  {
    "question_id": 948,
    "db_id": "formula_1",
    "question": "What are the maximum points of British constructors?",
    "evidence": "maximum points = MAX(points); British is a nationality",
    "SQL": "SELECT\n  MAX(`T1`.`points`)\nFROM `constructorStandings` AS `T1`\nINNER JOIN `constructors` AS `T2`\n  ON `T1`.`constructorId` = `T2`.`constructorId`\nWHERE\n  `T2`.`nationality` = 'British'",
    "difficulty": "simple"
  },
  {
    "question_id": 950,
    "db_id": "formula_1",
    "question": "Please list the constructor names with 0 points at race 291.",
    "evidence": "race at 291 refers to raceID = 291;",
    "SQL": "SELECT\n  `T2`.`name`\nFROM `constructorStandings` AS `T1`\nINNER JOIN `constructors` AS `T2`\n  ON `T1`.`constructorId` = `T2`.`constructorId`\nWHERE\n  `T1`.`points` = 0 AND `T1`.`raceId` = 291",
    "difficulty": "simple"
  },
  {
    "question_id": 951,
    "db_id": "formula_1",
    "question": "How many Japanese constructors have 0 points in 2 races?",
    "evidence": "2 races refers to COUNT(raceID) = 2; Japanese refers to constructors.nationality = 'Japanese';",
    "SQL": "SELECT\n  COUNT(`T1`.`raceId`)\nFROM `constructorStandings` AS `T1`\nINNER JOIN `constructors` AS `T2`\n  ON `T1`.`constructorId` = `T2`.`constructorId`\nWHERE\n  `T1`.`points` = 0 AND `T2`.`nationality` = 'Japanese'\nGROUP BY\n  `T1`.`constructorId`\nHAVING\n  COUNT(`raceId`) = 2",
    "difficulty": "simple"
  },
  {
    "question_id": 954,
    "db_id": "formula_1",
    "question": "Please calculate the race completion percentage of Japanese drivers from 2007 to 2009.",
    "evidence": "from 2007 to 2009 refers to year between 2007 and 2009; race completion refers to time is not null; percentage = Divide(COUNT(DriverID where time is not null and year between 2007 and 2009),Count (DriverID where year between 2007 and 2009))*100; ",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN NOT `T1`.`time` IS NULL THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(`T1`.`raceId`)\nFROM `results` AS `T1`\nINNER JOIN `races` AS `T2`\n  ON `T1`.`raceId` = `T2`.`raceId`\nINNER JOIN `drivers` AS `T3`\n  ON `T1`.`driverId` = `T3`.`driverId`\nWHERE\n  `T3`.`nationality` = 'Japanese' AND `T2`.`year` BETWEEN 2007 AND 2009",
    "difficulty": "challenging"
  },
  {
    "question_id": 955,
    "db_id": "formula_1",
    "question": "What is the average time in seconds of champion for each year, before year 1975?",
    "evidence": "only champion's finished time is represented by 'HH:MM:SS.mmm'; finished the game refers to time is not null; before year 1975 refers to year < 1975;",
    "SQL": "WITH time_in_seconds AS (SELECT T2.year, T2.raceId, T1.positionOrder, CASE WHEN T1.positionOrder = 1 THEN (CAST(SUBSTR(T1.time, 1, 1) AS FLOAT) * 3600) + (CAST(SUBSTR(T1.time, 3, 2) AS FLOAT) * 60) + CAST(SUBSTR(T1.time, 6, 2) AS FLOAT) + CAST(SUBSTR(T1.time, 9) AS FLOAT) / 1000 ELSE 0 END AS time_seconds FROM results AS T1 INNER JOIN races AS T2 ON T1.raceId = T2.raceId WHERE NOT T1.time IS NULL), champion_time AS (SELECT year, raceId, time_seconds FROM time_in_seconds WHERE positionOrder = 1) SELECT year, AVG(time_seconds) FROM champion_time WHERE year < 1975 GROUP BY year HAVING NOT AVG(time_seconds) IS NULL",
    "difficulty": "challenging"
  },
  {
    "question_id": 959,
    "db_id": "formula_1",
    "question": "What is the fastest lap number of the champion in 2009?",
    "evidence": "in 2009 refers to year = 2009; Only the time of the champion shows in the format of \"hour: minutes: seconds.millionsecond\"",
    "SQL": "SELECT\n  `T1`.`fastestLap`\nFROM `results` AS `T1`\nINNER JOIN `races` AS `T2`\n  ON `T1`.`raceId` = `T2`.`raceId`\nWHERE\n  `T2`.`year` = 2009 AND `T1`.`time` LIKE '_:%:__.___'",
    "difficulty": "simple"
  },
  {
    "question_id": 960,
    "db_id": "formula_1",
    "question": "What is the average of fastest lap speed in the 2009 Spanish Grand Prix race?",
    "evidence": "Spanish Grand Prix is the name of race refers to name = 'Spanish Grand Prix'; average fastest lap speed refers to avg(fastestLapSpeed);",
    "SQL": "SELECT\n  AVG(`T1`.`fastestLapSpeed`)\nFROM `results` AS `T1`\nINNER JOIN `races` AS `T2`\n  ON `T1`.`raceId` = `T2`.`raceId`\nWHERE\n  `T2`.`year` = 2009 AND `T2`.`name` = 'Spanish Grand Prix'",
    "difficulty": "moderate"
  },
  {
    "question_id": 962,
    "db_id": "formula_1",
    "question": "From 2000 to 2005, what percentage of drivers who were born before 1985 and the lap numbers were over 50?",
    "evidence": "born before 1985 refers to year(dob)<1985; in 2000 to 2005 refers to year between 2000 and 2005; percentage = Divide(COUNT(driverId where year (dob) <1985 and laps >50),COUNT(DriverID where year between 2000 and 2005) *100;",
    "SQL": "SELECT\n  CAST(SUM(\n    CASE\n      WHEN DATE_FORMAT(CAST(`T3`.`dob` AS DATETIME), '%Y') < '1985' AND `T1`.`laps` > 50\n      THEN 1\n      ELSE 0\n    END\n  ) AS DOUBLE) * 100 / COUNT(*)\nFROM `results` AS `T1`\nINNER JOIN `races` AS `T2`\n  ON `T1`.`raceId` = `T2`.`raceId`\nINNER JOIN `drivers` AS `T3`\n  ON `T1`.`driverId` = `T3`.`driverId`\nWHERE\n  `T2`.`year` BETWEEN 2000 AND 2005",
    "difficulty": "challenging"
  },
  {
    "question_id": 963,
    "db_id": "formula_1",
    "question": "How many French drivers who obtain the laptime less than 02:00.00?",
    "evidence": "lap time less than 02:00.00 refers to seconds < 120;",
    "SQL": "SELECT\n  COUNT(`T1`.`driverId`)\nFROM `drivers` AS `T1`\nINNER JOIN `lapTimes` AS `T2`\n  ON `T1`.`driverId` = `T2`.`driverId`\nWHERE\n  `T1`.`nationality` = 'French'\n  AND (\n    CAST(SUBSTR(`T2`.`time`, 1, 2) AS SIGNED) * 60 + CAST(SUBSTR(`T2`.`time`, 4, 2) AS SIGNED) + CAST(SUBSTR(`T2`.`time`, 7, 2) AS DOUBLE) / 1000\n  ) < 120",
    "difficulty": "moderate"
  },
  {
    "question_id": 964,
    "db_id": "formula_1",
    "question": "List out the code for drivers who have nationality in American.",
    "evidence": "nationality = 'American'",
    "SQL": "SELECT `code` FROM `drivers` WHERE `Nationality` = 'American'",
    "difficulty": "simple"
  },
  {
    "question_id": 967,
    "db_id": "formula_1",
    "question": "State code numbers of top 3 yougest drivers. How many Netherlandic drivers among them?",
    "evidence": "youngest driver refers to Max (year(dob)); Netherlandic and Dutch refer to the same country",
    "SQL": "SELECT COUNT(*) FROM (SELECT `T1`.`nationality` FROM `drivers` AS `T1` ORDER BY `T1`.`dob` DESC LIMIT 3) AS `T3` WHERE `T3`.`nationality` = 'Dutch'",
    "difficulty": "simple"
  },
  {
    "question_id": 971,
    "db_id": "formula_1",
    "question": "Please state the reference name of the oldest German driver.",
    "evidence": "oldest refers to MIN(year(dob)); reference names appear in drverRef.",
    "SQL": "SELECT `driverRef` FROM `drivers` WHERE `nationality` = 'German' ORDER BY `dob` ASC LIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 972,
    "db_id": "formula_1",
    "question": "Which drivers who were born in 1971 and has the fastest lap time on the race? Give id and code of these drivers.",
    "evidence": "born in 1971 refers to year(dob) = 1971; has the fastest lap time refers to fastestLapTime has values",
    "SQL": "SELECT\n  `T2`.`driverId`,\n  `T2`.`code`\nFROM `results` AS `T1`\nINNER JOIN `drivers` AS `T2`\n  ON `T1`.`driverId` = `T2`.`driverId`\nWHERE\n  DATE_FORMAT(CAST(`T2`.`dob` AS DATETIME), '%Y') = '1971'\n  AND NOT `T1`.`fastestLapTime` IS NULL",
    "difficulty": "moderate"
  },
  {
    "question_id": 977,
    "db_id": "formula_1",
    "question": "From race no. 50 to 100, how many finishers have been disqualified?",
    "evidence": "disqualified refers to statusID = 2, finisher refers to time! = null; race no. refers to raceId; raceId > 50 and raceId < 100;",
    "SQL": "SELECT\n  SUM(CASE WHEN NOT `time` IS NULL THEN 1 ELSE 0 END)\nFROM `results`\nWHERE\n  `statusId` = 2 AND `raceID` < 100 AND `raceId` > 50",
    "difficulty": "simple"
  },
  {
    "question_id": 978,
    "db_id": "formula_1",
    "question": "How many times the circuits were held in Austria? Please give their location and coordinates.",
    "evidence": "location coordinates refers to (lat,lng); Austria refers to country = 'Austria';",
    "SQL": "SELECT DISTINCT\n  `location`,\n  `lat`,\n  `lng`\nFROM `circuits`\nWHERE\n  `country` = 'Austria'",
    "difficulty": "simple"
  },
  {
    "question_id": 981,
    "db_id": "formula_1",
    "question": "On what year did the youngest driver had his first qualifying race? Also state the name, date and time of the race.",
    "evidence": "date of birth refers to drivers.dob; The larger the birthday value, the younger the person is, and vice versa; first qualifying race refers to MIN(races.date);",
    "SQL": "SELECT\n  `T3`.`year`,\n  `T3`.`name`,\n  `T3`.`date`,\n  `T3`.`time`\nFROM `qualifying` AS `T1`\nINNER JOIN `drivers` AS `T2`\n  ON `T1`.`driverId` = `T2`.`driverId`\nINNER JOIN `races` AS `T3`\n  ON `T1`.`raceId` = `T3`.`raceId`\nWHERE\n  `T1`.`driverId` = (\n    SELECT\n      `driverId`\n    FROM `drivers`\n    ORDER BY\n      `dob` DESC\n    LIMIT 1\n  )\nORDER BY\n  `T3`.`date` ASC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 988,
    "db_id": "formula_1",
    "question": "List down top 3 German drivers who has the shortest average pit stop duration and were born between 1980-1985.",
    "evidence": "Full name of the driver refers to drivers.forename and drivers.surname; born between 1980-1985 refers to 1980<= year(dob) <=1985; Average pitstop duration refers to Divide(SUM(duration),COUNT(duration)); shortest average refers to Min(avg(duration));",
    "SQL": "SELECT\n  `T2`.`forename`,\n  `T2`.`surname`\nFROM `pitStops` AS `T1`\nINNER JOIN `drivers` AS `T2`\n  ON `T1`.`driverId` = `T2`.`driverId`\nWHERE\n  `T2`.`nationality` = 'German'\n  AND DATE_FORMAT(CAST(`T2`.`dob` AS DATETIME), '%Y') BETWEEN '1980' AND '1985'\nGROUP BY\n  `T2`.`forename`,\n  `T2`.`surname`\nORDER BY\n  AVG(`T1`.`duration`)\nLIMIT 3",
    "difficulty": "challenging"
  },
  {
    "question_id": 989,
    "db_id": "formula_1",
    "question": "Who is the champion of the Canadian Grand Prix in 2008? Indicate his finish time.",
    "evidence": "Only the time of the champion shows in the format of \"hour: minutes: seconds.millionsecond\";",
    "SQL": "SELECT\n  `T1`.`time`\nFROM `results` AS `T1`\nINNER JOIN `races` AS `T2`\n  ON `T1`.`raceId` = `T2`.`raceId`\nWHERE\n  `T2`.`name` = 'Canadian Grand Prix'\n  AND `T2`.`year` = 2008\n  AND `T1`.`time` LIKE '_:%:__.___'",
    "difficulty": "moderate"
  },
  {
    "question_id": 990,
    "db_id": "formula_1",
    "question": "What is the constructor reference name of the champion in the 2009 Singapore Grand Prix? Please give its website.",
    "evidence": "the time of the champion shows in the format of \"minutes: seconds.millionsecond\" in which Max(time); constructor reference name refers to constructorRef; website refers to url",
    "SQL": "SELECT\n  `T3`.`constructorRef`,\n  `T3`.`url`\nFROM `results` AS `T1`\nINNER JOIN `races` AS `T2`\n  ON `T1`.`raceId` = `T2`.`raceId`\nINNER JOIN `constructors` AS `T3`\n  ON `T1`.`constructorId` = `T3`.`constructorId`\nWHERE\n  `T2`.`name` = 'Singapore Grand Prix'\n  AND `T2`.`year` = 2009\n  AND `T1`.`time` LIKE '_:%:__.___'",
    "difficulty": "challenging"
  },
  {
    "question_id": 717,
    "db_id": "superhero",
    "question": "Please list all the superpowers of 3-D Man.",
    "evidence": "3-D Man refers to superhero_name = '3-D Man'; superpowers refers to power_name",
    "SQL": "SELECT  `T3`.`power_name` FROM `superhero` AS `T1` INNER JOIN `hero_power` AS `T2`  ON `T1`.`id` = `T2`.`hero_id` INNER JOIN `superpower` AS `T3`  ON `T2`.`power_id` = `T3`.`id` WHERE  `T1`.`superhero_name` = '3-D Man'",
    "difficulty": "simple"
  },
  {
    "question_id": 994,
    "db_id": "formula_1",
    "question": "Which constructor scored most points from Monaco Grand Prix between 1980 and 2010? List the score, name and nationality of this team.",
    "evidence": "Monaco Grand Priz refers to the race; race in year between 1980 and 2010",
    "SQL": "SELECT\n  SUM(`T1`.`points`),\n  `T2`.`name`,\n  `T2`.`nationality`\nFROM `constructorResults` AS `T1`\nINNER JOIN `constructors` AS `T2`\n  ON `T1`.`constructorId` = `T2`.`constructorId`\nINNER JOIN `races` AS `T3`\n  ON `T3`.`raceid` = `T1`.`raceid`\nWHERE\n  `T3`.`name` = 'Monaco Grand Prix' AND `T3`.`year` BETWEEN 1980 AND 2010\nGROUP BY\n  `T2`.`name`\nORDER BY\n  SUM(`T1`.`points`) DESC\nLIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 1001,
    "db_id": "formula_1",
    "question": "What is full name of the racer who ranked 1st in the 3rd qualifying race held in the Marina Bay Street Circuit in 2008?",
    "evidence": "Ranked 1st in the 3rd qualifying race refer to MIN(q3); 2008 is the year of race; full name of racer = forename, surname",
    "SQL": "SELECT\n  `T2`.`forename`,\n  `T2`.`surname`\nFROM `qualifying` AS `T1`\nINNER JOIN `drivers` AS `T2`\n  ON `T1`.`driverId` = `T2`.`driverId`\nINNER JOIN `races` AS `T3`\n  ON `T1`.`raceid` = `T3`.`raceid`\nWHERE\n  NOT `q3` IS NULL\n  AND `T3`.`year` = 2008\n  AND `T3`.`circuitId` IN (\n    SELECT\n      `circuitId`\n    FROM `circuits`\n    WHERE\n      `name` = 'Marina Bay Street Circuit'\n  )\nORDER BY\n  CAST(SUBSTR(`q3`, 1, INSTR(`q3`, ':') - 1) AS SIGNED) * 60 + CAST(SUBSTR(`q3`, INSTR(`q3`, ':') + 1, INSTR(`q3`, '.') - INSTR(`q3`, ':') - 1) AS DOUBLE) + CAST(SUBSTR(`q3`, INSTR(`q3`, '.') + 1) AS DOUBLE) / 1000 ASC\nLIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 1002,
    "db_id": "formula_1",
    "question": "As of the present, what is the full name of the youngest racer? Indicate her nationality and the name of the race to which he/she first joined.",
    "evidence": "full name refers to forename+surname; Youngest racer = MAX(dob)",
    "SQL": "SELECT `T1`.`forename`, `T1`.`surname`, `T1`.`nationality`, `T3`.`name` FROM `drivers` AS `T1` INNER JOIN `driverStandings` AS `T2` ON `T1`.`driverId` = `T2`.`driverId` INNER JOIN `races` AS `T3` ON `T2`.`raceId` = `T3`.`raceId` ORDER BY `T1`.`dob` DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1003,
    "db_id": "formula_1",
    "question": "How many accidents did the driver who had the highest number accidents in the Canadian Grand Prix have?",
    "evidence": "number of accidents refers to the number where statusid = 3; Canadian Grand Prix refers to the race of name\n",
    "SQL": "SELECT\n  COUNT(`T1`.`driverId`)\nFROM `results` AS `T1`\nINNER JOIN `races` AS `T2`\n  ON `T1`.`raceId` = `T2`.`raceId`\nINNER JOIN `status` AS `T3`\n  ON `T1`.`statusId` = `T3`.`statusId`\nWHERE\n  `T3`.`statusId` = 3 AND `T2`.`name` = 'Canadian Grand Prix'\nGROUP BY\n  `T1`.`driverId`\nORDER BY\n  COUNT(`T1`.`driverId`) DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1011,
    "db_id": "formula_1",
    "question": "Which top 20 driver created the shortest lap time ever record in a Formula_1 race? Please give them full names.",
    "evidence": "shortest lap time refers to MIN(time); the time format for the shortest lap time is 'MM:SS.mmm' or 'M:SS.mmm'; full name of the driver refers to forename, surname",
    "SQL": "WITH lap_times_in_seconds AS (SELECT driverId, (CASE WHEN SUBSTR(time, 1, INSTR(time, ':') - 1) <> '' THEN CAST(SUBSTR(time, 1, INSTR(time, ':') - 1) AS FLOAT) * 60 ELSE 0 END + CASE WHEN SUBSTR(time, INSTR(time, ':') + 1, INSTR(time, '.') - INSTR(time, ':') - 1) <> '' THEN CAST(SUBSTR(time, INSTR(time, ':') + 1, INSTR(time, '.') - INSTR(time, ':') - 1) AS FLOAT) ELSE 0 END + CASE WHEN SUBSTR(time, INSTR(time, '.') + 1) <> '' THEN CAST(SUBSTR(time, INSTR(time, '.') + 1) AS FLOAT) / 1000 ELSE 0 END) AS time_in_seconds FROM lapTimes) SELECT T2.forename, T2.surname, T1.driverId FROM (SELECT driverId, MIN(time_in_seconds) AS min_time_in_seconds FROM lap_times_in_seconds GROUP BY driverId) AS T1 INNER JOIN drivers AS T2 ON T1.driverId = T2.driverId ORDER BY T1.min_time_in_seconds ASC LIMIT 20",
    "difficulty": "challenging"
  },
  {
    "question_id": 1014,
    "db_id": "formula_1",
    "question": "Please list the lap records for the circuits in Italy.",
    "evidence": "lap record means the fastest time recorded which refers to time",
    "SQL": "WITH `fastest_lap_times` AS (\n  SELECT\n    `T1`.`raceId`,\n    `T1`.`FastestLapTime`,\n    (\n      CAST(SUBSTR(`T1`.`FastestLapTime`, 1, INSTR(`T1`.`FastestLapTime`, ':') - 1) AS DOUBLE) * 60\n    ) + (\n      CAST(SUBSTR(\n        `T1`.`FastestLapTime`,\n        INSTR(`T1`.`FastestLapTime`, ':') + 1,\n        INSTR(`T1`.`FastestLapTime`, '.') - INSTR(`T1`.`FastestLapTime`, ':') - 1\n      ) AS DOUBLE)\n    ) + (\n      CAST(SUBSTR(`T1`.`FastestLapTime`, INSTR(`T1`.`FastestLapTime`, '.') + 1) AS DOUBLE) / 1000\n    ) AS `time_in_seconds`\n  FROM `results` AS `T1`\n  WHERE\n    NOT `T1`.`FastestLapTime` IS NULL\n)\nSELECT\n  `T1`.`FastestLapTime` AS `lap_record`\nFROM `results` AS `T1`\nINNER JOIN `races` AS `T2`\n  ON `T1`.`raceId` = `T2`.`raceId`\nINNER JOIN `circuits` AS `T3`\n  ON `T2`.`circuitId` = `T3`.`circuitId`\nINNER JOIN (\n  SELECT\n    MIN(`fastest_lap_times`.`time_in_seconds`) AS `min_time_in_seconds`\n  FROM `fastest_lap_times`\n  INNER JOIN `races` AS `T2`\n    ON `fastest_lap_times`.`raceId` = `T2`.`raceId`\n  INNER JOIN `circuits` AS `T3`\n    ON `T2`.`circuitId` = `T3`.`circuitId`\n  WHERE\n    `T3`.`country` = 'Italy'\n) AS `T4`\n  ON (\n    CAST(SUBSTR(`T1`.`FastestLapTime`, 1, INSTR(`T1`.`FastestLapTime`, ':') - 1) AS DOUBLE) * 60\n  ) + (\n    CAST(SUBSTR(\n      `T1`.`FastestLapTime`,\n      INSTR(`T1`.`FastestLapTime`, ':') + 1,\n      INSTR(`T1`.`FastestLapTime`, '.') - INSTR(`T1`.`FastestLapTime`, ':') - 1\n    ) AS DOUBLE)\n  ) + (\n    CAST(SUBSTR(`T1`.`FastestLapTime`, INSTR(`T1`.`FastestLapTime`, '.') + 1) AS DOUBLE) / 1000\n  ) = `T4`.`min_time_in_seconds`\nLIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 719,
    "db_id": "superhero",
    "question": "Among the superheroes with the super power of \"Super Strength\", how many of them have a height of over 200cm?",
    "evidence": "super power of \"Super Strength\" refers to power_name = 'Super Strength'; a height of over 200cm refers to height_cm > 200",
    "SQL": "SELECT\n  COUNT(`T1`.`id`)\nFROM `superhero` AS `T1`\nINNER JOIN `hero_power` AS `T2`\n  ON `T1`.`id` = `T2`.`hero_id`\nINNER JOIN `superpower` AS `T3`\n  ON `T2`.`power_id` = `T3`.`id`\nWHERE\n  `T3`.`power_name` = 'Super Strength' AND `T1`.`height_cm` > 200",
    "difficulty": "moderate"
  },
  {
    "question_id": 723,
    "db_id": "superhero",
    "question": "Among the superheroes with blue eyes, how many of them have the super power of \"Agility\"?",
    "evidence": "blue eyes refers to colour = 'Blue' and eye_colour_id = colour.id; super power of \"Agility\" refers to power_name = 'Agility'",
    "SQL": "SELECT\n  COUNT(`T1`.`id`)\nFROM `superhero` AS `T1`\nINNER JOIN `hero_power` AS `T2`\n  ON `T1`.`id` = `T2`.`hero_id`\nINNER JOIN `superpower` AS `T3`\n  ON `T2`.`power_id` = `T3`.`id`\nINNER JOIN `colour` AS `T4`\n  ON `T1`.`eye_colour_id` = `T4`.`id`\nWHERE\n  `T3`.`power_name` = 'Agility' AND `T4`.`colour` = 'Blue'",
    "difficulty": "moderate"
  },
  {
    "question_id": 724,
    "db_id": "superhero",
    "question": "Please list the superhero names of all the superheroes that have blue eyes and blond hair.",
    "evidence": "blue eyes refers to colour = 'Blue' and eye_colour_id = colour.id; blond hair refers to colour = 'Blond' and hair_colour_id = colour.id; super power of \"Agility\" refers to power_name = 'Agility'",
    "SQL": "SELECT\n  `T1`.`superhero_name`\nFROM `superhero` AS `T1`\nINNER JOIN `colour` AS `T2`\n  ON `T1`.`eye_colour_id` = `T2`.`id`\nINNER JOIN `colour` AS `T3`\n  ON `T1`.`hair_colour_id` = `T3`.`id`\nWHERE\n  `T2`.`colour` = 'Blue' AND `T3`.`colour` = 'Blond'",
    "difficulty": "challenging"
  },
  {
    "question_id": 726,
    "db_id": "superhero",
    "question": "Rank heroes published by Marvel Comics by their height in descending order.",
    "evidence": "name refers to superhero_name; the tallest hero refers to MAX(height_cm); published by Marvel Comics refers to publisher_name = 'Marvel Comics'",
    "SQL": "SELECT\n  `superhero_name`,\n  `height_cm`,\n  RANK() OVER (ORDER BY `height_cm` DESC) AS `HeightRank`\nFROM `superhero`\nINNER JOIN `publisher`\n  ON `superhero`.`publisher_id` = `publisher`.`id`\nWHERE\n  `publisher`.`publisher_name` = 'Marvel Comics'",
    "difficulty": "moderate"
  },
  {
    "question_id": 728,
    "db_id": "superhero",
    "question": "Rank superheroes from Marvel Comics by their eye color popularity, starting with the most common color.",
    "evidence": "the superheroes from Marvel Comics refers to publisher_name = 'Marvel Comics'; most common color refers to COUNT(superhero.id) DESC;",
    "SQL": "SELECT\n  `colour`.`colour` AS `EyeColor`,\n  COUNT(`superhero`.`id`) AS `Count`,\n  RANK() OVER (ORDER BY COUNT(`superhero`.`id`) DESC) AS `PopularityRank`\nFROM `superhero`\nINNER JOIN `colour`\n  ON `superhero`.`eye_colour_id` = `colour`.`id`\nINNER JOIN `publisher`\n  ON `superhero`.`publisher_id` = `publisher`.`id`\nWHERE\n  `publisher`.`publisher_name` = 'Marvel Comics'\nGROUP BY\n  `colour`.`colour`",
    "difficulty": "moderate"
  },
  {
    "question_id": 730,
    "db_id": "superhero",
    "question": "List the superheroes from Marvel Comics who have the super power of 'Super Strength'.",
    "evidence": "the superheroes from Marvel Comics refers to publisher_name = 'Marvel Comics'; super power of \"Super Strength\" refers to power_name = 'Super Strength';",
    "SQL": "SELECT\n  `superhero_name`\nFROM `superhero` AS `T1`\nWHERE\n  EXISTS(\n    SELECT\n      1\n    FROM `hero_power` AS `T2`\n    INNER JOIN `superpower` AS `T3`\n      ON `T2`.`power_id` = `T3`.`id`\n    WHERE\n      `T3`.`power_name` = 'Super Strength' AND `T1`.`id` = `T2`.`hero_id`\n  )\n  AND EXISTS(\n    SELECT\n      1\n    FROM `publisher` AS `T4`\n    WHERE\n      `T4`.`publisher_name` = 'Marvel Comics' AND `T1`.`publisher_id` = `T4`.`id`\n  )",
    "difficulty": "challenging"
  },
  {
    "question_id": 732,
    "db_id": "superhero",
    "question": "Which publisher published the slowest superhero?",
    "evidence": "the slowest superhero refers to attribute_name = 'Speed' where MIN(attribute_value); publisher refers to publisher_name",
    "SQL": "SELECT\n  `T2`.`publisher_name`\nFROM `superhero` AS `T1`\nINNER JOIN `publisher` AS `T2`\n  ON `T1`.`publisher_id` = `T2`.`id`\nINNER JOIN `hero_attribute` AS `T3`\n  ON `T1`.`id` = `T3`.`hero_id`\nINNER JOIN `attribute` AS `T4`\n  ON `T3`.`attribute_id` = `T4`.`id`\nWHERE\n  `T4`.`attribute_name` = 'Speed'\nORDER BY\n  `T3`.`attribute_value`\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 733,
    "db_id": "superhero",
    "question": "How many gold-eyed superheroes did Marvel Comics publish?",
    "evidence": "gold-eyed refers to colour = 'Gold' where eye_colour_id = colour.id; superheroes that Marvel Comics published refers to publisher_name = 'Marvel Comics'",
    "SQL": "SELECT\n  COUNT(`T1`.`id`)\nFROM `superhero` AS `T1`\nINNER JOIN `publisher` AS `T2`\n  ON `T1`.`publisher_id` = `T2`.`id`\nINNER JOIN `colour` AS `T3`\n  ON `T1`.`eye_colour_id` = `T3`.`id`\nWHERE\n  `T2`.`publisher_name` = 'Marvel Comics' AND `T3`.`colour` = 'Gold'",
    "difficulty": "moderate"
  },
  {
    "question_id": 736,
    "db_id": "superhero",
    "question": "Who is the dumbest superhero?",
    "evidence": "the dumbest superhero refers to MIN(attribute_value) where attribute_name = 'Intelligence'",
    "SQL": "SELECT\n  `T1`.`superhero_name`\nFROM `superhero` AS `T1`\nINNER JOIN `hero_attribute` AS `T2`\n  ON `T1`.`id` = `T2`.`hero_id`\nINNER JOIN `attribute` AS `T3`\n  ON `T2`.`attribute_id` = `T3`.`id`\nWHERE\n  `T3`.`attribute_name` = 'Intelligence'\nORDER BY\n  `T2`.`attribute_value`\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 737,
    "db_id": "superhero",
    "question": "What is Copycat's race?",
    "evidence": "Copycat is the superhero_name;",
    "SQL": "SELECT\n  `T2`.`race`\nFROM `superhero` AS `T1`\nINNER JOIN `race` AS `T2`\n  ON `T1`.`race_id` = `T2`.`id`\nWHERE\n  `T1`.`superhero_name` = 'Copycat'",
    "difficulty": "simple"
  },
  {
    "question_id": 738,
    "db_id": "superhero",
    "question": "Which superheroes have a durability attribute value of less than 50?",
    "evidence": "durability of less than 50 refers to attribute_name = 'Durability' AND attribute_value < 50",
    "SQL": "SELECT\n  `superhero_name`\nFROM `superhero` AS `T1`\nWHERE\n  EXISTS(\n    SELECT\n      1\n    FROM `hero_attribute` AS `T2`\n    INNER JOIN `attribute` AS `T3`\n      ON `T2`.`attribute_id` = `T3`.`id`\n    WHERE\n      `T3`.`attribute_name` = 'Durability'\n      AND `T2`.`attribute_value` < 50\n      AND `T1`.`id` = `T2`.`hero_id`\n  )",
    "difficulty": "simple"
  },
  {
    "question_id": 739,
    "db_id": "superhero",
    "question": "What are the names of the superheroes with the power of death touch?",
    "evidence": "name of superheroes refers to refers to superhero_name; the power of death touch refers to power_name = 'Death Touch'",
    "SQL": "SELECT\n  `T1`.`superhero_name`\nFROM `superhero` AS `T1`\nINNER JOIN `hero_power` AS `T2`\n  ON `T1`.`id` = `T2`.`hero_id`\nINNER JOIN `superpower` AS `T3`\n  ON `T2`.`power_id` = `T3`.`id`\nWHERE\n  `T3`.`power_name` = 'Death Touch'",
    "difficulty": "moderate"
  },
  {
    "question_id": 740,
    "db_id": "superhero",
    "question": "How many female superheroes have a strength value of 100?",
    "evidence": "female refers to gender = 'Female'; strength value of 100 refers to attribute_name = 'Strength' AND attribute_value = 100",
    "SQL": "SELECT\n  COUNT(`T1`.`id`)\nFROM `superhero` AS `T1`\nINNER JOIN `hero_attribute` AS `T2`\n  ON `T1`.`id` = `T2`.`hero_id`\nINNER JOIN `attribute` AS `T3`\n  ON `T2`.`attribute_id` = `T3`.`id`\nINNER JOIN `gender` AS `T4`\n  ON `T1`.`gender_id` = `T4`.`id`\nWHERE\n  `T3`.`attribute_name` = 'Strength'\n  AND `T2`.`attribute_value` = 100\n  AND `T4`.`gender` = 'Female'",
    "difficulty": "moderate"
  },
  {
    "question_id": 743,
    "db_id": "superhero",
    "question": "What is the percentage of superheroes who act in their own self-interest or make decisions based on their own moral code? Indicate how many of the said superheroes were published by Marvel Comics.",
    "evidence": "published by Marvel Comics refers to publisher_name = 'Marvel Comics'; superheroes who act in their own self-interest or make decisions based on their own moral code refers to alignment = 'Bad'; calculation = MULTIPLY(DIVIDE(SUM(alignment = 'Bad); count(id)), 100)",
    "SQL": "SELECT\n  (\n    CAST(COUNT(*) AS DOUBLE) * 100 / (\n      SELECT\n        COUNT(*)\n      FROM `superhero`\n    )\n  ),\n  CAST(SUM(CASE WHEN `T2`.`publisher_name` = 'Marvel Comics' THEN 1 ELSE 0 END) AS DOUBLE)\nFROM `superhero` AS `T1`\nINNER JOIN `publisher` AS `T2`\n  ON `T1`.`publisher_id` = `T2`.`id`\nINNER JOIN `alignment` AS `T3`\n  ON `T3`.`id` = `T1`.`alignment_id`\nWHERE\n  `T3`.`alignment` = 'Bad'",
    "difficulty": "challenging"
  },
  {
    "question_id": 744,
    "db_id": "superhero",
    "question": "Between DC and Marvel Comics, which publisher has published more superheroes? Find the difference in the number of superheroes they have published.",
    "evidence": "DC refers to publisher_name = 'DC Comics'; Marvel Comics refers to publisher_name = 'Marvel Comics'; calculation = SUBTRACT(SUM(publisher_name = 'Marvel Comics'), SUM(publisher_name = 'DC Comics'))",
    "SQL": "SELECT\n  SUM(CASE WHEN `T2`.`publisher_name` = 'Marvel Comics' THEN 1 ELSE 0 END) - SUM(CASE WHEN `T2`.`publisher_name` = 'DC Comics' THEN 1 ELSE 0 END)\nFROM `superhero` AS `T1`\nINNER JOIN `publisher` AS `T2`\n  ON `T1`.`publisher_id` = `T2`.`id`",
    "difficulty": "challenging"
  },
  {
    "question_id": 745,
    "db_id": "superhero",
    "question": "Give the publisher ID of Star Trek.",
    "evidence": "Star Trek is the publisher_name;",
    "SQL": "SELECT\n  `id`\nFROM `publisher`\nWHERE\n  `publisher_name` = 'Star Trek'",
    "difficulty": "simple"
  },
  {
    "question_id": 747,
    "db_id": "superhero",
    "question": "What is the total number of superheroes without full name?",
    "evidence": "superheroes without full name refers to full_name IS NULL",
    "SQL": "SELECT\n  COUNT(`id`)\nFROM `superhero`\nWHERE\n  `full_name` IS NULL",
    "difficulty": "simple"
  },
  {
    "question_id": 750,
    "db_id": "superhero",
    "question": "What is the average weight of all female superheroes?",
    "evidence": "female refers to gender = 'Female'; average weight refers to AVG(weight_kg)",
    "SQL": "SELECT\n  AVG(`T1`.`weight_kg`)\nFROM `superhero` AS `T1`\nINNER JOIN `gender` AS `T2`\n  ON `T1`.`gender_id` = `T2`.`id`\nWHERE\n  `T2`.`gender` = 'Female'",
    "difficulty": "simple"
  },
  {
    "question_id": 751,
    "db_id": "superhero",
    "question": "List down at least five superpowers of male superheroes.",
    "evidence": "male refers to gender = 'Male'; superpowers refers to power_name;",
    "SQL": "SELECT\n  `T3`.`power_name`\nFROM `superhero` AS `T1`\nINNER JOIN `hero_power` AS `T2`\n  ON `T1`.`id` = `T2`.`hero_id`\nINNER JOIN `superpower` AS `T3`\n  ON `T3`.`id` = `T2`.`power_id`\nINNER JOIN `gender` AS `T4`\n  ON `T4`.`id` = `T1`.`gender_id`\nWHERE\n  `T4`.`gender` = 'Male'\nLIMIT 5",
    "difficulty": "moderate"
  },
  {
    "question_id": 753,
    "db_id": "superhero",
    "question": "Among the superheroes with height from 170 to 190, list the names of the superheroes with no eye color.",
    "evidence": "height from 170 to 190 refers to height_cm BETWEEN 170 AND 190; no eye color refers to colour = 'No Colour'",
    "SQL": "SELECT DISTINCT\n  `T1`.`superhero_name`\nFROM `superhero` AS `T1`\nINNER JOIN `colour` AS `T2`\n  ON `T1`.`eye_colour_id` = `T2`.`id`\nWHERE\n  `T1`.`height_cm` BETWEEN 170 AND 190 AND `T2`.`colour` = 'No Colour'",
    "difficulty": "moderate"
  },
  {
    "question_id": 758,
    "db_id": "superhero",
    "question": "Provide the hair colour of the human superhero who is 185 cm tall.",
    "evidence": "185 cm tall refers to height_cm = 185; human superhero refers to race = 'human'; hair colour refers to colour where hair_colour_id = colour.id;",
    "SQL": "SELECT DISTINCT\n  `T3`.`colour`\nFROM `superhero` AS `T1`\nINNER JOIN `race` AS `T2`\n  ON `T1`.`race_id` = `T2`.`id`\nINNER JOIN `colour` AS `T3`\n  ON `T1`.`hair_colour_id` = `T3`.`id`\nWHERE\n  `T1`.`height_cm` = 185 AND `T2`.`race` = 'Human'",
    "difficulty": "moderate"
  },
  {
    "question_id": 760,
    "db_id": "superhero",
    "question": "In superheroes with height between 150 to 180, what is the percentage of heroes published by Marvel Comics?",
    "evidence": "height between 150 to 180 refers to height_cm BETWEEN 150 AND 180; heroes published by Marvel Comics refers to publisher_name = 'Marvel Comics'; calculation = MULTIPLY(DIVIDE(SUM(publisher.id = 13)), COUNT(publisher.id), 100)",
    "SQL": "SELECT\n  CAST(COUNT(CASE WHEN `T2`.`publisher_name` = 'Marvel Comics' THEN 1 ELSE NULL END) AS DOUBLE) * 100 / COUNT(`T1`.`id`)\nFROM `superhero` AS `T1`\nINNER JOIN `publisher` AS `T2`\n  ON `T1`.`publisher_id` = `T2`.`id`\nWHERE\n  `T1`.`height_cm` BETWEEN 150 AND 180",
    "difficulty": "challenging"
  },
  {
    "question_id": 761,
    "db_id": "superhero",
    "question": "Among the male superheroes, list the super hero names of superheroes with weight greater than the 79% average weight of all superheroes.",
    "evidence": "super hero names refers to superhero_name;male superheros refers to gender = 'Male';Calculation = weight_kg > MULTIPLY(AVG(weight_kg), 0.79)",
    "SQL": "SELECT\n  `T1`.`superhero_name`\nFROM `superhero` AS `T1`\nINNER JOIN `gender` AS `T2`\n  ON `T1`.`gender_id` = `T2`.`id`\nWHERE\n  `T2`.`gender` = 'Male'\n  AND `T1`.`weight_kg` * 100 > (\n    SELECT\n      AVG(`weight_kg`)\n    FROM `superhero`\n  ) * 79",
    "difficulty": "moderate"
  },
  {
    "question_id": 764,
    "db_id": "superhero",
    "question": "What are the superpowers of heroes with ID 1?",
    "evidence": "superpowers refers to power_name; heroes with ID 1 refers to hero_id = 1;",
    "SQL": "SELECT DISTINCT\n  `T2`.`power_name`\nFROM `hero_power` AS `T1`\nINNER JOIN `superpower` AS `T2`\n  ON `T1`.`power_id` = `T2`.`id`\nWHERE\n  `T1`.`hero_id` = 1",
    "difficulty": "simple"
  },
  {
    "question_id": 765,
    "db_id": "superhero",
    "question": "How many heroes have stealth power?",
    "evidence": "stealth power refers to power_name = 'Stealth';",
    "SQL": "SELECT\n  COUNT(`T1`.`hero_id`)\nFROM `hero_power` AS `T1`\nINNER JOIN `superpower` AS `T2`\n  ON `T1`.`power_id` = `T2`.`id`\nWHERE\n  `T2`.`power_name` = 'Stealth'",
    "difficulty": "simple"
  },
  {
    "question_id": 766,
    "db_id": "superhero",
    "question": "What is the hero's full name with the highest attribute in strength?",
    "evidence": "highest attribute in strength refers to MAX(attribute_value) WHERE attribute_name = 'strength';",
    "SQL": "SELECT `T1`.`full_name` FROM `superhero` AS `T1` INNER JOIN `hero_attribute` AS `T2` ON `T1`.`id` = `T2`.`hero_id` INNER JOIN `attribute` AS `T3` ON `T2`.`attribute_id` = `T3`.`id` WHERE `T3`.`attribute_name` = 'Strength' ORDER BY `T2`.`attribute_value` DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 769,
    "db_id": "superhero",
    "question": "Which superhero has the most durability published by Dark Horse Comics?",
    "evidence": "which superhero refers to superhero_name; most durability refers to MAX(attribute_value) WHERE attribute_name = 'durability'; published by Dark Horse Comics refers to publisher_name = 'Dark Horse Comics';",
    "SQL": "SELECT\n  `T1`.`superhero_name`\nFROM `superhero` AS `T1`\nINNER JOIN `hero_attribute` AS `T2`\n  ON `T1`.`id` = `T2`.`hero_id`\nINNER JOIN `attribute` AS `T3`\n  ON `T3`.`id` = `T2`.`attribute_id`\nINNER JOIN `publisher` AS `T4`\n  ON `T4`.`id` = `T1`.`publisher_id`\nWHERE\n  `T4`.`publisher_name` = 'Dark Horse Comics'\n  AND `T3`.`attribute_name` = 'Durability'\nORDER BY\n  `T2`.`attribute_value` DESC\nLIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 772,
    "db_id": "superhero",
    "question": "List the eyes, hair and skin colour of all female superheroes published by Dark Horse Comics.",
    "evidence": "eyes refers to eye_colour_id; hair refers to hair_colour_id; skin colour refers to skin_colour_id; female superheroes refers to gender = 'Female'; published by Dark Horse Comics refers to publisher_name = 'Dark Horse Comics';",
    "SQL": "SELECT\n  `T1`.`eye_colour_id`,\n  `T1`.`hair_colour_id`,\n  `T1`.`skin_colour_id`\nFROM `superhero` AS `T1`\nINNER JOIN `publisher` AS `T2`\n  ON `T2`.`id` = `T1`.`publisher_id`\nINNER JOIN `gender` AS `T3`\n  ON `T3`.`id` = `T1`.`gender_id`\nWHERE\n  `T2`.`publisher_name` = 'Dark Horse Comics' AND `T3`.`gender` = 'Female'",
    "difficulty": "challenging"
  },
  {
    "question_id": 773,
    "db_id": "superhero",
    "question": "Which superhero has the same eyes, hair and skin colour? Indicate the publisher of the superhero.",
    "evidence": "which superhero refers to superhero_name; the same eyes, hair and skin colour refers to hair_colour_id = skin_colour_id AND hair_colour_id = eye_colour_id; publisher refers to publisher_name;",
    "SQL": "SELECT\n  `T1`.`superhero_name`,\n  `T2`.`publisher_name`\nFROM `superhero` AS `T1`\nINNER JOIN `publisher` AS `T2`\n  ON `T1`.`publisher_id` = `T2`.`id`\nWHERE\n  `T1`.`eye_colour_id` = `T1`.`hair_colour_id`\n  AND `T1`.`eye_colour_id` = `T1`.`skin_colour_id`",
    "difficulty": "challenging"
  },
  {
    "question_id": 775,
    "db_id": "superhero",
    "question": "What is the percentage of blue female superheroes among all female superheroes?",
    "evidence": "percentage = MULTIPLY(DIVIDE(SUM(colour = 'Blue' WHERE gender = 'Female'), COUNT(gender = 'Female')), 100); blue refers to the color = 'Blue' WHERE skin_colour_id = colour.id; female refers to gender = 'Female';",
    "SQL": "SELECT\n  CAST(COUNT(CASE WHEN `T3`.`colour` = 'Blue' THEN `T1`.`id` ELSE NULL END) AS DOUBLE) * 100 / COUNT(`T1`.`id`)\nFROM `superhero` AS `T1`\nINNER JOIN `gender` AS `T2`\n  ON `T1`.`gender_id` = `T2`.`id`\nINNER JOIN `colour` AS `T3`\n  ON `T1`.`skin_colour_id` = `T3`.`id`\nWHERE\n  `T2`.`gender` = 'Female'",
    "difficulty": "challenging"
  },
  {
    "question_id": 779,
    "db_id": "superhero",
    "question": "How many powers does Amazo hero have?",
    "evidence": "Amazo hero refers to superhero_name = 'Amazo';",
    "SQL": "SELECT\n  COUNT(`T1`.`power_id`)\nFROM `hero_power` AS `T1`\nINNER JOIN `superhero` AS `T2`\n  ON `T1`.`hero_id` = `T2`.`id`\nWHERE\n  `T2`.`superhero_name` = 'Amazo'",
    "difficulty": "simple"
  },
  {
    "question_id": 781,
    "db_id": "superhero",
    "question": "Provide the heights of the heroes whose eye colours are amber.",
    "evidence": "heights of the heroes refers to height_cm; eye colours are amber refers to colour.colour = 'Amber' WHERE eye_colour_id = colour.id;",
    "SQL": "SELECT\n  `T1`.`height_cm`\nFROM `superhero` AS `T1`\nINNER JOIN `colour` AS `T2`\n  ON `T1`.`eye_colour_id` = `T2`.`id`\nWHERE\n  `T2`.`colour` = 'Amber'",
    "difficulty": "simple"
  },
  {
    "question_id": 782,
    "db_id": "superhero",
    "question": "List the heroes' names whose eyes and hair colours are both black.",
    "evidence": "heroes' names refers to superhero_name; eyes and hair colours are both black refers to eye_colour_id AND hair_colour_id WHERE colour.colour = 'Black';",
    "SQL": "SELECT\n  `T1`.`superhero_name`\nFROM `superhero` AS `T1`\nINNER JOIN `colour` AS `T2`\n  ON `T1`.`eye_colour_id` = `T2`.`id` AND `T1`.`hair_colour_id` = `T2`.`id`\nWHERE\n  `T2`.`colour` = 'Black'",
    "difficulty": "moderate"
  },
  {
    "question_id": 785,
    "db_id": "superhero",
    "question": "Describe the names of neutral alignment superheroes.",
    "evidence": "names of superheroes refers to superhero_name; neutral alignment refers to alignment = 'Neutral';",
    "SQL": "SELECT\n  `T1`.`superhero_name`\nFROM `superhero` AS `T1`\nINNER JOIN `alignment` AS `T2`\n  ON `T1`.`alignment_id` = `T2`.`id`\nWHERE\n  `T2`.`alignment` = 'Neutral'",
    "difficulty": "simple"
  },
  {
    "question_id": 786,
    "db_id": "superhero",
    "question": "How many heroes have the highest attribute value in strength?",
    "evidence": "highest attribute value in strength refers to MAX(attribute_value) WHERE attribute_name = 'Strength';",
    "SQL": "SELECT\n  COUNT(`T1`.`hero_id`)\nFROM `hero_attribute` AS `T1`\nINNER JOIN `attribute` AS `T2`\n  ON `T1`.`attribute_id` = `T2`.`id`\nWHERE\n  `T2`.`attribute_name` = 'Strength'\n  AND `T1`.`attribute_value` = (\n    SELECT\n      MAX(`attribute_value`)\n    FROM `hero_attribute`\n  )",
    "difficulty": "moderate"
  },
  {
    "question_id": 788,
    "db_id": "superhero",
    "question": "How many percent of female heroes were published by Marvel Comics?",
    "evidence": "percent = MULTIPLY(DIVIDE(SUM(gender = 'Female' WHERE publisher_name = 'Marvel Comics'), COUNT(publisher_name = 'Marvel Comics')), 100); female heroes refers to gender = 'Female'; Marvel Comics refers to publisher_name = 'Marvel Comics';",
    "SQL": "SELECT\n  CAST(COUNT(CASE WHEN `T2`.`publisher_name` = 'Marvel Comics' AND `T3`.`gender` = 'Female' THEN 1 ELSE NULL END) AS DOUBLE) * 100 / COUNT(CASE WHEN `T2`.`publisher_name` = 'Marvel Comics' THEN 1 ELSE NULL END)\nFROM `superhero` AS `T1`\nINNER JOIN `publisher` AS `T2`\n  ON `T1`.`publisher_id` = `T2`.`id`\nINNER JOIN `gender` AS `T3`\n  ON `T1`.`gender_id` = `T3`.`id`\n",
    "difficulty": "challenging"
  },
  {
    "question_id": 790,
    "db_id": "superhero",
    "question": "Calculate the difference between Emil Blonsky's weight and Charles Chandler's weight.",
    "evidence": "difference = SUBTRACT(SUM(weight_kg WHERE full_name = 'Emil Blonsky'), SUM(weight_kg WHERE full_name = 'Charles Chandler')); Emil Blonsky is the full name of superhero; Charles Chandler is the full name of superhero;",
    "SQL": "SELECT\n  (\n    SELECT\n      `weight_kg`\n    FROM `superhero`\n    WHERE\n      `full_name` LIKE 'Emil Blonsky'\n  ) - (\n    SELECT\n      `weight_kg`\n    FROM `superhero`\n    WHERE\n      `full_name` LIKE 'Charles Chandler'\n  ) AS `CALCULATE`",
    "difficulty": "moderate"
  },
  {
    "question_id": 791,
    "db_id": "superhero",
    "question": "Calculate the average height for all superhero.",
    "evidence": "average = DIVIDE(SUM(height_cm), COUNT(all heros));",
    "SQL": "SELECT\n  CAST(SUM(`height_cm`) AS DOUBLE) / COUNT(`id`)\nFROM `superhero`",
    "difficulty": "simple"
  },
  {
    "question_id": 792,
    "db_id": "superhero",
    "question": "What is Abomination's superpower?",
    "evidence": "Abomination refers to superhero_name = 'Abomination'; superpower refers to power_name;",
    "SQL": "SELECT\n  `T3`.`power_name`\nFROM `superhero` AS `T1`\nINNER JOIN `hero_power` AS `T2`\n  ON `T1`.`id` = `T2`.`hero_id`\nINNER JOIN `superpower` AS `T3`\n  ON `T2`.`power_id` = `T3`.`id`\nWHERE\n  `T1`.`superhero_name` = 'Abomination'",
    "difficulty": "simple"
  },
  {
    "question_id": 794,
    "db_id": "superhero",
    "question": "Which hero was the fastest?",
    "evidence": "which hero refers to superhero_name; fastest refers to MAX(attribute_value) WHERE attribute_name = 'Speed';",
    "SQL": "SELECT `T1`.`superhero_name` FROM `superhero` AS `T1` INNER JOIN `hero_attribute` AS `T2` ON `T1`.`id` = `T2`.`hero_id` INNER JOIN `attribute` AS `T3` ON `T2`.`attribute_id` = `T3`.`id`WHERE `T3`.`attribute_name` = 'Speed' AND `T2`.`attribute_value` = (SELECT MAX(`attribute_value`) FROM `hero_attribute` AS `T2b` WHERE `T2b`.`attribute_id` = `T3`.`id` ) LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 796,
    "db_id": "superhero",
    "question": "State all of 3-D Man's attributes along with their values.",
    "evidence": "3-D Man is the superhero_name. attributes refers to attribute_name; values refers to attribute_value;",
    "SQL": "SELECT\n  `T3`.`attribute_name`,\n  `T2`.`attribute_value`\nFROM `superhero` AS `T1`\nINNER JOIN `hero_attribute` AS `T2`\n  ON `T1`.`id` = `T2`.`hero_id`\nINNER JOIN `attribute` AS `T3`\n  ON `T2`.`attribute_id` = `T3`.`id`\nWHERE\n  `T1`.`superhero_name` = '3-D Man'",
    "difficulty": "moderate"
  },
  {
    "question_id": 797,
    "db_id": "superhero",
    "question": "Which superheroes have blue eyes with brown hair?",
    "evidence": "which superheroes refers to superhero_name; blue eyes refers to color = 'Blue' and color.id = eye_colour_id; brown hair refers to color = 'Brown' and color.id = hair_colour_id;",
    "SQL": "SELECT\n  `T1`.`superhero_name`\nFROM `superhero` AS `T1`\nINNER JOIN `colour` AS `T2`\n  ON `T1`.`eye_colour_id` = `T2`.`id`\nINNER JOIN `colour` AS `T3`\n  ON `T1`.`hair_colour_id` = `T3`.`id`\nWHERE\n  `T2`.`colour` = 'Blue' AND `T3`.`colour` = 'Brown'",
    "difficulty": "moderate"
  },
  {
    "question_id": 798,
    "db_id": "superhero",
    "question": "What is the publisher for Hawkman, Karate Kid and Speedy?",
    "evidence": "publisher refers to publisher_name; Hawkman refers to superhero_name = 'Hawkman'; Karate Kid refers to superhero_name = 'Karate Kid'; Speedy refers to superhero_name = 'Speedy';",
    "SQL": "SELECT\n  `T2`.`publisher_name`\nFROM `superhero` AS `T1`\nINNER JOIN `publisher` AS `T2`\n  ON `T1`.`publisher_id` = `T2`.`id`\nWHERE\n  `T1`.`superhero_name` IN ('Hawkman', 'Karate Kid', 'Speedy')",
    "difficulty": "moderate"
  },
  {
    "question_id": 800,
    "db_id": "superhero",
    "question": "Calculate the percentage of superheroes with blue eyes.",
    "evidence": "percentage = MULTIPLY(DIVIDE(SUM(superhero_name WHERE color = 'Blue'), COUNT(superhero_name)), 100.0); blue eyes refers to color = 'Blue' and color.id =  eye_colour_id = 7;",
    "SQL": "SELECT\n  CAST(COUNT(CASE WHEN `T2`.`colour` = 'Blue' THEN 1 ELSE NULL END) AS DOUBLE) * 100 / COUNT(`T1`.`id`)\nFROM `superhero` AS `T1`\nINNER JOIN `colour` AS `T2`\n  ON `T1`.`eye_colour_id` = `T2`.`id`",
    "difficulty": "moderate"
  },
  {
    "question_id": 801,
    "db_id": "superhero",
    "question": "Find the ratio between male superheroes and female superheroes.",
    "evidence": "ratio = DIVIDE(SUM(gender_id = 1) / SUM(gender_id = 2)); male superheroes refers to gender = 'Female'; female superheroes refers to gender = 'Male';",
    "SQL": "SELECT\n  CAST(COUNT(CASE WHEN `T2`.`gender` = 'Male' THEN `T1`.`id` ELSE NULL END) AS DOUBLE) / COUNT(CASE WHEN `T2`.`gender` = 'Female' THEN `T1`.`id` ELSE NULL END)\nFROM `superhero` AS `T1`\nINNER JOIN `gender` AS `T2`\n  ON `T1`.`gender_id` = `T2`.`id`",
    "difficulty": "moderate"
  },
  {
    "question_id": 806,
    "db_id": "superhero",
    "question": "Provide the eye colour of the superhero who has Karen Beecher-Duncan as their full name.",
    "evidence": "eye colour refers to colour.colour where eye_colour_id = colour.id; Karen Beecher-Duncan is the full name of superhero;",
    "SQL": "SELECT\n  `T2`.`colour`\nFROM `superhero` AS `T1`\nINNER JOIN `colour` AS `T2`\n  ON `T1`.`eye_colour_id` = `T2`.`id`\nWHERE\n  `T1`.`full_name` = 'Karen Beecher-Duncan'",
    "difficulty": "simple"
  },
  {
    "question_id": 819,
    "db_id": "superhero",
    "question": "In superheroes with missing weight data, calculate the difference between the number of superheroes with blue eyes and no eye color.",
    "evidence": "missing weight data refers to weight_kg = 0 OR T1.weight_kg = NULL; difference = SUBTRACT(SUM(colour.id = 7), SUM(colour.id = 1)); blue eyes refers to eye_colour_id WHERE colour.id = 7; no eye color refers to eye_colour_id WHERE colour.id = 1;",
    "SQL": "SELECT\n  SUM(CASE WHEN `T2`.`id` = 7 THEN 1 ELSE 0 END) - SUM(CASE WHEN `T2`.`id` = 1 THEN 1 ELSE 0 END)\nFROM `superhero` AS `T1`\nINNER JOIN `colour` AS `T2`\n  ON `T1`.`eye_colour_id` = `T2`.`id`\nWHERE\n  `T1`.`weight_kg` = 0 OR `T1`.`weight_kg` IS NULL",
    "difficulty": "challenging"
  },
  {
    "question_id": 822,
    "db_id": "superhero",
    "question": "How many green-skinned villains are there in the superhero universe?",
    "evidence": "green-skinned refers to colour.colour = 'Green' WHERE skin_colour_id = colour.id; villains refers to alignment = 'Bad';",
    "SQL": "SELECT\n  COUNT(`T1`.`id`)\nFROM `superhero` AS `T1`\nINNER JOIN `alignment` AS `T2`\n  ON `T1`.`alignment_id` = `T2`.`id`\nINNER JOIN `colour` AS `T3`\n  ON `T1`.`skin_colour_id` = `T3`.`id`\nWHERE\n  `T2`.`alignment` = 'Bad' AND `T3`.`colour` = 'Green'",
    "difficulty": "moderate"
  },
  {
    "question_id": 824,
    "db_id": "superhero",
    "question": "Identify superheroes who can control wind and list their names in alphabetical order.",
    "evidence": "superheroes refers to superhero_name; can control wind refers to power_name = 'Wind Control';",
    "SQL": "SELECT\n  `T1`.`superhero_name`\nFROM `superhero` AS `T1`\nINNER JOIN `hero_power` AS `T2`\n  ON `T1`.`id` = `T2`.`hero_id`\nINNER JOIN `superpower` AS `T3`\n  ON `T2`.`power_id` = `T3`.`id`\nWHERE\n  `T3`.`power_name` = 'Wind Control'\nORDER BY\n  `T1`.`superhero_name`",
    "difficulty": "moderate"
  },
  {
    "question_id": 825,
    "db_id": "superhero",
    "question": "Identify the gender of the superhero who has the ability of Phoenix Force.",
    "evidence": "ability of Phoenix Force refers to power_name = 'Phoenix Force';",
    "SQL": "SELECT\n  `T4`.`gender`\nFROM `superhero` AS `T1`\nINNER JOIN `hero_power` AS `T2`\n  ON `T1`.`id` = `T2`.`hero_id`\nINNER JOIN `superpower` AS `T3`\n  ON `T2`.`power_id` = `T3`.`id`\nINNER JOIN `gender` AS `T4`\n  ON `T1`.`gender_id` = `T4`.`id`\nWHERE\n  `T3`.`power_name` = 'Phoenix Force'",
    "difficulty": "moderate"
  },
  {
    "question_id": 829,
    "db_id": "superhero",
    "question": "Which publisher created more superheroes: DC or Marvel Comics? Find the difference in the number of superheroes.",
    "evidence": "DC refers to publisher_name = 'DC Comics'; Marvel Comics refers to publisher_name = 'Marvel Comics'; difference = SUBTRACT(SUM(publisher_name = 'DC Comics'), SUM(publisher_name = 'Marvel Comics'));",
    "SQL": "SELECT\n  SUM(CASE WHEN `T2`.`publisher_name` = 'DC Comics' THEN 1 ELSE 0 END) - SUM(CASE WHEN `T2`.`publisher_name` = 'Marvel Comics' THEN 1 ELSE 0 END)\nFROM `superhero` AS `T1`\nINNER JOIN `publisher` AS `T2`\n  ON `T1`.`publisher_id` = `T2`.`id`",
    "difficulty": "challenging"
  },
  {
    "question_id": 531,
    "db_id": "codebase_community",
    "question": "Which user has a higher reputation, Harlan or Jarrod Dixon?",
    "evidence": "\"Harlan\" and \"Jarrod Dixon\" are both DisplayName; highest reputation refers to Max(Reputation)",
    "SQL": "SELECT\n  `DisplayName`\nFROM `users`\nWHERE\n  `DisplayName` IN ('Harlan', 'Jarrod Dixon')\n  AND `Reputation` = (\n    SELECT\n      MAX(`Reputation`)\n    FROM `users`\n    WHERE\n      `DisplayName` IN ('Harlan', 'Jarrod Dixon')\n  )",
    "difficulty": "simple"
  },
  {
    "question_id": 532,
    "db_id": "codebase_community",
    "question": "Please list the display names of all the users whose accounts were created in the year 2011.",
    "evidence": "account created in the year 2011 refers to year(CreationDate) = 2011",
    "SQL": "SELECT\n  `DisplayName`\nFROM `users`\nWHERE\n  DATE_FORMAT(CAST(`CreationDate` AS DATETIME), '%Y') = '2011'",
    "difficulty": "simple"
  },
  {
    "question_id": 533,
    "db_id": "codebase_community",
    "question": "How many users last accessed the website after 2014/9/1?",
    "evidence": "last accessed after 2014/9/1 refers to LastAccessDate > '2014-09-01'",
    "SQL": "SELECT\n  COUNT(`Id`)\nFROM `users`\nWHERE\n  DATE(`LastAccessDate`) > '2014-09-01'",
    "difficulty": "simple"
  },
  {
    "question_id": 539,
    "db_id": "codebase_community",
    "question": "Who is the owner of the post \"Eliciting priors from experts\"?",
    "evidence": "\"Eliciting priors from experts\" is the Title of post; owner refers to DisplayName",
    "SQL": "SELECT\n  `T2`.`DisplayName`\nFROM `posts` AS `T1`\nINNER JOIN `users` AS `T2`\n  ON `T1`.`OwnerUserId` = `T2`.`Id`\nWHERE\n  `T1`.`Title` = 'Eliciting priors from experts'",
    "difficulty": "simple"
  },
  {
    "question_id": 537,
    "db_id": "codebase_community",
    "question": "How many posts does the user csgillespie own?",
    "evidence": "\"csgillespie\" is the DisplayName of user",
    "SQL": "SELECT\n  COUNT(`T1`.`id`)\nFROM `posts` AS `T1`\nINNER JOIN `users` AS `T2`\n  ON `T1`.`OwnerUserId` = `T2`.`Id`\nWHERE\n  `T2`.`DisplayName` = 'csgillespie'",
    "difficulty": "simple"
  },
  {
    "question_id": 544,
    "db_id": "codebase_community",
    "question": "What is the display name of the user who last edited the post \"Examples for teaching: Correlation does not mean causation\"?",
    "evidence": "\"Examples for teaching: Correlation does not mean causation\" is the Title of post; user who last edited refers to LastEditorUserId",
    "SQL": "SELECT\n  `T2`.`DisplayName`\nFROM `posts` AS `T1`\nINNER JOIN `users` AS `T2`\n  ON `T1`.`LastEditorUserId` = `T2`.`Id`\nWHERE\n  `T1`.`Title` = 'Examples for teaching: Correlation does not mean causation'",
    "difficulty": "moderate"
  },
  {
    "question_id": 547,
    "db_id": "codebase_community",
    "question": "Among the posts owned by an elder user, how many of them have a score of over 19?",
    "evidence": "elder users refers to Age > 65; Score of over 19 refers to Score > = 20",
    "SQL": "SELECT\n  COUNT(`T1`.`Id`)\nFROM `posts` AS `T1`\nINNER JOIN `users` AS `T2`\n  ON `T1`.`OwnerUserId` = `T2`.`Id`\nWHERE\n  `T1`.`Score` >= 20 AND `T2`.`Age` > 65",
    "difficulty": "simple"
  },
  {
    "question_id": 549,
    "db_id": "codebase_community",
    "question": "From which post is the tag \"bayesian\" excerpted from? Please give the body of the post.",
    "evidence": "\"bayesian\" is the TagName; excerpt from refers to ExcerptPostId",
    "SQL": "SELECT\n  `T2`.`Body`\nFROM `tags` AS `T1`\nINNER JOIN `posts` AS `T2`\n  ON `T2`.`Id` = `T1`.`ExcerptPostId`\nWHERE\n  `T1`.`TagName` = 'bayesian'",
    "difficulty": "simple"
  },
  {
    "question_id": 555,
    "db_id": "codebase_community",
    "question": "What is the average score of the posts owned by the user csgillespie?",
    "evidence": "\"csgillespie\" is the DisplayName of user; average score refers to AVG(Score)",
    "SQL": "SELECT\n  AVG(`T1`.`Score`)\nFROM `posts` AS `T1`\nINNER JOIN `users` AS `T2`\n  ON `T1`.`OwnerUserId` = `T2`.`Id`\nWHERE\n  `T2`.`DisplayName` = 'csgillespie'",
    "difficulty": "simple"
  },
  {
    "question_id": 557,
    "db_id": "codebase_community",
    "question": "Among the posts with a score of over 5, what is the percentage of them being owned by an elder user?",
    "evidence": "score of over 5 refers to Score > 5; elder user refers to Age > 65; percentage = Divide (Count(Id where Age>65), Count(Id)) * 100",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `T2`.`Age` > 65 THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(`T1`.`Id`)\nFROM `posts` AS `T1`\nINNER JOIN `users` AS `T2`\n  ON `T1`.`OwnerUserId` = `T2`.`Id`\nWHERE\n  `T1`.`Score` > 5",
    "difficulty": "moderate"
  },
  {
    "question_id": 563,
    "db_id": "codebase_community",
    "question": "User No.3025 gave a comment at 20:29:39 on 2014/4/23 to a post, how many favorite counts did that post get?",
    "evidence": "user no. 3025 refers to UserId = '3025'; comment at 20:29:39 on 2014/4/23 refers to CreationDate = '2014/4/23 20:29:39.0'",
    "SQL": "SELECT\n  `T1`.`FavoriteCount`\nFROM `posts` AS `T1`\nINNER JOIN `comments` AS `T2`\n  ON `T1`.`Id` = `T2`.`PostId`\nWHERE\n  `T2`.`CreationDate` = '2014-04-23 20:29:39.0' AND `T2`.`UserId` = 3025",
    "difficulty": "moderate"
  },
  {
    "question_id": 565,
    "db_id": "codebase_community",
    "question": "User No.23853 gave a comment to a post at 9:08:18 on 2013/7/12, was that post well-finished?",
    "evidence": "user no. 23853 refers to UserId = '23853'; at 9:08:18 on 2013/7/12 refers to CreationDate = '2013-07-12 09:08:18.0'; not well-finished refers to ClosedDate IS NULL and vice versa",
    "SQL": "SELECT\n  CASE\n    WHEN `T2`.`ClosedDate` IS NULL\n    THEN 'NOT well-finished'\n    ELSE 'well-finished'\n  END AS `resylt`\nFROM `comments` AS `T1`\nINNER JOIN `posts` AS `T2`\n  ON `T1`.`PostId` = `T2`.`Id`\nWHERE\n  `T1`.`UserId` = 23853 AND `T1`.`CreationDate` = '2013-07-12 09:08:18.0'",
    "difficulty": "moderate"
  },
  {
    "question_id": 567,
    "db_id": "codebase_community",
    "question": "For the user with the display name of \"Tiago Pasqualini\", how many posts did he/she own?",
    "evidence": "\"Tiago Pasqualini\" is the DisplayName;",
    "SQL": "SELECT\n  COUNT(`T1`.`Id`)\nFROM `users` AS `T1`\nINNER JOIN `posts` AS `T2`\n  ON `T1`.`Id` = `T2`.`OwnerUserId`\nWHERE\n  `T1`.`DisplayName` = 'Tiago Pasqualini'",
    "difficulty": "simple"
  },
  {
    "question_id": 568,
    "db_id": "codebase_community",
    "question": "Provide the display name of the user who made the vote No.6347.",
    "evidence": "vote no. 6347 refers to Id = '6347'",
    "SQL": "SELECT\n  `T1`.`DisplayName`\nFROM `users` AS `T1`\nINNER JOIN `votes` AS `T2`\n  ON `T1`.`Id` = `T2`.`UserId`\nWHERE\n  `T2`.`Id` = 6347",
    "difficulty": "simple"
  },
  {
    "question_id": 571,
    "db_id": "codebase_community",
    "question": "For the user No.24, how many times is the number of his/her posts compared to his/her votes?",
    "evidence": "user no. 24 refers to UserId = OwnerUserId = '24'; times of his/her post than votes = Divide (Count(post.Id), Count(votes.Id))",
    "SQL": "SELECT\n  CAST(COUNT(DISTINCT `T2`.`Id`) AS DOUBLE) / COUNT(DISTINCT `T1`.`Id`)\nFROM `votes` AS `T1`\nINNER JOIN `posts` AS `T2`\n  ON `T1`.`UserId` = `T2`.`OwnerUserId`\nWHERE\n  `T1`.`UserId` = 24",
    "difficulty": "moderate"
  },
  {
    "question_id": 572,
    "db_id": "codebase_community",
    "question": "How many views did the post titled 'Integration of Weka and/or RapidMiner into Informatica PowerCenter/Developer' get?",
    "evidence": "\"Integration of Weka and/or RapidMiner into Informatica PowerCenter/Developer\" is the Title of post; views refers to ViewCount",
    "SQL": "SELECT\n  `ViewCount`\nFROM `posts`\nWHERE\n  `Title` = 'Integration of Weka and/or RapidMiner into Informatica PowerCenter/Developer'",
    "difficulty": "moderate"
  },
  {
    "question_id": 573,
    "db_id": "codebase_community",
    "question": "Write the contents of comments with a score of 17.",
    "evidence": "score of 17 refers to Score = 17; contents of comments refers to Text",
    "SQL": "SELECT\n  `Text`\nFROM `comments`\nWHERE\n  `Score` = 17",
    "difficulty": "simple"
  },
  {
    "question_id": 576,
    "db_id": "codebase_community",
    "question": "Name the user that commented 'thank you user93!'",
    "evidence": "\"thank you user93\" is the Text of comment; user refers to DisplayName",
    "SQL": "SELECT\n  `T1`.`DisplayName`\nFROM `users` AS `T1`\nINNER JOIN `comments` AS `T2`\n  ON `T1`.`Id` = `T2`.`UserId`\nWHERE\n  `T2`.`Text` = 'thank you user93!'",
    "difficulty": "simple"
  },
  {
    "question_id": 578,
    "db_id": "codebase_community",
    "question": "Which user made a post titled 'Understanding what Dassault iSight is doing?' and how much is the reputation of the user?",
    "evidence": "\"Understanding what Dassault iSight is doing?\" is the Title of post; user refers to DisplayName;",
    "SQL": "SELECT\n  `T1`.`DisplayName`,\n  `T1`.`Reputation`\nFROM `users` AS `T1`\nINNER JOIN `posts` AS `T2`\n  ON `T1`.`Id` = `T2`.`OwnerUserId`\nWHERE\n  `T2`.`Title` = 'Understanding what Dassault iSight is doing?'",
    "difficulty": "moderate"
  },
  {
    "question_id": 581,
    "db_id": "codebase_community",
    "question": "Who is the owner of the post titled 'Open source tools for visualizing multi-dimensional data?'",
    "evidence": "'Open source tools for visualizing multi-dimensional data' is the Title of Post; owner refers to DisplayName;",
    "SQL": "SELECT\n  `T2`.`DisplayName`\nFROM `posts` AS `T1`\nINNER JOIN `users` AS `T2`\n  ON `T1`.`OwnerUserId` = `T2`.`Id`\nWHERE\n  `T1`.`Title` = 'Open source tools for visualizing multi-dimensional data?'",
    "difficulty": "moderate"
  },
  {
    "question_id": 584,
    "db_id": "codebase_community",
    "question": "Write all the comments left by users who edited the post titled 'Why square the difference instead of taking the absolute value in standard deviation?'",
    "evidence": "\"Why square the difference instead of taking the absolute value in standard deviation?\" is the Title of post;",
    "SQL": "SELECT\n  `T2`.`Comment`\nFROM `posts` AS `T1`\nINNER JOIN `postHistory` AS `T2`\n  ON `T1`.`Id` = `T2`.`PostId`\nWHERE\n  `T1`.`Title` = 'Why square the difference instead of taking the absolute value in standard deviation?'",
    "difficulty": "moderate"
  },
  {
    "question_id": 586,
    "db_id": "codebase_community",
    "question": "Which user added a bounty amount of 50 to the post title mentioning variance?",
    "evidence": "bounty amount of 50 refers to BountyAmount = 50; user refers to DisplayName; title mentioning variance refers to Title include 'variance'",
    "SQL": "SELECT\n  `T3`.`DisplayName`,\n  `T1`.`Title`\nFROM `posts` AS `T1`\nINNER JOIN `votes` AS `T2`\n  ON `T1`.`Id` = `T2`.`PostId`\nINNER JOIN `users` AS `T3`\n  ON `T3`.`Id` = `T2`.`UserId`\nWHERE\n  `T2`.`BountyAmount` = 50 AND `T1`.`Title` LIKE '%variance%'",
    "difficulty": "challenging"
  },
  {
    "question_id": 587,
    "db_id": "codebase_community",
    "question": "Calculate the average view count of each post tagged as 'humor' and list the title and the comment of each post.",
    "evidence": "tagged as  'humor' refers to tag = '<humor>'; comment of the post refers to Text; average view count = AVG(ViewCount)",
    "SQL": "SELECT AVG(T2.ViewCount) AS average_view_count, T2.Title, T1.Text FROM comments AS T1 INNER JOIN posts AS T2 ON T2.Id = T1.PostId WHERE T2.Tags = '<humor>' GROUP BY T2.Title, T1.Text",
    "difficulty": "moderate"
  },
  {
    "question_id": 592,
    "db_id": "codebase_community",
    "question": "How many users are awarded with more than 5 badges?",
    "evidence": "more than 5 badges refers to Count (Name) > 5; user refers to UserId",
    "SQL": "SELECT\n  COUNT(`UserId`)\nFROM (\n  SELECT\n    `UserId`,\n    COUNT(`Name`) AS `num`\n  FROM `badges`\n  GROUP BY\n    `UserId`\n) AS `T`\nWHERE\n  `T`.`num` > 5",
    "difficulty": "simple"
  },
  {
    "question_id": 595,
    "db_id": "codebase_community",
    "question": "Which user have only one post history per post and having at least 1000 views?",
    "evidence": "having at least 1000 view refers to Views > = 1000; user refers to UserId",
    "SQL": "SELECT\n  `T2`.`UserId`\nFROM `users` AS `T1`\nINNER JOIN `postHistory` AS `T2`\n  ON `T1`.`Id` = `T2`.`UserId`\nINNER JOIN `posts` AS `T3`\n  ON `T2`.`PostId` = `T3`.`Id`\nWHERE\n  `T3`.`ViewCount` >= 1000\nGROUP BY\n  `T2`.`UserId`\nHAVING\n  COUNT(DISTINCT `T2`.`PostHistoryTypeId`) = 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 598,
    "db_id": "codebase_community",
    "question": "What is the percentage difference of student badges given during 2010 and 2011?",
    "evidence": "student badges refers to badge's name = 'Student'; during 2010 refers to Year(Date) = 2010; during 2011 refers to Year(Date) = 2011; percentage difference = Subtract (Divide(Count(Name where Year(Date) = 2010), Count (Name)) *100, Divide(Count(Name where Year(Date) = 2011), Count(Name)) * 100)",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN DATE_FORMAT(CAST(`Date` AS DATETIME), '%Y') = '2010' THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(`Id`) - CAST(SUM(CASE WHEN DATE_FORMAT(CAST(`Date` AS DATETIME), '%Y') = '2011' THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(`Id`)\nFROM `badges`\nWHERE\n  `Name` = 'Student'",
    "difficulty": "challenging"
  },
  {
    "question_id": 604,
    "db_id": "codebase_community",
    "question": "What is the average of the up votes and the average user age for users creating more than 10 posts?",
    "evidence": "creating more than 10 post refers to Count (UserId) > 10; average of the up votes = Divide (Sum(UpVotes), Count (UserId)); average age = Divide (Sum(Age), Count(UserId))",
    "SQL": "SELECT\n  AVG(`T1`.`UpVotes`),\n  AVG(`T1`.`Age`)\nFROM `users` AS `T1`\nINNER JOIN (\n  SELECT\n    `OwnerUserId`,\n    COUNT(*) AS `post_count`\n  FROM `posts`\n  GROUP BY\n    `OwnerUserId`\n  HAVING\n    `post_count` > 10\n) AS `T2`\n  ON `T1`.`Id` = `T2`.`OwnerUserId`",
    "difficulty": "moderate"
  },
  {
    "question_id": 629,
    "db_id": "codebase_community",
    "question": "Calculate the ratio of votes in 2010 and 2011.",
    "evidence": "DIVIDE(COUNT(Id where YEAR(CreationDate) = 2010), COUNT(Id where YEAR(CreationDate) = 2011)) FROM votes;",
    "SQL": "SELECT\n  CAST(SUM(\n    CASE\n      WHEN DATE_FORMAT(CAST(`CreationDate` AS DATETIME), '%Y') = '2010'\n      THEN 1\n      ELSE 0\n    END\n  ) AS DOUBLE) / SUM(\n    CASE\n      WHEN DATE_FORMAT(CAST(`CreationDate` AS DATETIME), '%Y') = '2011'\n      THEN 1\n      ELSE 0\n    END\n  )\nFROM `votes`",
    "difficulty": "simple"
  },
  {
    "question_id": 633,
    "db_id": "codebase_community",
    "question": "Which post by slashnick has the most answers count? State the post ID.",
    "evidence": "most answers count refers to MAX(AnswerCount); post by slashnick refers to DisplayName = 'slashnick';",
    "SQL": "SELECT\n  `T2`.`PostId`\nFROM `users` AS `T1`\nINNER JOIN `postHistory` AS `T2`\n  ON `T1`.`Id` = `T2`.`UserId`\nINNER JOIN `posts` AS `T3`\n  ON `T2`.`PostId` = `T3`.`Id`\nWHERE\n  `T1`.`DisplayName` = 'slashnick'\nORDER BY\n  `T3`.`AnswerCount` DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 634,
    "db_id": "codebase_community",
    "question": "Among posts by Harvey Motulsky and Noah Snyder, which one has higher popularity?",
    "evidence": "Has higher popularity means the post has higher view count ; calculation = MAX(SUM(ViewCount)) where DisplayName = 'Harvey Motulsky' OR DisplayName = 'Noah Snyder';",
    "SQL": "SELECT\n  `T1`.`DisplayName`\nFROM `users` AS `T1`\nINNER JOIN `postHistory` AS `T2`\n  ON `T1`.`Id` = `T2`.`UserId`\nINNER JOIN `posts` AS `T3`\n  ON `T2`.`PostId` = `T3`.`Id`\nWHERE\n  `T1`.`DisplayName` = 'Harvey Motulsky' OR `T1`.`DisplayName` = 'Noah Snyder'\nGROUP BY\n  `T1`.`DisplayName`\nORDER BY\n  SUM(`T3`.`ViewCount`) DESC\nLIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 637,
    "db_id": "codebase_community",
    "question": "State all the tags used by Mark Meckes in his posts that doesn't have comments.",
    "evidence": "used by Mark Meckes refers to DisplayName = 'Mark Meckes'; Doen't have comments refers to CommentCount = 0;",
    "SQL": "SELECT\n  `T3`.`Tags`\nFROM `users` AS `T1`\nINNER JOIN `postHistory` AS `T2`\n  ON `T1`.`Id` = `T2`.`UserId`\nINNER JOIN `posts` AS `T3`\n  ON `T3`.`Id` = `T2`.`PostId`\nWHERE\n  `T1`.`DisplayName` = 'Mark Meckes' AND `T3`.`CommentCount` = 0",
    "difficulty": "moderate"
  },
  {
    "question_id": 639,
    "db_id": "codebase_community",
    "question": "Based on posts posted by Community, calculate the percentage of posts that use the R language.",
    "evidence": "DIVIDE(COUNT(PostId WHERE TagName = 'r')), (COUNT(PostId WHERE DisplayName = 'Community')) as percentage; R language refers to tagname = 'r'",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `T3`.`TagName` = 'r' THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(`T1`.`Id`)\nFROM `users` AS `T1`\nINNER JOIN `postHistory` AS `T2`\n  ON `T1`.`Id` = `T2`.`UserId`\nINNER JOIN `tags` AS `T3`\n  ON `T3`.`ExcerptPostId` = `T2`.`PostId`\nWHERE\n  `T1`.`DisplayName` = 'Community'",
    "difficulty": "challenging"
  },
  {
    "question_id": 640,
    "db_id": "codebase_community",
    "question": "Calculate the difference in view count from post posted by Mornington and view count from posts posted by Amos.",
    "evidence": "calculation = SUBTRACT(SUM(ViewCount where DisplayName = 'Mornington'), SUM(ViewCount where DisplayName = 'Amos'));",
    "SQL": "SELECT\n  SUM(CASE WHEN `T1`.`DisplayName` = 'Mornington' THEN `T3`.`ViewCount` ELSE 0 END) - SUM(CASE WHEN `T1`.`DisplayName` = 'Amos' THEN `T3`.`ViewCount` ELSE 0 END) AS `diff`\nFROM `users` AS `T1`\nINNER JOIN `postHistory` AS `T2`\n  ON `T1`.`Id` = `T2`.`UserId`\nINNER JOIN `posts` AS `T3`\n  ON `T3`.`Id` = `T2`.`PostId`",
    "difficulty": "moderate"
  },
  {
    "question_id": 665,
    "db_id": "codebase_community",
    "question": "What is the average monthly number of links created in 2010 for posts that have no more than 2 answers?",
    "evidence": "calculation = DIVIDE(COUNT(Id where YEAR(CreationDate) = 2010 and AnswerCount < = 2), 12)",
    "SQL": "SELECT\n  CAST(COUNT(`T1`.`Id`) AS DOUBLE) / 12\nFROM `postLinks` AS `T1`\nINNER JOIN `posts` AS `T2`\n  ON `T1`.`PostId` = `T2`.`Id`\nWHERE\n  `T2`.`AnswerCount` <= 2\n  AND DATE_FORMAT(CAST(`T1`.`CreationDate` AS DATETIME), '%Y') = '2010'",
    "difficulty": "moderate"
  },
  {
    "question_id": 669,
    "db_id": "codebase_community",
    "question": "When did 'chl' cast its first vote in a post?",
    "evidence": "DisplayName = 'chl'; cast its first vote refers to MIN(CreationDate);",
    "SQL": "SELECT\n  `T2`.`CreationDate`\nFROM `users` AS `T1`\nINNER JOIN `votes` AS `T2`\n  ON `T1`.`Id` = `T2`.`UserId`\nWHERE\n  `T1`.`DisplayName` = 'chl'\nORDER BY\n  `T2`.`CreationDate`\nLIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 671,
    "db_id": "codebase_community",
    "question": "What is the display name of the user who acquired the first Autobiographer badge?",
    "evidence": "Autobiographer is the name of the badge; acquired the first refers to MIN(Date);",
    "SQL": "SELECT\n  `T1`.`DisplayName`\nFROM `users` AS `T1`\nINNER JOIN `badges` AS `T2`\n  ON `T1`.`Id` = `T2`.`UserId`\nWHERE\n  `T2`.`Name` = 'Autobiographer'\nORDER BY\n  `T2`.`Date`\nLIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 672,
    "db_id": "codebase_community",
    "question": "Among the users located in United Kingdom, how many users whose post have a total favorite amount of 4 or more?",
    "evidence": "favorite amount of 4 or more refers to FavoriteCount > = 4; Location = 'United Kingdom';",
    "SQL": "SELECT\n  COUNT(`T1`.`Id`)\nFROM `users` AS `T1`\nINNER JOIN `posts` AS `T2`\n  ON `T1`.`Id` = `T2`.`OwnerUserId`\nWHERE\n  `T1`.`Location` = 'United Kingdom' AND `T2`.`FavoriteCount` >= 4",
    "difficulty": "moderate"
  },
  {
    "question_id": 678,
    "db_id": "codebase_community",
    "question": "Which post by Harvey Motulsky has the most views? Please give the id and title of this post.",
    "evidence": "DisplayName = 'Harvey Motulsky'; the most views refer to MAX(ViewCount);",
    "SQL": "SELECT\n  `T2`.`Id`,\n  `T2`.`Title`\nFROM `users` AS `T1`\nINNER JOIN `posts` AS `T2`\n  ON `T1`.`Id` = `T2`.`OwnerUserId`\nWHERE\n  `T1`.`DisplayName` = 'Harvey Motulsky'\nORDER BY\n  `T2`.`ViewCount` DESC\nLIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 682,
    "db_id": "codebase_community",
    "question": "Which is the most valuable post in 2010? Please give its id and the owner's display name.",
    "evidence": "the most valuable post in 2010 refers to MAX(FavoriteCount) where year(CreationDate) = 2010;",
    "SQL": "SELECT\n  `T2`.`OwnerUserId`,\n  `T1`.`DisplayName`\nFROM `users` AS `T1`\nINNER JOIN `posts` AS `T2`\n  ON `T1`.`Id` = `T2`.`OwnerUserId`\nWHERE\n  DATE_FORMAT(CAST(`T1`.`CreationDate` AS DATETIME), '%Y') = '2010'\nORDER BY\n  `T2`.`FavoriteCount` DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 683,
    "db_id": "codebase_community",
    "question": "What is the percentage of posts whose owners had a reputation of over 1000 in 2011?",
    "evidence": "percentage = DIVIDE(COUNT(Id where YEAR(CreationDate) = 2011 and Reputation > 1000), COUNT(Id) ) * 100;",
    "SQL": "SELECT\n  CAST(SUM(\n    CASE\n      WHEN DATE_FORMAT(CAST(`T2`.`CreaionDate` AS DATETIME), '%Y') = '2011'\n      AND `T1`.`Reputation` > 1000\n      THEN 1\n      ELSE 0\n    END\n  ) AS DOUBLE) * 100 / COUNT(`T1`.`Id`)\nFROM `users` AS `T1`\nINNER JOIN `posts` AS `T2`\n  ON `T1`.`Id` = `T2`.`OwnerUserId`",
    "difficulty": "moderate"
  },
  {
    "question_id": 685,
    "db_id": "codebase_community",
    "question": "Identify the total views on the post 'Computer Game Datasets'. Name the user who posted it last time.",
    "evidence": "total views refer to ViewCount; Name the user refers to DisplayName; post 'Computer Game Datasets' refers to Text = 'Computer Game Datasets';",
    "SQL": "SELECT\n  `T2`.`ViewCount`,\n  `T3`.`DisplayName`\nFROM `postHistory` AS `T1`\nINNER JOIN `posts` AS `T2`\n  ON `T1`.`PostId` = `T2`.`Id`\nINNER JOIN `users` AS `T3`\n  ON `T2`.`LastEditorUserId` = `T3`.`Id`\nWHERE\n  `T1`.`Text` = 'Computer Game Datasets'",
    "difficulty": "moderate"
  },
  {
    "question_id": 687,
    "db_id": "codebase_community",
    "question": "How many comments were added to the post with the highest score?",
    "evidence": "the highest score refers to MAX(Score);",
    "SQL": "SELECT\n  COUNT(`T2`.`Id`)\nFROM `posts` AS `T1`\nINNER JOIN `comments` AS `T2`\n  ON `T1`.`Id` = `T2`.`PostId`\nGROUP BY\n  `T1`.`Id`\nORDER BY\n  `T1`.`Score` DESC\nLIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 694,
    "db_id": "codebase_community",
    "question": "Provide the text of the latest 10 comments to the post with the title 'Analysing wind data with R' and the display name of the user who left it.",
    "evidence": "the latest comment refers to MAX(CreationDate);",
    "SQL": "SELECT\n  `T3`.`Text`,\n  `T1`.`DisplayName`\nFROM `users` AS `T1`\nINNER JOIN `posts` AS `T2`\n  ON `T1`.`Id` = `T2`.`OwnerUserId`\nINNER JOIN `comments` AS `T3`\n  ON `T2`.`Id` = `T3`.`PostId`\nWHERE\n  `T2`.`Title` = 'Analysing wind data with R'\nORDER BY\n  `T1`.`CreationDate` DESC\nLIMIT 10",
    "difficulty": "moderate"
  },
  {
    "question_id": 701,
    "db_id": "codebase_community",
    "question": "Among all the posts posted by the most influential user, identify the percentage with a score above 50.",
    "evidence": "The higher reputation the user has the more influence; percentage = DIVIDE(COUNT(stats_posts.Id where Score > 50 and MAX(Reputation))), COUNT(stats_posts.Id where MAX(Reputation));",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `T2`.`Score` > 50 THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(`T1`.`Id`)\nFROM `users` AS `T1`\nINNER JOIN `posts` AS `T2`\n  ON `T1`.`Id` = `T2`.`OwnerUserId`\nINNER JOIN (\n  SELECT\n    MAX(`Reputation`) AS `max_reputation`\n  FROM `users`\n) AS `T3`\n  ON `T1`.`Reputation` = `T3`.`max_reputation`",
    "difficulty": "challenging"
  },
  {
    "question_id": 704,
    "db_id": "codebase_community",
    "question": "What is the excerpt post ID and wiki post ID of the tag named sample?",
    "evidence": "tag named sample refers to TagName = 'sample';",
    "SQL": "SELECT\n  `ExcerptPostId`,\n  `WikiPostId`\nFROM `tags`\nWHERE\n  `TagName` = 'sample'",
    "difficulty": "simple"
  },
  {
    "question_id": 705,
    "db_id": "codebase_community",
    "question": "Give the user's reputation and up vote number of the user that commented \"fine, you win :)\".",
    "evidence": "Text = 'fine, you win :)';",
    "SQL": "SELECT\n  `T2`.`Reputation`,\n  `T2`.`UpVotes`\nFROM `comments` AS `T1`\nINNER JOIN `users` AS `T2`\n  ON `T1`.`UserId` = `T2`.`Id`\nWHERE\n  `T1`.`Text` = 'fine, you win :)'",
    "difficulty": "simple"
  },
  {
    "question_id": 707,
    "db_id": "codebase_community",
    "question": "Among the posts with views ranging from 100 to 150, what is the comment with the highest score?",
    "evidence": "views ranging from 100 to 150 refers to ViewCount BETWEEN 100 and 150; comment with the highest score refers to Text where MAX(Score);",
    "SQL": "SELECT\n  `Text`\nFROM `comments`\nWHERE\n  `PostId` IN (\n    SELECT\n      `Id`\n    FROM `posts`\n    WHERE\n      `ViewCount` BETWEEN 100 AND 150\n  )\nORDER BY\n  `Score` DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 710,
    "db_id": "codebase_community",
    "question": "In posts with 1 comment, how many of the comments have 0 score?",
    "evidence": "in posts with 1 comment refers to CommentCount = 1;",
    "SQL": "SELECT\n  COUNT(`T1`.`id`)\nFROM `comments` AS `T1`\nINNER JOIN `posts` AS `T2`\n  ON `T1`.`PostId` = `T2`.`Id`\nWHERE\n  `T2`.`CommentCount` = 1 AND `T2`.`Score` = 0",
    "difficulty": "simple"
  },
  {
    "question_id": 716,
    "db_id": "codebase_community",
    "question": "Among the comments with scores between 5 to 10, what is the percentage of the users with 0 up votes?",
    "evidence": "percentage = DIVIDE(COUNT(UserId where UpVotes = 0 and Score BETWEEN 5 and 10))*100, (COUNT(UserId where Score BETWEEN 5 and 10));",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `T1`.`UpVotes` = 0 THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(`T1`.`Id`) AS `per`\nFROM `users` AS `T1`\nINNER JOIN `comments` AS `T2`\n  ON `T1`.`Id` = `T2`.`UserId`\nWHERE\n  `T2`.`Score` BETWEEN 5 AND 10",
    "difficulty": "moderate"
  },
  {
    "question_id": 340,
    "db_id": "card_games",
    "question": "Which are the cards that have incredibly powerful foils.",
    "evidence": "incredibly poweful foils refers to cardKingdomFoilId is not null AND cardKingdomId is not null",
    "SQL": "SELECT\n  `id`\nFROM `cards`\nWHERE\n  NOT `cardKingdomFoilId` IS NULL AND NOT `cardKingdomId` IS NULL",
    "difficulty": "simple"
  },
  {
    "question_id": 341,
    "db_id": "card_games",
    "question": "What are the borderless cards available without powerful foils?",
    "evidence": "borderless' refers to borderColor; poweful foils refers to cardKingdomFoilId paired with cardKingdomId AND cardKingdomId is not null",
    "SQL": "SELECT\n  `id`\nFROM `cards`\nWHERE\n  `borderColor` = 'borderless'\n  AND (\n    `cardKingdomId` IS NULL OR `cardKingdomId` IS NULL\n  )",
    "difficulty": "simple"
  },
  {
    "question_id": 344,
    "db_id": "card_games",
    "question": "List all the mythic rarity print cards banned in gladiator format.",
    "evidence": "mythic rarity printing refers to rarity = 'mythic'; card banned refers to status = 'Banned'; in gladiator format refers to format = 'gladiator';",
    "SQL": "SELECT DISTINCT\n  `T1`.`id`\nFROM `cards` AS `T1`\nINNER JOIN `legalities` AS `T2`\n  ON `T1`.`uuid` = `T2`.`uuid`\nWHERE\n  `T2`.`format` = 'gladiator'\n  AND `T2`.`status` = 'Banned'\n  AND `T1`.`rarity` = 'mythic'",
    "difficulty": "moderate"
  },
  {
    "question_id": 345,
    "db_id": "card_games",
    "question": "For artifact type of cards that do not have multiple faces on the same card, state its legalities status for vintage play format.",
    "evidence": "Artifact type of cards refers to types = 'Artifact'; card does not have multiple faces on the same card refers to side is NULL'; vintage play format refers to format = 'vintage';",
    "SQL": "SELECT DISTINCT\n  `T2`.`status`\nFROM `cards` AS `T1`\nINNER JOIN `legalities` AS `T2`\n  ON `T1`.`uuid` = `T2`.`uuid`\nWHERE\n  `T1`.`type` = 'Artifact' AND `T2`.`format` = 'vintage' AND `T1`.`side` IS NULL",
    "difficulty": "moderate"
  },
  {
    "question_id": 346,
    "db_id": "card_games",
    "question": "List all the card id and artist with unknown power which are legal for commander play format.",
    "evidence": "unknown power refers to power = '*' or POWER IS NULL; commander play format refers to format = 'commander'; legal for commander play format refers to format = 'commander' where status = 'Legal'",
    "SQL": "SELECT\n  `T1`.`id`,\n  `T1`.`artist`\nFROM `cards` AS `T1`\nINNER JOIN `legalities` AS `T2`\n  ON `T1`.`uuid` = `T2`.`uuid`\nWHERE\n  `T2`.`status` = 'Legal'\n  AND `T2`.`format` = 'commander'\n  AND (\n    `T1`.`power` IS NULL OR `T1`.`power` = '*'\n  )",
    "difficulty": "moderate"
  },
  {
    "question_id": 347,
    "db_id": "card_games",
    "question": "Find all cards illustrated by Stephen Daniel and describe the text of the ruling of these cards. State if these cards have missing or degraded properties and values.",
    "evidence": "cards have missing or degraded properties and value refers to hasContentWarning = 1; 'Stephen Daniele' is artist; Find all cards refers to return card id",
    "SQL": "SELECT\n  `T1`.`id`,\n  `T2`.`text`,\n  `T1`.`hasContentWarning`\nFROM `cards` AS `T1`\nINNER JOIN `rulings` AS `T2`\n  ON `T1`.`uuid` = `T2`.`uuid`\nWHERE\n  `T1`.`artist` = 'Stephen Daniele'",
    "difficulty": "moderate"
  },
  {
    "question_id": 349,
    "db_id": "card_games",
    "question": "Name the card and artist with the most ruling information. Also state if the card is a promotional printing.",
    "evidence": "with the most ruling information refers to Max(count(rulings.uuid)); the card is the promotional printing refers to isPromo = 1;",
    "SQL": "SELECT `T1`.`name`, `T1`.`artist`, `T1`.`isPromo` FROM `cards` AS `T1` INNER JOIN `rulings` AS `T2` ON `T1`.`uuid` = `T2`.`uuid` WHERE `T1`.`isPromo` = 1 AND `T1`.`artist` = ( SELECT `artist` FROM `cards` WHERE `isPromo` = 1 GROUP BY `artist` HAVING COUNT(DISTINCT `uuid`) = (SELECT MAX(card_counts.max_count) FROM (SELECT COUNT(DISTINCT `uuid`) AS max_count FROM `cards` WHERE `isPromo` = 1 GROUP BY `artist`) AS card_counts))LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 352,
    "db_id": "card_games",
    "question": "Calculate the percentage of the cards availabe in Chinese Simplified.",
    "evidence": "Chinese Simplified' is the language; percentage = Divide(Sum(id where language = 'Chinese Simplified'), Count(id)) *100",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `T2`.`language` = 'Chinese Simplified' THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(`T1`.`id`)\nFROM `cards` AS `T1`\nINNER JOIN `foreign_data` AS `T2`\n  ON `T1`.`uuid` = `T2`.`uuid`",
    "difficulty": "moderate"
  },
  {
    "question_id": 356,
    "db_id": "card_games",
    "question": "How many cards have infinite power?",
    "evidence": "infinite power refers to power = '*';",
    "SQL": "SELECT\n  COUNT(*)\nFROM `cards`\nWHERE\n  `power` = '*'",
    "difficulty": "simple"
  },
  {
    "question_id": 358,
    "db_id": "card_games",
    "question": "What is the border color of card \"Ancestor's Chosen\"?",
    "evidence": "name of card  = 'Ancestor''s Chosen' ;",
    "SQL": "SELECT DISTINCT\n  `borderColor`\nFROM `cards`\nWHERE\n  `name` = 'Ancestor''s Chosen'",
    "difficulty": "simple"
  },
  {
    "question_id": 366,
    "db_id": "card_games",
    "question": "What is the rule of playing card \"Benalish Knight\"?",
    "evidence": "Benalish Knight' is the name of card; rule of playing card refers to format;",
    "SQL": "SELECT\n  `T2`.`format`\nFROM `cards` AS `T1`\nINNER JOIN `legalities` AS `T2`\n  ON `T1`.`uuid` = `T2`.`uuid`\nWHERE\n  `T1`.`name` = 'Benalish Knight'",
    "difficulty": "simple"
  },
  {
    "question_id": 368,
    "db_id": "card_games",
    "question": "What is the percentage of borderless cards?",
    "evidence": "borderless card refers to borderColor = 'borderless'; percentage = Divide(Count (id) where borderColor = 'borderless', Count(id)) *100",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `borderColor` = 'borderless' THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(`id`)\nFROM `cards`",
    "difficulty": "simple"
  },
  {
    "question_id": 371,
    "db_id": "card_games",
    "question": "What is the percentage of cards whose language is French among the Story Spotlight cards?",
    "evidence": "Story Spotlight card refers to isStorySpotlight = 1; French is the language; Percentage = Divide(Count(id) where language = 'French' and isStorySpotlight = 1, Count(id) where isStorySpotlight = 1)*100",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `T2`.`language` = 'French' THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(`T1`.`id`)\nFROM `cards` AS `T1`\nINNER JOIN `foreign_data` AS `T2`\n  ON `T1`.`uuid` = `T2`.`uuid`\nWHERE\n  `T1`.`isStorySpotlight` = 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 377,
    "db_id": "card_games",
    "question": "How many cards with original type of \"Summon - Angel\" have subtype other than \"Angel\"?",
    "evidence": "subtype other than Angel refers to subtypes is not 'Angel';",
    "SQL": "SELECT\n  COUNT(`id`)\nFROM `cards`\nWHERE\n  `originalType` = 'Summon - Angel' AND `subtypes` <> 'Angel'",
    "difficulty": "simple"
  },
  {
    "question_id": 379,
    "db_id": "card_games",
    "question": "What are the cards belong to duel deck a? List the ID.",
    "evidence": "duel deck a refers to duelDeck = a;",
    "SQL": "SELECT\n  `id`\nFROM `cards`\nWHERE\n  `duelDeck` = 'a'",
    "difficulty": "simple"
  },
  {
    "question_id": 383,
    "db_id": "card_games",
    "question": "How many of the banned cards are white border?",
    "evidence": "banned card refers to status = 'Banned'; white border refers to borderColor = 'white';",
    "SQL": "SELECT\n  COUNT(`T1`.`id`)\nFROM `cards` AS `T1`\nINNER JOIN `legalities` AS `T2`\n  ON `T1`.`uuid` = `T2`.`uuid`\nWHERE\n  `T2`.`status` = 'Banned' AND `T1`.`borderColor` = 'white'",
    "difficulty": "simple"
  },
  {
    "question_id": 391,
    "db_id": "card_games",
    "question": "Among the Artifact cards, which are black color and comes with foreign languague translation?",
    "evidence": "Artifact card refers to originalType = 'Artifact'; black color refers to colors = 'B'; foreign language refers to language in foreign_data",
    "SQL": "SELECT DISTINCT\n  `T1`.`name`\nFROM `cards` AS `T1`\nINNER JOIN `foreign_data` AS `T2`\n  ON `T1`.`uuid` = `T2`.`uuid`\nWHERE\n  `T1`.`originalType` = 'Artifact' AND `T1`.`colors` = 'B'",
    "difficulty": "moderate"
  },
  {
    "question_id": 397,
    "db_id": "card_games",
    "question": "What is the mana cost of cards with a normal layout, a 2003 frame version, with a black border color, and available in paper and mtgo?",
    "evidence": "available in paper and mtgo refers to availability = 'mtgo,paper'; frameVersion = 2003;borderColor = 'black'",
    "SQL": "SELECT\n  `manaCost`\nFROM `cards`\nWHERE\n  `availability` = 'mtgo,paper'\n  AND `borderColor` = 'black'\n  AND `frameVersion` = 2003\n  AND `layout` = 'normal'",
    "difficulty": "moderate"
  },
  {
    "question_id": 402,
    "db_id": "card_games",
    "question": "What is the percentage of Story Spotlight cards that do not have a text box? List them by their ID.",
    "evidence": "Story Spotlight cards that do not have a text box refers to isStorySpotlight = 1 and isTextless = 0; Percentage = DIVIDE(SUM(count(id) where isStorylight = 1 AND isTextless = 0 ), SUM(count(id))) * 100",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `isTextless` = 0  AND `isStorySpotlight` = 1 THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(`id`)\nFROM `cards`",
    "difficulty": "moderate"
  },
  {
    "question_id": 405,
    "db_id": "card_games",
    "question": "How many Brazilian Portuguese translated sets are inside the Commander block?",
    "evidence": "Commander block refer to block = 'Commander'; sets refer to code = setCode; Portuguese refer to language = 'Portuguese (Brasil)'",
    "SQL": "SELECT\n  COUNT(`T1`.`id`)\nFROM `sets` AS `T1`\nINNER JOIN `set_translations` AS `T2`\n  ON `T1`.`code` = `T2`.`setCode`\nWHERE\n  `T2`.`language` = 'Portuguese (Brazil)' AND `T1`.`block` = 'Commander'",
    "difficulty": "moderate"
  },
  {
    "question_id": 407,
    "db_id": "card_games",
    "question": "Lists all types of cards in German.",
    "evidence": "German refer to language; all types refer to the subtypes, supertypes; subtypes is not null AND supertypes is not null",
    "SQL": "SELECT\n  `T1`.`subtypes`,\n  `T1`.`supertypes`\nFROM `cards` AS `T1`\nINNER JOIN `foreign_data` AS `T2`\n  ON `T1`.`uuid` = `T2`.`uuid`\nWHERE\n  `T2`.`language` = 'German'\n  AND NOT `T1`.`subtypes` IS NULL\n  AND NOT `T1`.`supertypes` IS NULL",
    "difficulty": "moderate"
  },
  {
    "question_id": 408,
    "db_id": "card_games",
    "question": "How many unknown power cards contain info about the triggered ability",
    "evidence": "unknown power cards refers to power is null or power = '*';contain info about the triggered ability refers to text contains 'triggered ability'",
    "SQL": "SELECT\n  Count(DISTINCT `T1`.`id`) \nFROM `cards` AS `T1`\nINNER JOIN `rulings` AS `T2`\n  ON `T1`.`uuid` = `T2`.`uuid`\nWHERE\n  (\n    `T1`.`power` IS NULL OR `T1`.`power` = '*'\n  )\n  AND `T2`.`text` LIKE '%triggered ability%'",
    "difficulty": "moderate"
  },
  {
    "question_id": 409,
    "db_id": "card_games",
    "question": "Indicates the number of cards with pre-modern format, ruling text \"This is a triggered mana ability.\" that do not have multiple faces.",
    "evidence": "pre-modern format refers to format = 'premodern' ;do not have multiple faces refers to side IS NULL",
    "SQL": "SELECT\n  COUNT(`T1`.`id`)\nFROM `cards` AS `T1`\nINNER JOIN `legalities` AS `T2`\n  ON `T1`.`uuid` = `T2`.`uuid`\nINNER JOIN `rulings` AS `T3`\n  ON `T1`.`uuid` = `T3`.`uuid`\nWHERE\n  `T2`.`format` = 'premodern'\n  AND `T3`.`text` = 'This is a triggered mana ability.'\n  AND `T1`.`Side` IS NULL",
    "difficulty": "moderate"
  },
  {
    "question_id": 412,
    "db_id": "card_games",
    "question": "What is the foreign name of the card in French of type Creature, normal layout and black border color, by artist Matthew D. Wilson?",
    "evidence": "in French refers to language = 'French'; black border color refers to borderColor = 'black'",
    "SQL": "SELECT\n  `name`\nFROM `foreign_data`\nWHERE\n  `uuid` IN (\n    SELECT\n      `uuid`\n    FROM `cards`\n    WHERE\n      `types` = 'Creature'\n      AND `layout` = 'normal'\n      AND `borderColor` = 'black'\n      AND `artist` = 'Matthew D. Wilson'\n  )\n  AND `language` = 'French'",
    "difficulty": "moderate"
  },
  {
    "question_id": 414,
    "db_id": "card_games",
    "question": "What language is the set of 180 cards that belongs to the Ravnica block translated into?",
    "evidence": "set of 180 cards refers to baseSetSize = 180",
    "SQL": "SELECT\n  `T2`.`language`\nFROM `sets` AS `T1`\nINNER JOIN `set_translations` AS `T2`\n  ON `T1`.`code` = `T2`.`setCode`\nWHERE\n  `T1`.`block` = 'Ravnica' AND `T1`.`baseSetSize` = 180",
    "difficulty": "simple"
  },
  {
    "question_id": 415,
    "db_id": "card_games",
    "question": "What percentage of cards with format commander and legal status do not have a content warning?",
    "evidence": "do not have a content warning refers to hasContentWarning = 0; percentage refers to DIVIDE(COUNT(hasContentWarning = 0),COUNT(ID))*100 where format = 'commander' AND Status = 'Legal';",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `T1`.`hasContentWarning` = 0 THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(`T1`.`id`)\nFROM `cards` AS `T1`\nINNER JOIN `legalities` AS `T2`\n  ON `T1`.`uuid` = `T2`.`uuid`\nWHERE\n  `T2`.`format` = 'commander' AND `T2`.`status` = 'Legal'",
    "difficulty": "challenging"
  },
  {
    "question_id": 416,
    "db_id": "card_games",
    "question": "What percentage of cards without power are in French?",
    "evidence": "in French refers to language = 'French'; cards without power refers to power IS NULL OR power = '*'; percentage = DIVIDE(COUNT(language = 'French' and power is NULL or power = '*'), COUNT( power is NULL or power = '*'))*100",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `T2`.`language` = 'French' THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(`T1`.`id`)\nFROM `cards` AS `T1`\nINNER JOIN `foreign_data` AS `T2`\n  ON `T1`.`uuid` = `T2`.`uuid`\nWHERE\n  `T1`.`power` IS NULL OR `T1`.`power` = '*'",
    "difficulty": "challenging"
  },
  {
    "question_id": 422,
    "db_id": "card_games",
    "question": "What is the language of the card with the multiverse number 149934?",
    "evidence": "multiverse number 149934 refers to multiverseid = 149934;",
    "SQL": "SELECT\n  `language`\nFROM `foreign_data`\nWHERE\n  `multiverseid` = 149934",
    "difficulty": "simple"
  },
  {
    "question_id": 424,
    "db_id": "card_games",
    "question": "What proportion of cards do not have a text box with a normal layout?",
    "evidence": "do not have a text box refers to isTextless = 1; proportion refers to DIVIDE(COUNT(Textless = 1 and layout = 'normal'),COUNT(Textless))*100",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `isTextless` = 1 AND `layout` = 'normal' THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(*)\nFROM `cards`",
    "difficulty": "simple"
  },
  {
    "question_id": 427,
    "db_id": "card_games",
    "question": "What languages are available in the set known as Archenemy on the magic card market and having the code ARC?",
    "evidence": "known as Archenemy refers to mcmName = 'Archenemy'; having the code ARC refers to setCode = 'ARC'",
    "SQL": "SELECT\n  `T2`.`language`\nFROM `sets` AS `T1`\nINNER JOIN `set_translations` AS `T2`\n  ON `T1`.`code` = `T2`.`setCode`\nWHERE\n  `T1`.`mcmName` = 'Archenemy' AND `T2`.`setCode` = 'ARC'",
    "difficulty": "moderate"
  },
  {
    "question_id": 440,
    "db_id": "card_games",
    "question": "Which foreign language is used by \"A Pedra Fellwar\"?",
    "evidence": "\"A Pedra Fellwar\" refers to name = 'A Pedra Fellwar'",
    "SQL": "SELECT DISTINCT\n  `language`\nFROM `foreign_data`\nWHERE\n  `name` = 'A Pedra Fellwar'",
    "difficulty": "simple"
  },
  {
    "question_id": 459,
    "db_id": "card_games",
    "question": "Which card costs more converted mana, \"Serra Angel\" or \"Shrine Keeper\"?",
    "evidence": "\"Serra Angel\" refers to name = 'Serra Angel'; \"Shrine Keeper\" refers to name = 'Shrine Keeper'; card costs more converted mana when the value of convertedManaCost is greater",
    "SQL": "SELECT\n  `name`\nFROM `cards`\nWHERE\n  `name` IN ('Serra Angel', 'Shrine Keeper')\nORDER BY\n  `convertedManaCost` DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 462,
    "db_id": "card_games",
    "question": "What's the Italian name of the set of cards that \"Ancestor's Chosen\" is in?",
    "evidence": "Italian is a language which refers to language = 'Italian'; with \"Ancestor's Chosen\" in the card set refers to name = 'Ancestor''s Chosen'",
    "SQL": "SELECT\n  `translation`\nFROM `set_translations`\nWHERE\n  `setCode` IN (\n    SELECT\n      `setCode`\n    FROM `cards`\n    WHERE\n      `name` = 'Ancestor''s Chosen'\n  )\n  AND `language` = 'Italian'",
    "difficulty": "moderate"
  },
  {
    "question_id": 465,
    "db_id": "card_games",
    "question": "For the set of cards with \"Ancestor's Chosen\" in it, is there a Korean version of it?",
    "evidence": "set of cards with \"Ancestor''s Chosen\" in it refers to name = 'Ancestor''s Chosen'; Korean version refers to language = 'Korean'",
    "SQL": "SELECT\n  CASE\n    WHEN SUM(\n      CASE\n        WHEN `T2`.`language` = 'Korean' AND NOT `T2`.`translation` IS NULL\n        THEN 1\n        ELSE 0\n      END\n    ) > 0\n    THEN 'YES'\n    ELSE 'NO'\n  END\nFROM `cards` AS `T1`\nINNER JOIN `set_translations` AS `T2`\n  ON `T2`.`setCode` = `T1`.`setCode`\nWHERE\n  `T1`.`name` = 'Ancestor''s Chosen'",
    "difficulty": "moderate"
  },
  {
    "question_id": 466,
    "db_id": "card_games",
    "question": "Among the cards in the set \"Hauptset Zehnte Edition\", how many of them are designed by Adam Rex?",
    "evidence": "card set \"Hauptset Zehnte Edition\" refers to translation = 'Hauptset Zehnte Edition'; designed by Adam Rex refers to artist = 'Adam Rex'",
    "SQL": "SELECT\n  COUNT(`T1`.`id`)\nFROM `cards` AS `T1`\nINNER JOIN `set_translations` AS `T2`\n  ON `T2`.`setCode` = `T1`.`setCode`\nWHERE\n  `T2`.`translation` = 'Hauptset Zehnte Edition' AND `T1`.`artist` = 'Adam Rex'",
    "difficulty": "moderate"
  },
  {
    "question_id": 468,
    "db_id": "card_games",
    "question": "What is the Simplified Chinese translation of the name of the set \"Eighth Edition\"?",
    "evidence": "Eighth Edition is the name of card set which refers to name = 'Eighth Edition'; Simplified Chinese refers to language = 'Chinese Simplified'; translation of the name refers to translation",
    "SQL": "SELECT\n  `T2`.`translation`\nFROM `sets` AS `T1`\nINNER JOIN `set_translations` AS `T2`\n  ON `T2`.`setCode` = `T1`.`code`\nWHERE\n  `T1`.`name` = 'Eighth Edition' AND `T2`.`language` = 'Chinese Simplified'",
    "difficulty": "moderate"
  },
  {
    "question_id": 469,
    "db_id": "card_games",
    "question": "Did the set of cards with \"Angel of Mercy\" appear on Magic: The Gathering Online?",
    "evidence": "card set \"Angel of Mercy\" refers to name = 'Angel of Mercy'; appear on Magic: The Gathering Online refers to mtgoCode is NOT NULL and vice versa",
    "SQL": "SELECT\n  CASE WHEN NOT `T2`.`mtgoCode` IS NULL THEN 'YES' ELSE 'NO' END\nFROM `cards` AS `T1`\nINNER JOIN `sets` AS `T2`\n  ON `T2`.`code` = `T1`.`setCode`\nWHERE\n  `T1`.`name` = 'Angel of Mercy'",
    "difficulty": "moderate"
  },
  {
    "question_id": 472,
    "db_id": "card_games",
    "question": "Among the sets in the block \"Ice Age\", how many of them have an Italian translation?",
    "evidence": "sets in the block \"Ice Age\" refers to block = 'Ice Age'; Italian translation refers to language = 'Italian' and translation is not null",
    "SQL": "SELECT\n  COUNT(DISTINCT `T1`.`id`)\nFROM `sets` AS `T1`\nINNER JOIN `set_translations` AS `T2`\n  ON `T2`.`setCode` = `T1`.`code`\nWHERE\n  `T1`.`block` = 'Ice Age'\n  AND `T2`.`language` = 'Italian'\n  AND NOT `T2`.`translation` IS NULL",
    "difficulty": "moderate"
  },
  {
    "question_id": 473,
    "db_id": "card_games",
    "question": "Is the set of cards with Adarkar Valkyrie only available outside the United States?",
    "evidence": "card set Adarkar Valkyrie refers to name = 'Adarkar Valkyrie'; isForeignOnly = 1 means only available outside the United States;",
    "SQL": "SELECT\n  CASE WHEN `isForeignOnly` = 1 THEN 'YES' ELSE 'NO' END\nFROM `cards` AS `T1`\nINNER JOIN `sets` AS `T2`\n  ON `T2`.`code` = `T1`.`setCode`\nWHERE\n  `T1`.`name` = 'Adarkar Valkyrie'",
    "difficulty": "moderate"
  },
  {
    "question_id": 474,
    "db_id": "card_games",
    "question": "Among the sets of cards that have an Italian translation, how many of them have a base set number of under 100?",
    "evidence": "Italian translation refers to language = 'Italian'; have a translation means translation is not null; base set number of under 100 refers to baseSetSize < 100",
    "SQL": "SELECT\n  COUNT(`T1`.`id`)\nFROM `sets` AS `T1`\nINNER JOIN `set_translations` AS `T2`\n  ON `T2`.`setCode` = `T1`.`code`\nWHERE\n  NOT `T2`.`translation` IS NULL\n  AND `T1`.`baseSetSize` < 100\n  AND `T2`.`language` = 'Italian'",
    "difficulty": "moderate"
  },
  {
    "question_id": 477,
    "db_id": "card_games",
    "question": "Which of these artists have designed a card in the set Coldsnap, Jeremy Jarvis, Aaron Miller or Chippy?",
    "evidence": "card set Coldsnap refers to name = 'Coldsnap'; Jeremy Jarvis, Aaron Miller or Chippy are the name of artists which refers to artist IN ('Jeremy Jarvis', 'Aaron Miller','Chippy');",
    "SQL": "SELECT\n  `T1`.`artist`\nFROM `cards` AS `T1`\nINNER JOIN `sets` AS `T2`\n  ON `T2`.`code` = `T1`.`setCode`\nWHERE\n  (\n    `T2`.`name` = 'Coldsnap' AND `T1`.`artist` = 'Chippy'\n  )\n  OR (\n    `T2`.`name` = 'Coldsnap' AND `T1`.`artist` = 'Aaron Miller'\n  )\n  OR (\n    `T2`.`name` = 'Coldsnap' AND `T1`.`artist` = 'Jeremy Jarvis'\n  )\nGROUP BY\n  `T1`.`artist`",
    "difficulty": "challenging"
  },
  {
    "question_id": 479,
    "db_id": "card_games",
    "question": "Among the cards with converted mana cost higher than 5 in the set Coldsnap, how many of them have unknown power?",
    "evidence": "card set Coldsnap refers to name = 'Coldsnap'; converted mana cost higher than 5 refers to convertedManaCost > 5; unknown power refers to power = '*' or T1.power is null",
    "SQL": "SELECT\n  SUM(CASE WHEN `T1`.`power` = '*' OR `T1`.`power` IS NULL THEN 1 ELSE 0 END)\nFROM `cards` AS `T1`\nINNER JOIN `sets` AS `T2`\n  ON `T2`.`code` = `T1`.`setCode`\nWHERE\n  `T2`.`name` = 'Coldsnap' AND `T1`.`convertedManaCost` > 5",
    "difficulty": "moderate"
  },
  {
    "question_id": 480,
    "db_id": "card_games",
    "question": "What is the Italian flavor text of the card \"Ancestor's Chosen\"?",
    "evidence": "Italian refers to language = 'Italian'; flavor text refers to flavorText; \"Ancestor''s Chosen\" refers to name = 'Ancestor''s Chosen'",
    "SQL": "SELECT\n  `T2`.`flavorText`\nFROM `cards` AS `T1`\nINNER JOIN `foreign_data` AS `T2`\n  ON `T2`.`uuid` = `T1`.`uuid`\nWHERE\n  `T1`.`name` = 'Ancestor''s Chosen' AND `T2`.`language` = 'Italian'",
    "difficulty": "moderate"
  },
  {
    "question_id": 483,
    "db_id": "card_games",
    "question": "Please list the Italian text ruling of all the cards in the set Coldsnap.",
    "evidence": "card set Coldsnap refers to name = 'Coldsnap'; Italian refers to language = 'Italian'",
    "SQL": "SELECT DISTINCT\n  `T1`.`text`\nFROM `foreign_data` AS `T1`\nINNER JOIN `cards` AS `T2`\n  ON `T2`.`uuid` = `T1`.`uuid`\nINNER JOIN `sets` AS `T3`\n  ON `T3`.`code` = `T2`.`setCode`\nWHERE\n  `T3`.`name` = 'Coldsnap' AND `T1`.`language` = 'Italian'",
    "difficulty": "moderate"
  },
  {
    "question_id": 484,
    "db_id": "card_games",
    "question": "Please list the Italian names of the cards in the set Coldsnap with the highest converted mana cost.",
    "evidence": "card set Coldsnap refers to name = 'Coldsnap'; Italian refers to language = 'Italian'; highest converted mana cost refers to MAX(convertedManaCost)",
    "SQL": "SELECT\n  `T2`.`name`\nFROM `foreign_data` AS `T1`\nINNER JOIN `cards` AS `T2`\n  ON `T2`.`uuid` = `T1`.`uuid`\nINNER JOIN `sets` AS `T3`\n  ON `T3`.`code` = `T2`.`setCode`\nWHERE\n  `T3`.`name` = 'Coldsnap' AND `T1`.`language` = 'Italian'\nORDER BY\n  `T2`.`convertedManaCost` DESC",
    "difficulty": "moderate"
  },
  {
    "question_id": 486,
    "db_id": "card_games",
    "question": "What is the percentage of the cards with a converted mana cost of 7 in the set Coldsnap?",
    "evidence": "converted mana cost of 7 refers to convertedManaCost = 7; card set Coldsnap refers to name = 'Coldsnap'; percentage = DIVIDE(SUM(convertedManaCost = 7), SUM(convertedManaCost))*100",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `T1`.`convertedManaCost` = 7 THEN 1 ELSE 0 END) AS DOUBLE) * 100 / COUNT(`T1`.`id`)\nFROM `cards` AS `T1`\nINNER JOIN `sets` AS `T2`\n  ON `T2`.`code` = `T1`.`setCode`\nWHERE\n  `T2`.`name` = 'Coldsnap'",
    "difficulty": "moderate"
  },
  {
    "question_id": 487,
    "db_id": "card_games",
    "question": "What is the percentage of incredibly powerful cards in the set Coldsnap?",
    "evidence": "card set Coldsnap refers to name = 'Coldsnap'; foil is incredibly powerful refers to cardKingdomFoilId is not null AND cardKingdomId is not null; the percentage of incredibly powerful cards in the set refers to DIVIDE(SUM(incredibly powerful), SUM(name = 'Coldsnap'))*100",
    "SQL": "SELECT\n  CAST(SUM(\n    CASE\n      WHEN NOT `T1`.`cardKingdomFoilId` IS NULL AND NOT `T1`.`cardKingdomId` IS NULL\n      THEN 1\n      ELSE 0\n    END\n  ) AS DOUBLE) * 100 / COUNT(`T1`.`id`)\nFROM `cards` AS `T1`\nINNER JOIN `sets` AS `T2`\n  ON `T2`.`code` = `T1`.`setCode`\nWHERE\n  `T2`.`name` = 'Coldsnap'",
    "difficulty": "challenging"
  },
  {
    "question_id": 518,
    "db_id": "card_games",
    "question": "Which play format has the highest number of banned statuses? Indicate the play format and the names of all the cards that meet the condition.",
    "evidence": "play format refers to format; banned status refers to status = 'Banned'; the highest number of banned status refers to MAX(COUNT(status = 'Banned'))",
    "SQL": "SELECT T2.format, T1.name FROM cards AS T1 INNER JOIN legalities AS T2 ON T2.uuid = T1.uuid INNER JOIN ( SELECT format FROM legalities WHERE status = 'Banned' GROUP BY format ORDER BY COUNT(*) DESC LIMIT 1 ) AS MaxBanned ON MaxBanned.format = T2.format WHERE T2.status = 'Banned'",
    "difficulty": "moderate"
  },
  {
    "question_id": 522,
    "db_id": "card_games",
    "question": "Which cards are ranked 1st on EDHRec? List all of the cards name and its banned play format.",
    "evidence": "ranked 1st on EDHRec refers to edhrecRank = 1; banned refers to status = 'Banned'; play format refers to format; cards name refers to name",
    "SQL": "SELECT\n  `T1`.`name`,\n  `T2`.`format`\nFROM `cards` AS `T1`\nINNER JOIN `legalities` AS `T2`\n  ON `T2`.`uuid` = `T1`.`uuid`\nWHERE\n  `T1`.`edhrecRank` = 1 AND `T2`.`status` = 'Banned'\nGROUP BY\n  `T1`.`name`,\n  `T2`.`format`",
    "difficulty": "moderate"
  },
  {
    "question_id": 528,
    "db_id": "card_games",
    "question": "List the names of all the cards in the set Hour of Devastation and find the formats in which these cards are legal.",
    "evidence": "the set Hour of Devastation refers to set.name = 'Hour of Devastation'; names of all the cards in the set refers to cards.name; legal cards refers to status = 'Legal'; the formats refers to format",
    "SQL": "SELECT DISTINCT\n  `T2`.`name`,\n  CASE WHEN `T1`.`status` = 'Legal' THEN `T1`.`format` ELSE NULL END\nFROM `legalities` AS `T1`\nINNER JOIN `cards` AS `T2`\n  ON `T2`.`uuid` = `T1`.`uuid`\nWHERE\n  `T2`.`setCode` IN (\n    SELECT\n      `code`\n    FROM `sets`\n    WHERE\n      `name` = 'Hour of Devastation'\n  )",
    "difficulty": "challenging"
  },
  {
    "question_id": 529,
    "db_id": "card_games",
    "question": "Find and list the names of sets which don't have a Japanese translation but have a Korean translation.",
    "evidence": "names of sets refers to name; doesn't have Japanese translation refers to language not like '%Japanese%'; have Korean translation refers to language = 'Korean'",
    "SQL": "SELECT\n  `name`\nFROM `sets`\nWHERE\n  `code` IN (\n    SELECT\n      `setCode`\n    FROM `set_translations`\n    WHERE\n      `language` = 'Korean' AND NOT `language` LIKE '%Japanese%'\n  )",
    "difficulty": "moderate"
  },
  {
    "question_id": 530,
    "db_id": "card_games",
    "question": "List all the frame styles and cards Allen Williams worked on and find any banned cards if there are any.",
    "evidence": "frame styles refers to frameVersion; cards Allen Williams worked on refers to artist = 'Allen Williams'; banned cards refers to status = 'Banned'",
    "SQL": "SELECT DISTINCT\n  `T1`.`frameVersion`,\n  `T1`.`name`,\n  CASE WHEN `T2`.`status` = 'Banned' THEN `T1`.`name` ELSE 'NO' END\nFROM `cards` AS `T1`\nINNER JOIN `legalities` AS `T2`\n  ON `T1`.`uuid` = `T2`.`uuid`\nWHERE\n  `T1`.`artist` = 'Allen Williams'",
    "difficulty": "moderate"
  },
  {
    "question_id": 195,
    "db_id": "toxicology",
    "question": "What is the most common bond type?",
    "evidence": "most common bond type refers to MAX(COUNT(bond_type))",
    "SQL": "SELECT\n  `T`.`bond_type`\nFROM (\n  SELECT\n    `bond_type`,\n    COUNT(`bond_id`)\n  FROM `bond`\n  GROUP BY\n    `bond_type`\n  ORDER BY\n    COUNT(`bond_id`) DESC\n  LIMIT 1\n) AS `T`",
    "difficulty": "simple"
  },
  {
    "question_id": 197,
    "db_id": "toxicology",
    "question": "Calculate the average number of oxygen atoms in single-bonded molecules.",
    "evidence": "single-bonded molecules refers to bond_type = '-' ; average number of oxygen atom = AVG(element = 'o')",
    "SQL": "SELECT\n  AVG(`oxygen_count`)\nFROM (\n  SELECT\n    `T1`.`molecule_id`,\n    COUNT(`T1`.`element`) AS `oxygen_count`\n  FROM `atom` AS `T1`\n  INNER JOIN `bond` AS `T2`\n    ON `T1`.`molecule_id` = `T2`.`molecule_id`\n  WHERE\n    `T2`.`bond_type` = '-' AND `T1`.`element` = 'o'\n  GROUP BY\n    `T1`.`molecule_id`\n) AS `oxygen_counts`",
    "difficulty": "moderate"
  },
  {
    "question_id": 198,
    "db_id": "toxicology",
    "question": "On average how many carcinogenic molecules are single bonded?",
    "evidence": "carcinogenic molecules refers to label = '+'; single-bonded refers to bond_type = '-'; average = DIVIDE(SUM(bond_type = '-'), COUNT(atom_id))",
    "SQL": "SELECT\n  AVG(`single_bond_count`)\nFROM (\n  SELECT\n    `T3`.`molecule_id`,\n    COUNT(`T1`.`bond_type`) AS `single_bond_count`\n  FROM `bond` AS `T1`\n  INNER JOIN `atom` AS `T2`\n    ON `T1`.`molecule_id` = `T2`.`molecule_id`\n  INNER JOIN `molecule` AS `T3`\n    ON `T3`.`molecule_id` = `T2`.`molecule_id`\n  WHERE\n    `T1`.`bond_type` = '-' AND `T3`.`label` = '+'\n  GROUP BY\n    `T3`.`molecule_id`\n) AS `subquery`",
    "difficulty": "challenging"
  },
  {
    "question_id": 200,
    "db_id": "toxicology",
    "question": "Find the triple-bonded molecules which are carcinogenic.",
    "evidence": "triple-bonded molecules refers to bond_type = '#'; carcinogenic refers to label = '+'",
    "SQL": "SELECT DISTINCT\n  `T2`.`molecule_id`\nFROM `bond` AS `T1`\nINNER JOIN `molecule` AS `T2`\n  ON `T1`.`molecule_id` = `T2`.`molecule_id`\nWHERE\n  `T1`.`bond_type` = '#' AND `T2`.`label` = '+'",
    "difficulty": "simple"
  },
  {
    "question_id": 201,
    "db_id": "toxicology",
    "question": "What is the percentage of carbon in double-bond molecules?",
    "evidence": "carbon refers to element = 'c'; double-bond molecules refers to bond_type = '='; percentage = DIVIDE(SUM(element = 'c'), COUNT(atom_id))",
    "SQL": "SELECT\n  CAST(COUNT(DISTINCT CASE WHEN `T1`.`element` = 'c' THEN `T1`.`atom_id` ELSE NULL END) AS DOUBLE) * 100 / COUNT(DISTINCT `T1`.`atom_id`)\nFROM `atom` AS `T1`\nINNER JOIN `bond` AS `T2`\n  ON `T1`.`molecule_id` = `T2`.`molecule_id`\nWHERE\n  `T2`.`bond_type` = '='",
    "difficulty": "moderate"
  },
  {
    "question_id": 206,
    "db_id": "toxicology",
    "question": "What elements are in the TR004_8_9 bond atoms?",
    "evidence": "TR004_8_9 bond atoms refers to bond_id = 'TR004_8_9';",
    "SQL": "SELECT DISTINCT\n  `T1`.`element`\nFROM `atom` AS `T1`\nINNER JOIN `connected` AS `T2`\n  ON `T1`.`atom_id` = `T2`.`atom_id`\nWHERE\n  `T2`.`bond_id` = 'TR004_8_9'",
    "difficulty": "challenging"
  },
  {
    "question_id": 207,
    "db_id": "toxicology",
    "question": "What elements are in a double type bond?",
    "evidence": "double type bond refers to bond_type = '=';",
    "SQL": "SELECT DISTINCT\n  `T1`.`element`\nFROM `atom` AS `T1`\nINNER JOIN `bond` AS `T2`\n  ON `T1`.`molecule_id` = `T2`.`molecule_id`\nINNER JOIN `connected` AS `T3`\n  ON `T1`.`atom_id` = `T3`.`atom_id`\nWHERE\n  `T2`.`bond_type` = '='",
    "difficulty": "challenging"
  },
  {
    "question_id": 208,
    "db_id": "toxicology",
    "question": "Which type of label is the most numerous in atoms with hydrogen?",
    "evidence": "with hydrogen refers to element = 'h'; label most numerous in atoms refers to MAX(COUNT(label)); ",
    "SQL": "SELECT\n  `T`.`label`\nFROM (\n  SELECT\n    `T2`.`label`,\n    COUNT(`T2`.`molecule_id`)\n  FROM `atom` AS `T1`\n  INNER JOIN `molecule` AS `T2`\n    ON `T1`.`molecule_id` = `T2`.`molecule_id`\n  WHERE\n    `T1`.`element` = 'h'\n  GROUP BY\n    `T2`.`label`\n  ORDER BY\n    COUNT(`T2`.`molecule_id`) DESC\n  LIMIT 1\n) AS `t`",
    "difficulty": "moderate"
  },
  {
    "question_id": 212,
    "db_id": "toxicology",
    "question": "Which element is the least numerous in non-carcinogenic molecules?",
    "evidence": "label = '-' means molecules are non-carcinogenic; least numerous refers to MIN(COUNT(element));",
    "SQL": "SELECT\n  `T`.`element`\nFROM (\n  SELECT\n    `T1`.`element`,\n    COUNT(DISTINCT `T1`.`molecule_id`)\n  FROM `atom` AS `T1`\n  INNER JOIN `molecule` AS `T2`\n    ON `T1`.`molecule_id` = `T2`.`molecule_id`\n  WHERE\n    `T2`.`label` = '-'\n  GROUP BY\n    `T1`.`element`\n  ORDER BY\n    COUNT(DISTINCT `T1`.`molecule_id`) ASC\n  LIMIT 1\n) AS `t`",
    "difficulty": "challenging"
  },
  {
    "question_id": 213,
    "db_id": "toxicology",
    "question": "What type of bond is there between the atoms TR004_8 and TR004_20?",
    "evidence": "type of bond refers to bond_type; between the atoms TR004_8 and TR004_20 refers to atom_id = 'TR004_8' AND atom_id2 = 'TR004_20' OR another way around",
    "SQL": "SELECT\n  `T1`.`bond_type`\nFROM `bond` AS `T1`\nINNER JOIN `connected` AS `T2`\n  ON `T1`.`bond_id` = `T2`.`bond_id`\nWHERE\n  `T2`.`atom_id` = 'TR004_8'\n  AND `T2`.`atom_id2` = 'TR004_20'\n  OR `T2`.`atom_id2` = 'TR004_8'\n  AND `T2`.`atom_id` = 'TR004_20'",
    "difficulty": "moderate"
  },
  {
    "question_id": 215,
    "db_id": "toxicology",
    "question": "How many atoms with iodine and with sulfur type elements are there in single bond molecules?",
    "evidence": "with iodine element refer to element = 'i'; with sulfur element refers to element = 's'; single type bond refers to bond_type = '-'; Should consider the distinct atoms when counting;",
    "SQL": "SELECT\n  COUNT(DISTINCT CASE WHEN `T1`.`element` = 'i' THEN `T1`.`atom_id` ELSE NULL END) AS `iodine_nums`,\n  COUNT(DISTINCT CASE WHEN `T1`.`element` = 's' THEN `T1`.`atom_id` ELSE NULL END) AS `sulfur_nums`\nFROM `atom` AS `T1`\nINNER JOIN `connected` AS `T2`\n  ON `T1`.`atom_id` = `T2`.`atom_id`\nINNER JOIN `bond` AS `T3`\n  ON `T2`.`bond_id` = `T3`.`bond_id`\nWHERE\n  `T3`.`bond_type` = '-'",
    "difficulty": "challenging"
  },
  {
    "question_id": 218,
    "db_id": "toxicology",
    "question": "What percentage of carcinogenic-type molecules does not contain fluorine?",
    "evidence": "label = '+' mean molecules are carcinogenic; contain fluorine refers to element = 'f'; percentage = DIVIDE(SUM(element = 'f') * 100, COUNT(molecule_id)) where label = '+'; Should consider the distinct atoms when counting;",
    "SQL": "SELECT\n  CAST(COUNT(DISTINCT CASE WHEN `T1`.`element` <> 'f' THEN `T2`.`molecule_id` ELSE NULL END) AS DOUBLE) * 100 / COUNT(DISTINCT `T2`.`molecule_id`)\nFROM `atom` AS `T1`\nINNER JOIN `molecule` AS `T2`\n  ON `T1`.`molecule_id` = `T2`.`molecule_id`\nWHERE\n  `T2`.`label` = '+'",
    "difficulty": "challenging"
  },
  {
    "question_id": 219,
    "db_id": "toxicology",
    "question": "What is the percentage of carcinogenic molecules in triple type bonds?",
    "evidence": "label = '+' mean molecules are carcinogenic; triple bond refers to bond_type = '#'; percentage = DIVIDE(SUM(bond_type = '#') * 100, COUNT(bond_id)) as percent where label = '+'",
    "SQL": "SELECT\n  CAST(COUNT(DISTINCT CASE WHEN `T2`.`label` = '+' THEN `T2`.`molecule_id` ELSE NULL END) AS DOUBLE) * 100 / COUNT(DISTINCT `T2`.`molecule_id`)\nFROM `atom` AS `T1`\nINNER JOIN `molecule` AS `T2`\n  ON `T1`.`molecule_id` = `T2`.`molecule_id`\nINNER JOIN `bond` AS `T3`\n  ON `T2`.`molecule_id` = `T3`.`molecule_id`\nWHERE\n  `T3`.`bond_type` = '#'",
    "difficulty": "challenging"
  },
  {
    "question_id": 220,
    "db_id": "toxicology",
    "question": "Please list top three elements of the toxicology of the molecule TR000 in alphabetical order.",
    "evidence": "TR000 is the molecule id;",
    "SQL": "SELECT DISTINCT\n  `T`.`element`\nFROM `atom` AS `T`\nWHERE\n  `T`.`molecule_id` = 'TR000'\nORDER BY\n  `T`.`element`\nLIMIT 3",
    "difficulty": "challenging"
  },
  {
    "question_id": 226,
    "db_id": "toxicology",
    "question": "What is the percentage of double bonds in the molecule TR008? Please provide your answer as a percentage with five decimal places.",
    "evidence": "double bond refers to bond_type = '='; TR008 is the molecule id; percentage = DIVIDE(SUM(bond_type = '='), COUNT(bond_id)) as percent where molecule_id = 'TR008'",
    "SQL": "SELECT\n  ROUND(\n    CAST(COUNT(CASE WHEN `T`.`bond_type` = '=' THEN `T`.`bond_id` ELSE NULL END) AS DOUBLE) * 100 / COUNT(`T`.`bond_id`),\n    5\n  )\nFROM `bond` AS `T`\nWHERE\n  `T`.`molecule_id` = 'TR008'",
    "difficulty": "moderate"
  },
  {
    "question_id": 227,
    "db_id": "toxicology",
    "question": "What is the percentage of molecules that are carcinogenic? Please provide your answer as a percentage with three decimal places.",
    "evidence": "label = '+' mean molecules are carcinogenic; percentage = DIVIDE(SUM(label = '+'), COUNT(molecule_id)) as percent",
    "SQL": "SELECT\n  ROUND(\n    CAST(COUNT(CASE WHEN `T`.`label` = '+' THEN `T`.`molecule_id` ELSE NULL END) AS DOUBLE) * 100 / COUNT(`T`.`molecule_id`),\n    3\n  )\nFROM `molecule` AS `t`",
    "difficulty": "simple"
  },
  {
    "question_id": 228,
    "db_id": "toxicology",
    "question": "How much of the hydrogen in molecule TR206 is accounted for? Please provide your answer as a percentage with four decimal places.",
    "evidence": "hydrogen refers to element = 'h'; TR206 is the molecule id; percentage = DIVIDE(SUM(element = 'h'), COUNT(atom_id)) as percent where molecule_id = 'TR206'",
    "SQL": "SELECT\n  ROUND(\n    CAST(COUNT(CASE WHEN `T`.`element` = 'h' THEN `T`.`atom_id` ELSE NULL END) AS DOUBLE) * 100 / COUNT(`T`.`atom_id`),\n    4\n  )\nFROM `atom` AS `T`\nWHERE\n  `T`.`molecule_id` = 'TR206'",
    "difficulty": "moderate"
  },
  {
    "question_id": 230,
    "db_id": "toxicology",
    "question": "What are the elements of the toxicology and label of molecule TR060?",
    "evidence": "TR060 is the molecule id; ",
    "SQL": "SELECT DISTINCT\n  `T1`.`element`,\n  `T2`.`label`\nFROM `atom` AS `T1`\nINNER JOIN `molecule` AS `T2`\n  ON `T1`.`molecule_id` = `T2`.`molecule_id`\nWHERE\n  `T2`.`molecule_id` = 'TR060'",
    "difficulty": "challenging"
  },
  {
    "question_id": 231,
    "db_id": "toxicology",
    "question": "Which bond type accounted for the majority of the bonds found in molecule TR010 and state whether or not this molecule is carcinogenic?",
    "evidence": "TR010 is the molecule id; majority of the bond found refers to MAX(COUNT(bond_type)); ",
    "SQL": "SELECT\n  `T`.`bond_type`\nFROM (\n  SELECT\n    `T1`.`bond_type`,\n    COUNT(`T1`.`molecule_id`)\n  FROM `bond` AS `T1`\n  WHERE\n    `T1`.`molecule_id` = 'TR010'\n  GROUP BY\n    `T1`.`bond_type`\n  ORDER BY\n    COUNT(`T1`.`molecule_id`) DESC\n  LIMIT 1\n) AS `T`",
    "difficulty": "challenging"
  },
  {
    "question_id": 232,
    "db_id": "toxicology",
    "question": "Please list top three molecules that have single bonds between two atoms and are not carcinogenic in alphabetical order.",
    "evidence": "label = '-' means molecules are not carcinogenic; single type bond refers to bond_type = '-'; list top three molecules refers to return molecule_id and order by molecule_id;",
    "SQL": "SELECT DISTINCT\n  `T2`.`molecule_id`\nFROM `bond` AS `T1`\nINNER JOIN `molecule` AS `T2`\n  ON `T1`.`molecule_id` = `T2`.`molecule_id`\nWHERE\n  `T1`.`bond_type` = '-' AND `T2`.`label` = '-'\nORDER BY\n  `T2`.`molecule_id`\nLIMIT 3",
    "difficulty": "moderate"
  },
  {
    "question_id": 234,
    "db_id": "toxicology",
    "question": "How many bonds which involved atom 12 does molecule TR009 have?",
    "evidence": "TR009 is the molecule id;  involved atom 12 refers to atom_id = 'TR009_12' or atom_id2 = 'TR009_12'",
    "SQL": "SELECT\n  COUNT(`T2`.`bond_id`)\nFROM `bond` AS `T1`\nINNER JOIN `connected` AS `T2`\n  ON `T1`.`bond_id` = `T2`.`bond_id`\nWHERE\n  `T1`.`molecule_id` = 'TR009'\n  AND `T2`.`atom_id` = CONCAT(`T1`.`molecule_id`, '_1')\n  OR `T2`.`atom_id2` = CONCAT(`T1`.`molecule_id`, '_2')",
    "difficulty": "moderate"
  },
  {
    "question_id": 236,
    "db_id": "toxicology",
    "question": "What are the bond type and the atoms of the bond ID of TR001_6_9?",
    "evidence": "atoms refer to atom_id or atom_id2",
    "SQL": "SELECT\n  `T1`.`bond_type`,\n  `T2`.`atom_id`,\n  `T2`.`atom_id2`\nFROM `bond` AS `T1`\nINNER JOIN `connected` AS `T2`\n  ON `T1`.`bond_id` = `T2`.`bond_id`\nWHERE\n  `T2`.`bond_id` = 'TR001_6_9'",
    "difficulty": "moderate"
  },
  {
    "question_id": 239,
    "db_id": "toxicology",
    "question": "How many connections does the atom 19 have?",
    "evidence": "connections refers to bond_id; atom 19 refers to atom_id like 'TR%_19';",
    "SQL": "SELECT\n  COUNT(`T`.`bond_id`)\nFROM `connected` AS `T`\nWHERE\n  SUBSTR(`T`.`atom_id`, -2) = '19'",
    "difficulty": "simple"
  },
  {
    "question_id": 240,
    "db_id": "toxicology",
    "question": "List all the elements of the toxicology of the molecule \"TR004\".",
    "evidence": "TR004 is the molecule id;",
    "SQL": "SELECT DISTINCT\n  `T`.`element`\nFROM `atom` AS `T`\nWHERE\n  `T`.`molecule_id` = 'TR004'",
    "difficulty": "challenging"
  },
  {
    "question_id": 242,
    "db_id": "toxicology",
    "question": "Among all the atoms from 21 to 25, list all the molecules that are carcinogenic.",
    "evidence": "atoms from 21 to 25 refers to SUBSTR(atom_id, 7, 2) between '21' and '25'; label = '+' mean molecules are carcinogenic",
    "SQL": "SELECT DISTINCT\n  `T2`.`molecule_id`\nFROM `atom` AS `T1`\nINNER JOIN `molecule` AS `T2`\n  ON `T1`.`molecule_id` = `T2`.`molecule_id`\nWHERE\n  SUBSTR(`T1`.`atom_id`, -2) BETWEEN '21' AND '25' AND `T2`.`label` = '+'",
    "difficulty": "moderate"
  },
  {
    "question_id": 243,
    "db_id": "toxicology",
    "question": "What are the bonds that have phosphorus and nitrogen as their atom elements?",
    "evidence": "have phosphorus as atom elements refers to element = 'p'; have nitrogen as atom elements refers to element = 'n'",
    "SQL": "SELECT\n  `T2`.`bond_id`\nFROM `atom` AS `T1`\nINNER JOIN `connected` AS `T2`\n  ON `T1`.`atom_id` = `T2`.`atom_id`\nWHERE\n  `T2`.`bond_id` IN (\n    SELECT\n      `T3`.`bond_id`\n    FROM `connected` AS `T3`\n    INNER JOIN `atom` AS `T4`\n      ON `T3`.`atom_id` = `T4`.`atom_id`\n    WHERE\n      `T4`.`element` = 'p'\n  )\n  AND `T1`.`element` = 'n'",
    "difficulty": "moderate"
  },
  {
    "question_id": 244,
    "db_id": "toxicology",
    "question": "Is the molecule with the most double bonds carcinogenic?",
    "evidence": "double bond refers to bond_type = ' = '; label = '+' mean molecules are carcinogenic",
    "SQL": "SELECT\n  `T1`.`label`\nFROM `molecule` AS `T1`\nINNER JOIN (\n  SELECT\n    `T`.`molecule_id`,\n    COUNT(`T`.`bond_type`)\n  FROM `bond` AS `T`\n  WHERE\n    `T`.`bond_type` = '='\n  GROUP BY\n    `T`.`molecule_id`\n  ORDER BY\n    COUNT(`T`.`bond_type`) DESC\n  LIMIT 1\n) AS `T2`\n  ON `T1`.`molecule_id` = `T2`.`molecule_id`",
    "difficulty": "moderate"
  },
  {
    "question_id": 245,
    "db_id": "toxicology",
    "question": "What is the average number of bonds the atoms with the element iodine have?",
    "evidence": "atoms with the element iodine refers to element = 'i'; average = DIVIDE(COUND(bond_id), COUNT(atom_id)) where element = 'i'",
    "SQL": "SELECT\n  CAST(COUNT(`T2`.`bond_id`) AS DOUBLE) / COUNT(`T1`.`atom_id`)\nFROM `atom` AS `T1`\nINNER JOIN `connected` AS `T2`\n  ON `T1`.`atom_id` = `T2`.`atom_id`\nWHERE\n  `T1`.`element` = 'i'",
    "difficulty": "moderate"
  },
  {
    "question_id": 247,
    "db_id": "toxicology",
    "question": "List all the elements of atoms that can not bond with any other atoms.",
    "evidence": " atoms cannot bond with other atoms means atom_id NOT in connected table;",
    "SQL": "SELECT DISTINCT\n  `T`.`element`\nFROM `atom` AS `T`\nWHERE\n  NOT `T`.`element` IN (\n    SELECT DISTINCT\n      `T1`.`element`\n    FROM `atom` AS `T1`\n    INNER JOIN `connected` AS `T2`\n      ON `T1`.`atom_id` = `T2`.`atom_id`\n  )",
    "difficulty": "challenging"
  },
  {
    "question_id": 248,
    "db_id": "toxicology",
    "question": "What are the atoms of the triple bond with the molecule \"TR041\"?",
    "evidence": "TR041 is the molecule id; triple bond refers to bond_type = '#';",
    "SQL": "SELECT\n  `T2`.`atom_id`,\n  `T2`.`atom_id2`\nFROM `atom` AS `T1`\nINNER JOIN `connected` AS `T2`\n  ON `T1`.`atom_id` = `T2`.`atom_id`\nINNER JOIN `bond` AS `T3`\n  ON `T2`.`bond_id` = `T3`.`bond_id`\nWHERE\n  `T3`.`bond_type` = '#' AND `T3`.`molecule_id` = 'TR041'",
    "difficulty": "simple"
  },
  {
    "question_id": 249,
    "db_id": "toxicology",
    "question": "What are the elements of the atoms of TR144_8_19?",
    "evidence": "TR144_8_19 is the bond id; ",
    "SQL": "SELECT\n  `T2`.`element`\nFROM `connected` AS `T1`\nINNER JOIN `atom` AS `T2`\n  ON `T1`.`atom_id` = `T2`.`atom_id`\nWHERE\n  `T1`.`bond_id` = 'TR144_8_19'",
    "difficulty": "challenging"
  },
  {
    "question_id": 253,
    "db_id": "toxicology",
    "question": "List the elements of all the triple bonds.",
    "evidence": "triple bond refers to bond_type = '#';",
    "SQL": "SELECT DISTINCT\n  `T3`.`element`\nFROM `bond` AS `T1`\nINNER JOIN `connected` AS `T2`\n  ON `T1`.`bond_id` = `T2`.`bond_id`\nINNER JOIN `atom` AS `T3`\n  ON `T2`.`atom_id` = `T3`.`atom_id`\nWHERE\n  `T1`.`bond_type` = '#'",
    "difficulty": "challenging"
  },
  {
    "question_id": 255,
    "db_id": "toxicology",
    "question": "What proportion of single bonds are carcinogenic? Please provide your answer as a percentage with five decimal places.",
    "evidence": "single bond refers to bond_type = '-'; label = '+' mean molecules are carcinogenic; proportion = DIVIDE(SUM(label = '+') * 100, COUNT(bond_id)) where bond_type = '-'",
    "SQL": "SELECT\n  ROUND(\n    CAST(COUNT(CASE WHEN `T2`.`label` = '+' THEN `T1`.`bond_id` ELSE NULL END) AS DOUBLE) * 100 / COUNT(`T1`.`bond_id`),\n    5\n  )\nFROM `bond` AS `T1`\nINNER JOIN `molecule` AS `T2`\n  ON `T1`.`molecule_id` = `T2`.`molecule_id`\nWHERE\n  `T1`.`bond_type` = '-'",
    "difficulty": "moderate"
  },
  {
    "question_id": 260,
    "db_id": "toxicology",
    "question": "Calculate the total atoms with triple-bond molecules containing the element phosphorus or bromine.",
    "evidence": "triple bond refers to bond_type = '#'; phosphorus refers to element = 'p'; bromine refers to element = 'br'",
    "SQL": "SELECT\n  COUNT(`T1`.`atom_id`)\nFROM `atom` AS `T1`\nINNER JOIN `molecule` AS `T2`\n  ON `T1`.`molecule_id` = `T2`.`molecule_id`\nINNER JOIN `bond` AS `T3`\n  ON `T2`.`molecule_id` = `T3`.`molecule_id`\nWHERE\n  `T3`.`bond_type` = '#' AND `T1`.`element` IN ('p', 'br')",
    "difficulty": "moderate"
  },
  {
    "question_id": 263,
    "db_id": "toxicology",
    "question": "What is the composition of element chlorine in percentage among the single bond molecules?",
    "evidence": "element chlorine refers to element = 'cl'; single bond refers to bond_type = '-'; percentage = DIVIDE(SUM(element = 'cl'), COUNT(atom_id)) as percent where bond_type = '-'",
    "SQL": "SELECT\n  CAST(COUNT(CASE WHEN `T`.`element` = 'cl' THEN `T`.`atom_id` ELSE NULL END) AS DOUBLE) * 100 / COUNT(`T`.`atom_id`)\nFROM (\n  SELECT\n    `T1`.`atom_id`,\n    `T1`.`element`\n  FROM `atom` AS `T1`\n  INNER JOIN `molecule` AS `T2`\n    ON `T1`.`molecule_id` = `T2`.`molecule_id`\n  INNER JOIN `bond` AS `T3`\n    ON `T2`.`molecule_id` = `T3`.`molecule_id`\n  WHERE\n    `T3`.`bond_type` = '-'\n) AS `T`",
    "difficulty": "challenging"
  },
  {
    "question_id": 268,
    "db_id": "toxicology",
    "question": "What are the elements for bond id TR001_10_11?",
    "evidence": "TR001_10_11 is the bond id;",
    "SQL": "SELECT\n  `T2`.`element`\nFROM `connected` AS `T1`\nINNER JOIN `atom` AS `T2`\n  ON `T1`.`atom_id` = `T2`.`atom_id`\nWHERE\n  `T1`.`bond_id` = 'TR001_10_11'",
    "difficulty": "challenging"
  },
  {
    "question_id": 273,
    "db_id": "toxicology",
    "question": "What is the percentage of element chlorine in carcinogenic molecules?",
    "evidence": "chlorine refers to element = 'cl'; label = '+' mean molecules are carcinogenic; percentage = DIVIDE(SUM(element = 'pb'); COUNT(molecule_id)) as percentage where label = '+'",
    "SQL": "SELECT\n  CAST(COUNT(CASE WHEN `T1`.`element` = 'cl' THEN `T1`.`element` ELSE NULL END) AS DOUBLE) * 100 / COUNT(`T1`.`element`)\nFROM `atom` AS `T1`\nINNER JOIN `molecule` AS `T2`\n  ON `T1`.`molecule_id` = `T2`.`molecule_id`\nWHERE\n  `T2`.`label` = '+'",
    "difficulty": "moderate"
  },
  {
    "question_id": 281,
    "db_id": "toxicology",
    "question": "Tally the toxicology element of the 4th atom of each molecule that was carcinogenic.",
    "evidence": "label = '+' means molecules are carcinogenic; 4th atom of each molecule refers to substr(atom_id, 7, 1) = '4'; ",
    "SQL": "SELECT DISTINCT\n  `T1`.`element`\nFROM `atom` AS `T1`\nINNER JOIN `molecule` AS `T2`\n  ON `T1`.`molecule_id` = `T2`.`molecule_id`\nWHERE\n  `T2`.`label` = '+'\n  AND SUBSTR(`T1`.`atom_id`, -1) = '4'\n  AND LENGTH(`T1`.`atom_id`) = 7",
    "difficulty": "challenging"
  },
  {
    "question_id": 282,
    "db_id": "toxicology",
    "question": "What is the ratio of Hydrogen elements in molecule ID TR006? List the ratio with its label.",
    "evidence": "hydrogen refers to element = 'h'; ratio = DIVIDE(SUM(element = 'h'), count(element)) where molecule_id = 'TR006' ; label = '+' mean molecules are carcinogenic; label = '-' means molecules are non-carcinogenic",
    "SQL": "WITH SubQuery AS (SELECT DISTINCT T1.atom_id, T1.element, T1.molecule_id, T2.label FROM atom AS T1 INNER JOIN molecule AS T2 ON T1.molecule_id = T2.molecule_id WHERE T2.molecule_id = 'TR006') SELECT CAST(COUNT(CASE WHEN element = 'h' THEN atom_id ELSE NULL END) AS DECIMAL(10,2)) / NULLIF(COUNT(atom_id), 0) AS ratio, label FROM SubQuery GROUP BY label",
    "difficulty": "challenging"
  },
  {
    "question_id": 327,
    "db_id": "toxicology",
    "question": "Which non-carcinogenic molecules consisted more than 5 atoms?",
    "evidence": "label = '-' means molecules are non-carcinogenic; molecules consisted more than 5 atoms refers to COUNT(molecule_id) > 5",
    "SQL": "SELECT\n  `T`.`molecule_id`\nFROM (\n  SELECT\n    `T1`.`molecule_id`,\n    COUNT(`T2`.`atom_id`)\n  FROM `molecule` AS `T1`\n  INNER JOIN `atom` AS `T2`\n    ON `T1`.`molecule_id` = `T2`.`molecule_id`\n  WHERE\n    `T1`.`label` = '-'\n  GROUP BY\n    `T1`.`molecule_id`\n  HAVING\n    COUNT(`T2`.`atom_id`) > 5\n) AS `t`",
    "difficulty": "moderate"
  },
  {
    "question_id": 5,
    "db_id": "california_schools",
    "question": "How many schools with an average score in Math greater than 400 in the SAT test are exclusively virtual?",
    "evidence": "Exclusively virtual refers to Virtual = 'F'",
    "SQL": "SELECT  COUNT(DISTINCT `T2`.`School`) FROM `satscores` AS `T1` INNER JOIN `schools` AS `T2`  ON `T1`.`cds` = `T2`.`CDSCode` WHERE  `T2`.`Virtual` = 'F' AND `T1`.`AvgScrMath` > 400",
    "difficulty": "simple"
  },
  {
    "question_id": 11,
    "db_id": "california_schools",
    "question": "Please list the codes of the schools with a total enrollment of over 500.",
    "evidence": "Total enrollment can be represented by `Enrollment (K-12)` + `Enrollment (Ages 5-17)`",
    "SQL": "SELECT\n  `T2`.`CDSCode`\nFROM `schools` AS `T1`\nINNER JOIN `frpm` AS `T2`\n  ON `T1`.`CDSCode` = `T2`.`CDSCode`\nWHERE\n  `T2`.`Enrollment (K-12)` + `T2`.`Enrollment (Ages 5-17)` > 500",
    "difficulty": "simple"
  },
  {
    "question_id": 12,
    "db_id": "california_schools",
    "question": "Among the schools with an SAT excellence rate of over 0.3, what is the highest eligible free rate for students aged 5-17?",
    "evidence": "Excellence rate = NumGE1500 / NumTstTakr; Eligible free rates for students aged 5-17 = `Free Meal Count (Ages 5-17)` / `Enrollment (Ages 5-17)`",
    "SQL": "SELECT\n  MAX(\n    CAST(`T1`.`Free Meal Count (Ages 5-17)` AS DOUBLE) / `T1`.`Enrollment (Ages 5-17)`\n  )\nFROM `frpm` AS `T1`\nINNER JOIN `satscores` AS `T2`\n  ON `T1`.`CDSCode` = `T2`.`cds`\nWHERE\n  CAST(`T2`.`NumGE1500` AS DOUBLE) / `T2`.`NumTstTakr` > 0.3",
    "difficulty": "moderate"
  },
  {
    "question_id": 17,
    "db_id": "california_schools",
    "question": "Rank schools by their average score in Writing where the score is greater than 499, showing their charter numbers.",
    "evidence": "Valid charter number means the number is not null",
    "SQL": "SELECT\n  `CharterNum`,\n  `AvgScrWrite`,\n  RANK() OVER (ORDER BY `AvgScrWrite` DESC) AS `WritingScoreRank`\nFROM `schools` AS `T1`\nINNER JOIN `satscores` AS `T2`\n  ON `T1`.`CDSCode` = `T2`.`cds`\nWHERE\n  `T2`.`AvgScrWrite` > 499 AND NOT `CharterNum` IS NULL",
    "difficulty": "simple"
  },
  {
    "question_id": 23,
    "db_id": "california_schools",
    "question": "List the names of schools with more than 30 difference in enrollements between K-12 and ages 5-17? Please also give the full street adress of the schools.",
    "evidence": "Diffrence in enrollement = `Enrollment (K-12)` - `Enrollment (Ages 5-17)`",
    "SQL": "SELECT\n  `T1`.`School`,\n  `T1`.`Street`\nFROM `schools` AS `T1`\nINNER JOIN `frpm` AS `T2`\n  ON `T1`.`CDSCode` = `T2`.`CDSCode`\nWHERE\n  `T2`.`Enrollment (K-12)` - `T2`.`Enrollment (Ages 5-17)` > 30",
    "difficulty": "moderate"
  },
  {
    "question_id": 24,
    "db_id": "california_schools",
    "question": "Give the names of the schools with the percent eligible for free meals in K-12 is more than 0.1 and test takers whose test score is greater than or equal to 1500?",
    "evidence": "Percent eligible for free meals = Free Meal Count (K-12) / Total (Enrollment (K-12)",
    "SQL": "SELECT\n  `T2`.`School Name`\nFROM `satscores` AS `T1`\nINNER JOIN `frpm` AS `T2`\n  ON `T1`.`cds` = `T2`.`CDSCode`\nWHERE\n  CAST(`T2`.`Free Meal Count (K-12)` AS DOUBLE) / `T2`.`Enrollment (K-12)` > 0.1\n  AND `T1`.`NumGE1500` > 0",
    "difficulty": "moderate"
  },
  {
    "question_id": 25,
    "db_id": "california_schools",
    "question": "Name schools in Riverside which the average of average math score for SAT is grater than 400, what is the funding type of these schools?",
    "evidence": "Average of average math = sum(average math scores) / count(schools).",
    "SQL": "SELECT\n  `T1`.`sname`,\n  `T2`.`Charter Funding Type`\nFROM `satscores` AS `T1`\nINNER JOIN `frpm` AS `T2`\n  ON `T1`.`cds` = `T2`.`CDSCode`\nWHERE\n  `T2`.`District Name` LIKE 'Riverside%'\nGROUP BY\n  `T1`.`sname`,\n  `T2`.`Charter Funding Type`\nHAVING\n  CAST(SUM(`T1`.`AvgScrMath`) AS DOUBLE) / COUNT(`T1`.`cds`) > 400",
    "difficulty": "moderate"
  },
  {
    "question_id": 26,
    "db_id": "california_schools",
    "question": "State the names and full communication address of high schools in Monterey which has more than 800 free or reduced price meals for ages 15-17?",
    "evidence": "Full communication address should include Street, City, State and zip code if any.",
    "SQL": "SELECT\n  `T1`.`School Name`,\n  `T2`.`Street`,\n  `T2`.`City`,\n  `T2`.`State`,\n  `T2`.`Zip`\nFROM `frpm` AS `T1`\nINNER JOIN `schools` AS `T2`\n  ON `T1`.`CDSCode` = `T2`.`CDSCode`\nWHERE\n  `T2`.`County` = 'Monterey'\n  AND `T1`.`Free Meal Count (Ages 5-17)` > 800\n  AND `T1`.`School Type` = 'High Schools (Public)'",
    "difficulty": "moderate"
  },
  {
    "question_id": 27,
    "db_id": "california_schools",
    "question": "What is the average score in writing for the schools that were opened after 1991 or closed before 2000? List the school names along with the score. Also, list the communication number of the schools if there is any.",
    "evidence": "Communication number refers to phone number.",
    "SQL": "SELECT\n  `T2`.`School`,\n  `T1`.`AvgScrWrite`,\n  `T2`.`Phone`\nFROM `schools` AS `T2`\nLEFT JOIN `satscores` AS `T1`\n  ON `T2`.`CDSCode` = `T1`.`cds`\nWHERE\n  DATE_FORMAT(CAST(`T2`.`OpenDate` AS DATETIME), '%Y') > '1991'\n  OR DATE_FORMAT(CAST(`T2`.`ClosedDate` AS DATETIME), '%Y') < '2000'",
    "difficulty": "moderate"
  },
  {
    "question_id": 28,
    "db_id": "california_schools",
    "question": "Consider the average difference between K-12 enrollment and 15-17 enrollment of schools that are locally funded, list the names and DOC type of schools which has a difference above this average.",
    "evidence": "Difference between K-12 enrollment and 15-17 enrollment can be computed by `Enrollment (K-12)` - `Enrollment (Ages 5-17)`",
    "SQL": "SELECT\n  `T2`.`School`,\n  `T2`.`DOC`\nFROM `frpm` AS `T1`\nINNER JOIN `schools` AS `T2`\n  ON `T1`.`CDSCode` = `T2`.`CDSCode`\nWHERE\n  `T2`.`FundingType` = 'Locally funded'\n  AND (\n    `T1`.`Enrollment (K-12)` - `T1`.`Enrollment (Ages 5-17)`\n  ) > (\n    SELECT\n      AVG(`T3`.`Enrollment (K-12)` - `T3`.`Enrollment (Ages 5-17)`)\n    FROM `frpm` AS `T3`\n    INNER JOIN `schools` AS `T4`\n      ON `T3`.`CDSCode` = `T4`.`CDSCode`\n    WHERE\n      `T4`.`FundingType` = 'Locally funded'\n  )",
    "difficulty": "challenging"
  },
  {
    "question_id": 31,
    "db_id": "california_schools",
    "question": "What is the eligible free rate of the 10th and 11th schools with the highest enrolment for students in grades 1 through 12?",
    "evidence": "K-12 refers to students in grades 1 through 12; Eligible free rate for K-12 = `Free Meal Count (K-12)` / `Enrollment (K-12)`",
    "SQL": "SELECT\n  CAST(`Free Meal Count (K-12)` AS DOUBLE) / `Enrollment (K-12)`\nFROM `frpm`\nORDER BY\n  `Enrollment (K-12)` DESC\nLIMIT 2\nOFFSET 9",
    "difficulty": "moderate"
  },
  {
    "question_id": 32,
    "db_id": "california_schools",
    "question": "What is the eligible free or reduced price meal rate for the top 5 schools in grades 1-12 with the highest free or reduced price meal count of the schools with the ownership code 66?",
    "evidence": "grades 1-12 means K-12; Eligible free or reduced price meal rate for K-12 = `FRPM Count (K-12)` / `Enrollment (K-12)`",
    "SQL": "SELECT\n  CAST(`T1`.`FRPM Count (K-12)` AS DOUBLE) / `T1`.`Enrollment (K-12)`\nFROM `frpm` AS `T1`\nINNER JOIN `schools` AS `T2`\n  ON `T1`.`CDSCode` = `T2`.`CDSCode`\nWHERE\n  `T2`.`SOC` = 66\nORDER BY\n  `T1`.`FRPM Count (K-12)` DESC\nLIMIT 5",
    "difficulty": "moderate"
  },
  {
    "question_id": 37,
    "db_id": "california_schools",
    "question": "What is the complete address of the school with the lowest excellence rate? Indicate the Street, City, Zip and State.",
    "evidence": "Execellence Rate = NumGE1500 / NumTstTakr; complete address has Street, City, State, Zip code",
    "SQL": "SELECT\n  `T2`.`Street`,\n  `T2`.`City`,\n  `T2`.`State`,\n  `T2`.`Zip`\nFROM `satscores` AS `T1`\nINNER JOIN `schools` AS `T2`\n  ON `T1`.`cds` = `T2`.`CDSCode`\nORDER BY\n  CAST(`T1`.`NumGE1500` AS DOUBLE) / `T1`.`NumTstTakr` ASC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 36,
    "db_id": "california_schools",
    "question": "Under whose administration is the school with the highest number of students scoring 1500 or more on the SAT? Indicate their full names.",
    "evidence": "full name means first name, last name; There are at most 3 administrators for each school; SAT Scores are greater or equal to 1500 refers to NumGE1500",
    "SQL": "SELECT\n  `T2`.`AdmFName1`,\n  `T2`.`AdmLName1`,\n  `T2`.`AdmFName2`,\n  `T2`.`AdmLName2`,\n  `T2`.`AdmFName3`,\n  `T2`.`AdmLName3`\nFROM `satscores` AS `T1`\nINNER JOIN `schools` AS `T2`\n  ON `T1`.`cds` = `T2`.`CDSCode`\nORDER BY\n  `T1`.`NumGE1500` DESC\nLIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 39,
    "db_id": "california_schools",
    "question": "What is the average number of test takers from Fresno schools that opened between 1/1/1980 and 12/31/1980?",
    "evidence": "between 1/1/1980 and 12/31/1980 means the year = 1980",
    "SQL": "SELECT\n  AVG(`T1`.`NumTstTakr`)\nFROM `satscores` AS `T1`\nINNER JOIN `schools` AS `T2`\n  ON `T1`.`cds` = `T2`.`CDSCode`\nWHERE\n  DATE_FORMAT(CAST(`T2`.`OpenDate` AS DATETIME), '%Y') = '1980'\n  AND `T2`.`County` = 'Fresno'",
    "difficulty": "simple"
  },
  {
    "question_id": 40,
    "db_id": "california_schools",
    "question": "What is the telephone number for the school with the lowest average score in reading in Fresno Unified?",
    "evidence": "Fresno Unified is a name of district;",
    "SQL": "SELECT\n  `T2`.`Phone`\nFROM `satscores` AS `T1`\nINNER JOIN `schools` AS `T2`\n  ON `T1`.`cds` = `T2`.`CDSCode`\nWHERE\n  `T2`.`District` = 'Fresno Unified' AND NOT `T1`.`AvgScrRead` IS NULL\nORDER BY\n  `T1`.`AvgScrRead` ASC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 41,
    "db_id": "california_schools",
    "question": "List the names of virtual schools that are among the top 5 in their respective counties based on average reading scores.",
    "evidence": "Exclusively virtual refers to Virtual = 'F'; respective counties means PARTITION BY County",
    "SQL": "SELECT  School FROM ( SELECT T2.School, T1.AvgScrRead, RANK() OVER (PARTITION BY T2.County ORDER BY T1.AvgScrRead DESC) AS rnk FROM satscores AS T1 INNER JOIN schools AS T2 ON T1.cds = T2.CDSCode WHERE T2.Virtual = 'F') ranked_schools WHERE rnk <= 5",
    "difficulty": "simple"
  },
  {
    "question_id": 45,
    "db_id": "california_schools",
    "question": "What is the average writing score of each of the schools managed by Ricci Ulrich? List the schools and the corresponding average writing scores.",
    "evidence": "Usually, administrators manage the school stuff.",
    "SQL": "SELECT\n  `T2`.`School`,\n  `T1`.`AvgScrWrite`\nFROM `satscores` AS `T1`\nINNER JOIN `schools` AS `T2`\n  ON `T1`.`cds` = `T2`.`CDSCode`\nWHERE\n  `T2`.`AdmFName1` = 'Ricci' AND `T2`.`AdmLName1` = 'Ulrich'",
    "difficulty": "moderate"
  },
  {
    "question_id": 46,
    "db_id": "california_schools",
    "question": "Which state special schools have the highest number of enrollees from grades 1 through 12?",
    "evidence": "State Special Schools refers to DOC = 31; Grades 1 through 12 means K-12",
    "SQL": "SELECT\n  `T2`.`School`\nFROM `frpm` AS `T1`\nINNER JOIN `schools` AS `T2`\n  ON `T1`.`CDSCode` = `T2`.`CDSCode`\nWHERE\n  `T2`.`DOC` = 31\nORDER BY\n  `T1`.`Enrollment (K-12)` DESC\nLIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 47,
    "db_id": "california_schools",
    "question": "What is the monthly average number of schools that opened in Alameda County under the jurisdiction of the Elementary School District in 1980?",
    "evidence": "Elementary School District refers to DOC = 52; Monthly average number of schools that opened in 1980 = count(schools that opened in 1980) / 12",
    "SQL": "SELECT\n  CAST(COUNT(`School`) AS DOUBLE) / 12\nFROM `schools`\nWHERE\n  `DOC` = 52\n  AND `County` = 'Alameda'\n  AND DATE_FORMAT(CAST(`OpenDate` AS DATETIME), '%Y') = '1980'",
    "difficulty": "moderate"
  },
  {
    "question_id": 48,
    "db_id": "california_schools",
    "question": "What is the ratio of merged Unified School District schools in Orange County to merged Elementary School District schools?",
    "evidence": "Elementary School District refers to DOC = 52; Unified School District refers to DOC = 54.",
    "SQL": "SELECT\n  CAST(SUM(CASE WHEN `DOC` = 54 THEN 1 ELSE 0 END) AS DOUBLE) / SUM(CASE WHEN `DOC` = 52 THEN 1 ELSE 0 END)\nFROM `schools`\nWHERE\n  `StatusType` = 'Merged' AND `County` = 'Orange'",
    "difficulty": "moderate"
  },
  {
    "question_id": 50,
    "db_id": "california_schools",
    "question": "What is the postal street address for the school with the 7th highest Math average? Indicate the school's name.",
    "evidence": "Postal street and mailing street are synonyms.",
    "SQL": "SELECT\n  `T2`.`MailStreet`,\n  `T2`.`School`\nFROM `satscores` AS `T1`\nINNER JOIN `schools` AS `T2`\n  ON `T1`.`cds` = `T2`.`CDSCode`\nORDER BY\n  `T1`.`AvgScrMath` DESC\nLIMIT 1\nOFFSET 6",
    "difficulty": "simple"
  },
  {
    "question_id": 62,
    "db_id": "california_schools",
    "question": "What is the total number of non-chartered schools in the county of Los Angeles with a percent (%) of eligible free meals for grades 1 through 12 that is less than 0.18%?",
    "evidence": "non-chartered schools refer to schools whose Charter = 0; K-12 means grades 1 through 12; percent of eligible free rate for K-12 = `Free Meal Count (K-12)` * 100 / `Enrollment (K-12)`",
    "SQL": "SELECT\n  COUNT(`T2`.`School`)\nFROM `frpm` AS `T1`\nINNER JOIN `schools` AS `T2`\n  ON `T1`.`CDSCode` = `T2`.`CDSCode`\nWHERE\n  `T2`.`County` = 'Los Angeles'\n  AND `T2`.`Charter` = 0\n  AND CAST(`T1`.`Free Meal Count (K-12)` AS DOUBLE) * 100 / `T1`.`Enrollment (K-12)` < 0.18",
    "difficulty": "challenging"
  },
  {
    "question_id": 72,
    "db_id": "california_schools",
    "question": "How many students from the ages of 5 to 17 are enrolled at the State Special School school in Fremont for the 2014-2015 academic year?",
    "evidence": "State Special School means EdOpsCode = 'SSS'",
    "SQL": "SELECT\n  `T1`.`Enrollment (Ages 5-17)`\nFROM `frpm` AS `T1`\nINNER JOIN `schools` AS `T2`\n  ON `T1`.`CDSCode` = `T2`.`CDSCode`\nWHERE\n  `T2`.`EdOpsCode` = 'SSS'\n  AND `T2`.`City` = 'Fremont'\n  AND `T1`.`Academic Year` BETWEEN 2014 AND 2015",
    "difficulty": "moderate"
  },
  {
    "question_id": 77,
    "db_id": "california_schools",
    "question": "Which schools served a grade span of Kindergarten to 9th grade in the county of Los Angeles and what is its Percent (%) Eligible FRPM (Ages 5-17)?",
    "evidence": "Percent (%) Eligible FRPM (Ages 5-17) can be acquired by `FRPM Count (Ages 5-17)` / `Enrollment (Ages 5-17)` * 100",
    "SQL": "SELECT\n  `T2`.`School`,\n  `T1`.`FRPM Count (Ages 5-17)` * 100 / `T1`.`Enrollment (Ages 5-17)`\nFROM `frpm` AS `T1`\nINNER JOIN `schools` AS `T2`\n  ON `T1`.`CDSCode` = `T2`.`CDSCode`\nWHERE\n  `T2`.`County` = 'Los Angeles' AND `T2`.`GSserved` = 'K-9'",
    "difficulty": "moderate"
  },
  {
    "question_id": 79,
    "db_id": "california_schools",
    "question": "Between San Diego and Santa Barbara, which county offers the most number of schools that does not offer physical building? Indicate the amount.",
    "evidence": "'Does not offer physical building' means Virtual = F in the database.",
    "SQL": "SELECT\n  `County`,\n  COUNT(`Virtual`)\nFROM `schools`\nWHERE\n  (\n    `County` = 'San Diego' OR `County` = 'Santa Barbara'\n  ) AND `Virtual` = 'F'\nGROUP BY\n  `County`\nORDER BY\n  COUNT(`Virtual`) DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 82,
    "db_id": "california_schools",
    "question": "What is the grade span offered in the school with the highest longitude?",
    "evidence": "the highest longitude refers to the school with the maximum absolute longitude value.",
    "SQL": "SELECT\n  `GSoffered`\nFROM `schools`\nORDER BY\n  ABS(`longitude`) DESC\nLIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 83,
    "db_id": "california_schools",
    "question": "Of the schools that offers a magnet program serving a grade span of Kindergarten to 8th grade, how many offers Multiple Provision Types? List the number of cities that offers a Kindergarten to 8th grade span and indicate how many schools are there serving such grade span for each city.",
    "evidence": "Kindergarten to 8th grade refers to K-8; 'Offers a magnet program' means Magnet = 1; Multiple Provision Types refers to `NSLP Provision Status` = 'Multiple Provision Types'",
    "SQL": "SELECT\n  `T2`.`City`,\n  COUNT(`T2`.`CDSCode`)\nFROM `frpm` AS `T1`\nINNER JOIN `schools` AS `T2`\n  ON `T1`.`CDSCode` = `T2`.`CDSCode`\nWHERE\n  `T2`.`Magnet` = 1\n  AND `T2`.`GSoffered` = 'K-8'\n  AND `T1`.`NSLP Provision Status` = 'Multiple Provision Types'\nGROUP BY\n  `T2`.`City`",
    "difficulty": "challenging"
  },
  {
    "question_id": 85,
    "db_id": "california_schools",
    "question": "What is the Percent (%) Eligible Free (K-12) in the school administered by an administrator whose first name is Alusine. List the district code of the school.",
    "evidence": "Percent (%) Eligible Free (K-12) = `Free Meal Count (K-12)` / `Enrollment (K-12)` * 100%",
    "SQL": "SELECT\n  `T1`.`Free Meal Count (K-12)` * 100 / `T1`.`Enrollment (K-12)`,\n  `T1`.`District Code`\nFROM `frpm` AS `T1`\nINNER JOIN `schools` AS `T2`\n  ON `T1`.`CDSCode` = `T2`.`CDSCode`\nWHERE\n  `T2`.`AdmFName1` = 'Alusine'",
    "difficulty": "moderate"
  },
  {
    "question_id": 87,
    "db_id": "california_schools",
    "question": "What are the valid e-mail addresses of the administrator of the school located in the San Bernardino county, City of San Bernardino City Unified that opened between 1/1/2009 to 12/31/2010 whose school types are public Intermediate/Middle Schools and Unified Schools?",
    "evidence": "Intermediate/Middle Schools refers to SOC = 62; Unified School refers to DOC = 54; years between 2009 and 2010 can refer to 'between 1/1/2009 to 12/31/2010'",
    "SQL": "SELECT\n  `T2`.`AdmEmail1`,\n  `T2`.`AdmEmail2`\nFROM `frpm` AS `T1`\nINNER JOIN `schools` AS `T2`\n  ON `T1`.`CDSCode` = `T2`.`CDSCode`\nWHERE\n  `T2`.`County` = 'San Bernardino'\n  AND `T2`.`City` = 'San Bernardino'\n  AND `T2`.`DOC` = 54\n  AND DATE_FORMAT(CAST(`T2`.`OpenDate` AS DATETIME), '%Y') BETWEEN '2009' AND '2010'\n  AND `T2`.`SOC` = 62",
    "difficulty": "challenging"
  },
  {
    "question_id": 89,
    "db_id": "financial",
    "question": "How many accounts who choose issuance after transaction are staying in East Bohemia region?",
    "evidence": "A3 contains the data of region; 'POPLATEK PO OBRATU' represents for 'issuance after transaction'.",
    "SQL": "SELECT\n  COUNT(`T2`.`account_id`)\nFROM `district` AS `T1`\nINNER JOIN `account` AS `T2`\n  ON `T1`.`district_id` = `T2`.`district_id`\nWHERE\n  `T1`.`A3` = 'east Bohemia' AND `T2`.`frequency` = 'POPLATEK PO OBRATU'",
    "difficulty": "moderate"
  },
  {
    "question_id": 92,
    "db_id": "financial",
    "question": "List out the no. of districts that have female average salary is more than 6000 but less than 10000?",
    "evidence": "A11 refers to average salary; Female mapps to gender = 'F'",
    "SQL": "SELECT\n  COUNT(DISTINCT `T2`.`district_id`)\nFROM `client` AS `T1`\nINNER JOIN `district` AS `T2`\n  ON `T1`.`district_id` = `T2`.`district_id`\nWHERE\n  `T1`.`gender` = 'F' AND `T2`.`A11` BETWEEN 6000 AND 10000",
    "difficulty": "simple"
  },
  {
    "question_id": 93,
    "db_id": "financial",
    "question": "How many male customers who are living in North Bohemia have average salary greater than 8000?",
    "evidence": "Male means that gender = 'M'; A3 refers to region; A11 pertains to average salary.",
    "SQL": "SELECT\n  COUNT(`T1`.`client_id`)\nFROM `client` AS `T1`\nINNER JOIN `district` AS `T2`\n  ON `T1`.`district_id` = `T2`.`district_id`\nWHERE\n  `T1`.`gender` = 'M' AND `T2`.`A3` = 'north Bohemia' AND `T2`.`A11` > 8000",
    "difficulty": "moderate"
  },
  {
    "question_id": 94,
    "db_id": "financial",
    "question": "List out the account numbers of female clients who are oldest and has lowest average salary, calculate the gap between this lowest average salary with the highest average salary?",
    "evidence": "Female means gender = 'F'; A11 refers to average salary; Gap = highest average salary - lowest average salary; If the person A's birthdate > B's birthdate, it means that person B is order than person A.",
    "SQL": "SELECT\n  `T1`.`account_id`,\n  (\n    SELECT\n      MAX(`A11`) - MIN(`A11`)\n    FROM `district`\n  )\nFROM `account` AS `T1`\nINNER JOIN `district` AS `T2`\n  ON `T1`.`district_id` = `T2`.`district_id`\nINNER JOIN `disp` AS `T3`\n  ON `T1`.`account_id` = `T3`.`account_id`\nINNER JOIN `client` AS `T4`\n  ON `T3`.`client_id` = `T4`.`client_id`\nWHERE\n  `T2`.`district_id` = (\n    SELECT\n      `district_id`\n    FROM `client`\n    WHERE\n      `gender` = 'F'\n    ORDER BY\n      `birth_date` ASC\n    LIMIT 1\n  )\nORDER BY\n  `T2`.`A11` DESC\nLIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 95,
    "db_id": "financial",
    "question": "List out the account numbers of clients who are youngest and have highest average salary?",
    "evidence": "If the person A's birthdate < B's birthdate, it means that person B is younger than person A; A11 refers to average salary",
    "SQL": "SELECT\n  `T1`.`account_id`\nFROM `account` AS `T1`\nINNER JOIN `disp` AS `T2`\n  ON `T1`.`account_id` = `T2`.`account_id`\nINNER JOIN `client` AS `T3`\n  ON `T2`.`client_id` = `T3`.`client_id`\nINNER JOIN `district` AS `T4`\n  ON `T4`.`district_id` = `T1`.`district_id`\nWHERE\n  `T2`.`client_id` = (\n    SELECT\n      `client_id`\n    FROM `client`\n    ORDER BY\n      `birth_date` DESC\n    LIMIT 1\n  )\nGROUP BY\n  `T4`.`A11`,\n  `T1`.`account_id`",
    "difficulty": "moderate"
  },
  {
    "question_id": 98,
    "db_id": "financial",
    "question": "Among the accounts who have approved loan date in 1997, list out the accounts that have the lowest approved amount and choose weekly issuance statement.",
    "evidence": "'POPLATEK TYDNE' stands for weekly issuance",
    "SQL": "SELECT\n  `T2`.`account_id`\nFROM `loan` AS `T1`\nINNER JOIN `account` AS `T2`\n  ON `T1`.`account_id` = `T2`.`account_id`\nWHERE\n  DATE_FORMAT(CAST(`T1`.`date` AS DATETIME), '%Y') = '1997'\n  AND `T2`.`frequency` = 'POPLATEK TYDNE'\nORDER BY\n  `T1`.`amount`\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 99,
    "db_id": "financial",
    "question": "Among the accounts who have loan validity more than 12 months, list out the accounts that have the highest approved amount and have account opening date in 1993.",
    "evidence": "Loan validity more than 12 months refers to duration > 12",
    "SQL": "SELECT\n  `T1`.`account_id`\nFROM `loan` AS `T1`\nINNER JOIN `account` AS `T2`\n  ON `T1`.`account_id` = `T2`.`account_id`\nWHERE\n  DATE_FORMAT(CAST(`T2`.`date` AS DATETIME), '%Y') = '1993'\n  AND `T1`.`duration` > 12\nORDER BY\n  `T1`.`amount` DESC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 100,
    "db_id": "financial",
    "question": "Among the accounts opened, how many female customers were born before 1950 and stayed in Sokolov?",
    "evidence": "Customers refer to clients; Female refers to gender = 'F'; Names of districts appear in column A2",
    "SQL": "SELECT\n  COUNT(`T2`.`client_id`)\nFROM `district` AS `T1`\nINNER JOIN `client` AS `T2`\n  ON `T1`.`district_id` = `T2`.`district_id`\nWHERE\n  `T2`.`gender` = 'F'\n  AND DATE_FORMAT(CAST(`T2`.`birth_date` AS DATETIME), '%Y') < '1950'\n  AND `T1`.`A2` = 'Sokolov'",
    "difficulty": "moderate"
  },
  {
    "question_id": 112,
    "db_id": "financial",
    "question": "For the female client who was born on 1976/1/29, in which district did she open her account?",
    "evidence": "Female refers to gender = 'F'; A2 refers to district names",
    "SQL": "SELECT\n  `T1`.`A2`\nFROM `district` AS `T1`\nINNER JOIN `client` AS `T2`\n  ON `T1`.`district_id` = `T2`.`district_id`\nWHERE\n  `T2`.`birth_date` = '1976-01-29' AND `T2`.`gender` = 'F'",
    "difficulty": "simple"
  },
  {
    "question_id": 115,
    "db_id": "financial",
    "question": "For the branch which located in the south Bohemia with biggest number of inhabitants, what is the percentage of the male clients?",
    "evidence": "Percentage of the male clients = DIVIDE(COUNT(male clients), COUNT(clients)) * 100; Male refers to gender = 'M', A3 is the region name. A4 contains the information about inhabitants.",
    "SQL": "SELECT\n  CAST(SUM(`T1`.`gender` = 'M') AS DOUBLE) * 100 / COUNT(`T1`.`client_id`)\nFROM `client` AS `T1`\nINNER JOIN `district` AS `T2`\n  ON `T1`.`district_id` = `T2`.`district_id`\nWHERE\n  `T2`.`A3` = 'south Bohemia'\nGROUP BY\n  `T2`.`A4`\nORDER BY\n  `T2`.`A4` DESC\nLIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 116,
    "db_id": "financial",
    "question": "For the client whose loan was approved first in 1993/7/5, what is the increase rate of his/her account balance from 1993/3/22 to 1998/12/27?",
    "evidence": "Increase rate of his/her account balance = [(balance of date A - balance of date B) / balance of Date B] * 100%",
    "SQL": "SELECT\n  CAST((\n    SUM(CASE WHEN `T3`.`date` = '1998-12-27' THEN `T3`.`balance` ELSE 0 END) - SUM(CASE WHEN `T3`.`date` = '1993-03-22' THEN `T3`.`balance` ELSE 0 END)\n  ) AS DOUBLE) * 100 / SUM(CASE WHEN `T3`.`date` = '1993-03-22' THEN `T3`.`balance` ELSE 0 END)\nFROM `loan` AS `T1`\nINNER JOIN `account` AS `T2`\n  ON `T1`.`account_id` = `T2`.`account_id`\nINNER JOIN `trans` AS `T3`\n  ON `T3`.`account_id` = `T2`.`account_id`\nWHERE\n  `T1`.`date` = '1993-07-05'",
    "difficulty": "challenging"
  },
  {
    "question_id": 117,
    "db_id": "financial",
    "question": "What is the percentage of loan amount that has been fully paid with no issue.",
    "evidence": "Loan paid with no issue means contract finished, no problems; status = 'A' means contract finished, no problems; Percentage of accounts by condition = [(total(amount) & condition) / (total amount)] * 100%",
    "SQL": "SELECT\n  (\n    CAST(SUM(CASE WHEN `status` = 'A' THEN `amount` ELSE 0 END) AS DOUBLE) * 100\n  ) / SUM(`amount`)\nFROM `loan`",
    "difficulty": "moderate"
  },
  {
    "question_id": 118,
    "db_id": "financial",
    "question": "For loan amount less than USD100,000, what is the percentage of accounts that is still running with no issue.",
    "evidence": "Status = 'C' stands for running contract, ok so far; Percentage of accounts by condition = [(total(amount) & condition) / (total amount)] * 100.",
    "SQL": "SELECT\n  CAST(SUM(`status` = 'C') AS DOUBLE) * 100 / COUNT(`account_id`)\nFROM `loan`\nWHERE\n  `amount` < 100000",
    "difficulty": "moderate"
  },
  {
    "question_id": 125,
    "db_id": "financial",
    "question": "For loan contracts which are still running where the client is in debt, list the district and state the percentage unemployment rate increment from year 1995 to 1996.",
    "evidence": "Unemployment increment rate in percentage = [(unemployment rate 1996 - unemployment rate 1995) / unemployment rate 1995] * 100; unemployment rate 1995 appears in the A12; unemployment rate 1996 appears in the A13; Loan contracts which are still running where the client is in debt can be presented as status = 'D'",
    "SQL": "SELECT\n  CAST((\n    `T3`.`A13` - `T3`.`A12`\n  ) AS DOUBLE) * 100 / `T3`.`A12`\nFROM `loan` AS `T1`\nINNER JOIN `account` AS `T2`\n  ON `T1`.`account_id` = `T2`.`account_id`\nINNER JOIN `district` AS `T3`\n  ON `T2`.`district_id` = `T3`.`district_id`\nWHERE\n  `T1`.`status` = 'D'",
    "difficulty": "challenging"
  },
  {
    "question_id": 128,
    "db_id": "financial",
    "question": "List the top nine districts, by descending order, from the highest to the lowest, the number of female account holders.",
    "evidence": "A2 refers to districts; Female refers to gender = 'F'",
    "SQL": "SELECT\n  `T2`.`A2`,\n  COUNT(`T1`.`client_id`)\nFROM `client` AS `T1`\nINNER JOIN `district` AS `T2`\n  ON `T1`.`district_id` = `T2`.`district_id`\nWHERE\n  `T1`.`gender` = 'F'\nGROUP BY\n  `T2`.`district_id`,\n  `T2`.`A2`\nORDER BY\n  COUNT(`T1`.`client_id`) DESC\nLIMIT 9",
    "difficulty": "moderate"
  },
  {
    "question_id": 136,
    "db_id": "financial",
    "question": "Between 1/1/1995 and 12/31/1997, how many loans in the amount of at least 250,000 per account that chose monthly statement issuance were approved?",
    "evidence": "Frequency = 'POPLATEK MESICNE' stands for monthly issuance",
    "SQL": "SELECT\n  COUNT(`T1`.`account_id`)\nFROM `account` AS `T1`\nINNER JOIN `loan` AS `T2`\n  ON `T1`.`account_id` = `T2`.`account_id`\nWHERE\n  `T2`.`date` BETWEEN '1995-01-01' AND '1997-12-31'\n  AND `T1`.`frequency` = 'POPLATEK MESICNE'\n  AND `T2`.`amount` >= 250000",
    "difficulty": "moderate"
  },
  {
    "question_id": 137,
    "db_id": "financial",
    "question": "How many accounts have running contracts in Branch location 1?",
    "evidence": "Status = 'C' stands for running contract, OK so far; Status = 'D' stands for running contract, client in debt",
    "SQL": "SELECT\n  COUNT(`T1`.`account_id`)\nFROM `account` AS `T1`\nINNER JOIN `district` AS `T2`\n  ON `T1`.`district_id` = `T2`.`district_id`\nINNER JOIN `loan` AS `T3`\n  ON `T1`.`account_id` = `T3`.`account_id`\nWHERE\n  `T1`.`district_id` = 1 AND (\n    `T3`.`status` = 'C' OR `T3`.`status` = 'D'\n  )",
    "difficulty": "moderate"
  },
  {
    "question_id": 138,
    "db_id": "financial",
    "question": "In the branch where the second-highest number of crimes were committed in 1995 occurred, how many male clients are there?",
    "evidence": "Male refers to gender = 'M'; A15 stands for no. of committed crimes 1995",
    "SQL": "SELECT\n  COUNT(`T1`.`client_id`)\nFROM `client` AS `T1`\nINNER JOIN `district` AS `T2`\n  ON `T1`.`district_id` = `T2`.`district_id`\nWHERE\n  `T1`.`gender` = 'M'\n  AND `T2`.`A15` = (\n    SELECT\n      `T3`.`A15`\n    FROM `district` AS `T3`\n    ORDER BY\n      `T3`.`A15` DESC\n    LIMIT 1\n    OFFSET 1\n  )",
    "difficulty": "moderate"
  },
  {
    "question_id": 129,
    "db_id": "financial",
    "question": "Which are the top ten withdrawals (non-credit card) by district names for the month of January 1996?",
    "evidence": "Non-credit card withdrawals refers to type = 'VYDAJ'; January 1996 can be found by date LIKE '1996-01%' in the database; A2 means district names",
    "SQL": "SELECT DISTINCT\n  `T1`.`A2`\nFROM `district` AS `T1`\nINNER JOIN `account` AS `T2`\n  ON `T1`.`district_id` = `T2`.`district_id`\nINNER JOIN `trans` AS `T3`\n  ON `T2`.`account_id` = `T3`.`account_id`\nWHERE\n  `T3`.`type` = 'VYDAJ' AND `T3`.`date` LIKE '1996-01%'\nORDER BY\n  `A2` ASC\nLIMIT 10",
    "difficulty": "moderate"
  },
  {
    "question_id": 137,
    "db_id": "financial",
    "question": "How many accounts have running contracts in Branch location 1?",
    "evidence": "Status = 'C' stands for running contract, OK so far; Status = 'D' stands for running contract, client in debt",
    "SQL": "SELECT\n  COUNT(`T1`.`account_id`)\nFROM `account` AS `T1`\nINNER JOIN `district` AS `T2`\n  ON `T1`.`district_id` = `T2`.`district_id`\nINNER JOIN `loan` AS `T3`\n  ON `T1`.`account_id` = `T3`.`account_id`\nWHERE\n  `T1`.`district_id` = 1 AND (\n    `T3`.`status` = 'C' OR `T3`.`status` = 'D'\n  )",
    "difficulty": "moderate"
  },
  {
    "question_id": 138,
    "db_id": "financial",
    "question": "In the branch where the second-highest number of crimes were committed in 1995 occurred, how many male clients are there?",
    "evidence": "Male refers to gender = 'M'; A15 stands for no. of committed crimes 1995",
    "SQL": "SELECT\n  COUNT(`T1`.`client_id`)\nFROM `client` AS `T1`\nINNER JOIN `district` AS `T2`\n  ON `T1`.`district_id` = `T2`.`district_id`\nWHERE\n  `T1`.`gender` = 'M'\n  AND `T2`.`A15` = (\n    SELECT\n      `T3`.`A15`\n    FROM `district` AS `T3`\n    ORDER BY\n      `T3`.`A15` DESC\n    LIMIT 1\n    OFFSET 1\n  )",
    "difficulty": "moderate"
  },
  {
    "question_id": 145,
    "db_id": "financial",
    "question": "What are the account holder identification numbers of those who have transactions on the credit card with an amount less than the average, in 1998?",
    "evidence": "Operation = 'VYBER KARTOU' refers to credit card withdrawal",
    "SQL": "SELECT\n  `T1`.`account_id`\nFROM `trans` AS `T1`\nINNER JOIN `account` AS `T2`\n  ON `T1`.`account_id` = `T2`.`account_id`\nWHERE\n  DATE_FORMAT(CAST(`T1`.`date` AS DATETIME), '%Y') = '1998'\n  AND `T1`.`operation` = 'VYBER KARTOU'\n  AND `T1`.`amount` < (\n    SELECT\n      AVG(`amount`)\n    FROM `trans`\n    WHERE\n      DATE_FORMAT(CAST(`date` AS DATETIME), '%Y') = '1998'\n  )",
    "difficulty": "moderate"
  },
  {
    "question_id": 149,
    "db_id": "financial",
    "question": "Please list the account types that are not eligible for loans, and the average income of residents in the district where the account is located exceeds $8000 but is no more than $9000.",
    "evidence": "A11 represents the average salary; Salary and income share the similar meanings; when the account type = 'OWNER', it's eligible for loans",
    "SQL": "SELECT\n  `T3`.`type`\nFROM `district` AS `T1`\nINNER JOIN `account` AS `T2`\n  ON `T1`.`district_id` = `T2`.`district_id`\nINNER JOIN `disp` AS `T3`\n  ON `T2`.`account_id` = `T3`.`account_id`\nWHERE\n  `T3`.`type` <> 'OWNER' AND `T1`.`A11` BETWEEN 8000 AND 9000",
    "difficulty": "challenging"
  },
  {
    "question_id": 152,
    "db_id": "financial",
    "question": "What is the average number of crimes committed in 1995 in regions where the number exceeds 4000 and the region has accounts that are opened starting from the year 1997?",
    "evidence": "A3 refers to region names; A15 stands for the average number of crimes committed in 1995.",
    "SQL": "SELECT\n  AVG(`T1`.`A15`)\nFROM `district` AS `T1`\nINNER JOIN `account` AS `T2`\n  ON `T1`.`district_id` = `T2`.`district_id`\nWHERE\n  DATE_FORMAT(CAST(`T2`.`date` AS DATETIME), '%Y') >= '1997' AND `T1`.`A15` > 4000",
    "difficulty": "moderate"
  },
  {
    "question_id": 159,
    "db_id": "financial",
    "question": "List all the withdrawals in cash transactions that the client with the id 3356 makes.",
    "evidence": "operation = 'VYBER' refers to withdrawal in cash",
    "SQL": "SELECT\n  `T4`.`trans_id`\nFROM `client` AS `T1`\nINNER JOIN `disp` AS `T2`\n  ON `T1`.`client_id` = `T2`.`client_id`\nINNER JOIN `account` AS `T3`\n  ON `T2`.`account_id` = `T3`.`account_id`\nINNER JOIN `trans` AS `T4`\n  ON `T3`.`account_id` = `T4`.`account_id`\nWHERE\n  `T1`.`client_id` = 3356 AND `T4`.`operation` = 'VYBER'",
    "difficulty": "simple"
  },
  {
    "question_id": 168,
    "db_id": "financial",
    "question": "What percentage of clients who opened their accounts in the district with an average salary of over 10000 are women?",
    "evidence": "Female refers to gender = 'F'; Woman and female are close in meaning; Average salary can be found in A11",
    "SQL": "SELECT\n  CAST(SUM(`T2`.`gender` = 'F') AS DOUBLE) * 100 / COUNT(`T2`.`client_id`)\nFROM `district` AS `T1`\nINNER JOIN `client` AS `T2`\n  ON `T1`.`district_id` = `T2`.`district_id`\nWHERE\n  `T1`.`A11` > 10000",
    "difficulty": "moderate"
  },
  {
    "question_id": 169,
    "db_id": "financial",
    "question": "What was the growth rate of the total amount of loans across all accounts for a male client between 1996 and 1997?",
    "evidence": "Growth rate = (sum of amount_1997 - sum of amount_1996) / (sum of amount_1996) * 100%; Male refers to gender = 'M'",
    "SQL": "SELECT\n  CAST((\n    SUM(\n      CASE\n        WHEN DATE_FORMAT(CAST(`T1`.`date` AS DATETIME), '%Y') = '1997'\n        THEN `T1`.`amount`\n        ELSE 0\n      END\n    ) - SUM(\n      CASE\n        WHEN DATE_FORMAT(CAST(`T1`.`date` AS DATETIME), '%Y') = '1996'\n        THEN `T1`.`amount`\n        ELSE 0\n      END\n    )\n  ) AS DOUBLE) * 100 / SUM(\n    CASE\n      WHEN DATE_FORMAT(CAST(`T1`.`date` AS DATETIME), '%Y') = '1996'\n      THEN `T1`.`amount`\n      ELSE 0\n    END\n  )\nFROM `loan` AS `T1`\nINNER JOIN `account` AS `T2`\n  ON `T1`.`account_id` = `T2`.`account_id`\nINNER JOIN `disp` AS `T3`\n  ON `T3`.`account_id` = `T2`.`account_id`\nINNER JOIN `client` AS `T4`\n  ON `T4`.`client_id` = `T3`.`client_id`\nWHERE\n  `T4`.`gender` = 'M' AND `T3`.`type` = 'OWNER'",
    "difficulty": "challenging"
  },
  {
    "question_id": 173,
    "db_id": "financial",
    "question": "How often does account number 3 request an account statement to be released? What was the aim of debiting 3539 in total?",
    "evidence": "k_symbol refers to the purpose of payments",
    "SQL": "SELECT\n  `T1`.`frequency`,\n  `T2`.`k_symbol`\nFROM `account` AS `T1`\nINNER JOIN (\n  SELECT\n    `account_id`,\n    `k_symbol`,\n    SUM(`amount`) AS `total_amount`\n  FROM `order`\n  GROUP BY\n    `account_id`,\n    `k_symbol`\n) AS `T2`\n  ON `T1`.`account_id` = `T2`.`account_id`\nWHERE\n  `T1`.`account_id` = 3 AND `T2`.`total_amount` = 3539",
    "difficulty": "challenging"
  },
  {
    "question_id": 186,
    "db_id": "financial",
    "question": "What percentage of male clients request for weekly statements to be issued?",
    "evidence": "Percentage of male clients = [count(male clients who requested weekly statements) / count(clients who requested weekly statements)] * 100%; Male means gender = 'M'; 'POPLATEK TYDNE' stands for weekly issuance",
    "SQL": "SELECT\n  CAST(SUM(`T1`.`gender` = 'M') AS DOUBLE) * 100 / COUNT(`T1`.`client_id`)\nFROM `client` AS `T1`\nINNER JOIN `district` AS `T3`\n  ON `T1`.`district_id` = `T3`.`district_id`\nINNER JOIN `account` AS `T2`\n  ON `T2`.`district_id` = `T3`.`district_id`\nINNER JOIN `disp` AS `T4`\n  ON `T1`.`client_id` = `T4`.`client_id` AND `T2`.`account_id` = `T4`.`account_id`\nWHERE\n  `T2`.`frequency` = 'POPLATEK TYDNE'",
    "difficulty": "moderate"
  },
  {
    "question_id": 189,
    "db_id": "financial",
    "question": "Name the account numbers of female clients who are oldest and have lowest average salary?",
    "evidence": "Female refers to 'F' in the gender; A11 contains information about average salary",
    "SQL": "SELECT\n  `T3`.`account_id`\nFROM `client` AS `T1`\nINNER JOIN `district` AS `T2`\n  ON `T1`.`district_id` = `T2`.`district_id`\nINNER JOIN `account` AS `T3`\n  ON `T2`.`district_id` = `T3`.`district_id`\nINNER JOIN `disp` AS `T4`\n  ON `T1`.`client_id` = `T4`.`client_id` AND `T4`.`account_id` = `T3`.`account_id`\nWHERE\n  `T1`.`gender` = 'F'\nORDER BY\n  `T1`.`birth_date` ASC,\n  `T2`.`A11` ASC\nLIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 192,
    "db_id": "financial",
    "question": "What is the average amount of loan which are still on running contract with statement issuance after each transaction?",
    "evidence": "status = 'C' stands for running contract, OK so far; status = 'D' stands for running contract, client in debt. 'POPLATEK PO OBRATU' stands for issuance after transaction",
    "SQL": "SELECT\n  AVG(`T2`.`amount`)\nFROM `account` AS `T1`\nINNER JOIN `loan` AS `T2`\n  ON `T1`.`account_id` = `T2`.`account_id`\nWHERE\n  `T2`.`status` IN ('C', 'D') AND `T1`.`frequency` = 'POPLATEK PO OBRATU'",
    "difficulty": "moderate"
  },
  {
    "question_id": 194,
    "db_id": "financial",
    "question": "Provide the IDs and age of the client with high level credit card, which is eligible for loans.",
    "evidence": "the credit card is high-level refers to card.type = 'gold'; eligible for loans refers to disp.type = 'OWNER'",
    "SQL": "SELECT\n  `T1`.`client_id`,\n  DATE_FORMAT(CAST(CURRENT_TIMESTAMP() AS DATETIME), '%Y') - DATE_FORMAT(CAST(`T3`.`birth_date` AS DATETIME), '%Y')\nFROM `disp` AS `T1`\nINNER JOIN `card` AS `T2`\n  ON `T2`.`disp_id` = `T1`.`disp_id`\nINNER JOIN `client` AS `T3`\n  ON `T1`.`client_id` = `T3`.`client_id`\nWHERE\n  `T2`.`type` = 'gold' AND `T1`.`type` = 'OWNER'",
    "difficulty": "moderate"
  }
]
