[
  {
    "question_id": 1471,
    "db_id": "debit_card_specializing",
    "question": "What is the ratio of customers who pay in EUR against customers who pay in CZK?",
    "evidence": "ratio of customers who pay in EUR against customers who pay in CZK = count(Currency = 'EUR') / count(Currency = 'CZK').",
    "SQL": "SELECT CAST(SUM(IIF(Currency = 'EUR', 1, 0)) AS FLOAT) / SUM(IIF(Currency = 'CZK', 1, 0)) AS ratio FROM customers",
    "difficulty": "simple"
  },
  {
    "question_id": 1472,
    "db_id": "debit_card_specializing",
    "question": "In 2012, who had the least consumption in LAM?",
    "evidence": "Year 2012 can be presented as Between 201201 And 201212; The first 4 strings of the Date values in the yearmonth table can represent year.",
    "SQL": "SELECT T1.CustomerID FROM customers AS T1 INNER JOIN yearmonth AS T2 ON T1.CustomerID = T2.CustomerID WHERE T1.Segment = 'LAM' AND SUBSTR(T2.Date, 1, 4) = '2012' GROUP BY T1.CustomerID ORDER BY SUM(T2.Consumption) ASC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1473,
    "db_id": "debit_card_specializing",
    "question": "What was the average monthly consumption of customers in SME for the year 2013?",
    "evidence": "Average Monthly consumption = AVG(Consumption) / 12; Year 2013 can be presented as Between 201301 And 201312; The first 4 strings of the Date values in the yearmonth table can represent year.",
    "SQL": "SELECT AVG(T2.Consumption) / 12 FROM customers AS T1 INNER JOIN yearmonth AS T2 ON T1.CustomerID = T2.CustomerID WHERE SUBSTR(T2.Date, 1, 4) = '2013' AND T1.Segment = 'SME'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1476,
    "db_id": "debit_card_specializing",
    "question": "What was the difference in gas consumption between CZK-paying customers and EUR-paying customers in 2012?",
    "evidence": "Year 2012 can be presented as Between 201201 And 201212; The first 4 strings of the Date values in the yearmonth table can represent year; Difference in Consumption = CZK customers consumption in 2012 - EUR customers consumption in 2012",
    "SQL": "SELECT SUM(IIF(T1.Currency = 'CZK', T2.Consumption, 0)) - SUM(IIF(T1.Currency = 'EUR', T2.Consumption, 0)) FROM customers AS T1 INNER JOIN yearmonth AS T2 ON T1.CustomerID = T2.CustomerID WHERE SUBSTR(T2.Date, 1, 4) = '2012'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1479,
    "db_id": "debit_card_specializing",
    "question": "Which year recorded the most consumption of gas paid in CZK?",
    "evidence": "The first 4 strings of the Date values in the yearmonth table can represent year.",
    "SQL": "SELECT SUBSTR(T2.Date, 1, 4) FROM customers AS T1 INNER JOIN yearmonth AS T2 ON T1.CustomerID = T2.CustomerID WHERE T1.Currency = 'CZK' GROUP BY SUBSTR(T2.Date, 1, 4) ORDER BY SUM(T2.Consumption) DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1480,
    "db_id": "debit_card_specializing",
    "question": "What was the gas consumption peak month for SME customers in 2013?",
    "evidence": "Year 2013 can be presented as Between 201301 And 201312; The first 4 strings of the Date values in the yearmonth table can represent year; The 5th and 6th string of the date can refer to month.",
    "SQL": "SELECT SUBSTR(T2.Date, 5, 2) FROM customers AS T1 INNER JOIN yearmonth AS T2 ON T1.CustomerID = T2.CustomerID WHERE SUBSTR(T2.Date, 1, 4) = '2013' AND T1.Segment = 'SME' GROUP BY SUBSTR(T2.Date, 5, 2) ORDER BY SUM(T2.Consumption) DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1481,
    "db_id": "debit_card_specializing",
    "question": "What is the difference in the annual average consumption of the customers with the least amount of consumption paid in CZK for 2013 between SME and LAM, LAM and KAM, and KAM and SME?",
    "evidence": "annual average consumption of customer with the lowest consumption in each segment = total consumption per year / the number of customer with lowest consumption in each segment; Difference in annual average = SME's annual average - LAM's annual average; Difference in annual average = LAM's annual average - KAM's annual average; Year 2013 can be presented as Between 201301 And 201312; The first 4 strings of the Date values in the yearmonth table can represent year.",
    "SQL": "SELECT CAST(SUM(IIF(T1.Segment = 'SME', T2.Consumption, 0)) AS REAL) / COUNT(T1.CustomerID) - CAST(SUM(IIF(T1.Segment = 'LAM', T2.Consumption, 0)) AS REAL) / COUNT(T1.CustomerID) , CAST(SUM(IIF(T1.Segment = 'LAM', T2.Consumption, 0)) AS REAL) / COUNT(T1.CustomerID) - CAST(SUM(IIF(T1.Segment = 'KAM', T2.Consumption, 0)) AS REAL) / COUNT(T1.CustomerID) , CAST(SUM(IIF(T1.Segment = 'KAM', T2.Consumption, 0)) AS REAL) / COUNT(T1.CustomerID) - CAST(SUM(IIF(T1.Segment = 'SME', T2.Consumption, 0)) AS REAL) / COUNT(T1.CustomerID) FROM customers AS T1 INNER JOIN yearmonth AS T2 ON T1.CustomerID = T2.CustomerID WHERE T1.Currency = 'CZK' AND T2.Consumption = ( SELECT MIN(Consumption) FROM yearmonth ) AND T2.Date BETWEEN 201301 AND 201312",
    "difficulty": "challenging"
  },
  {
    "question_id": 1482,
    "db_id": "debit_card_specializing",
    "question": "Which of the three segments—SME, LAM and KAM—has the biggest and lowest percentage increases in consumption paid in EUR between 2012 and 2013?",
    "evidence": "Increase or Decrease = consumption for 2013 - consumption for 2012; Percentage of Increase = (Increase or Decrease / consumption for 2013) * 100%; The first 4 strings of the Date values in the yearmonth table can represent year",
    "SQL": "SELECT CAST((SUM(IIF(T1.Segment = 'SME' AND T2.Date LIKE '2013%', T2.Consumption, 0)) - SUM(IIF(T1.Segment = 'SME' AND T2.Date LIKE '2012%', T2.Consumption, 0))) AS FLOAT) * 100 / SUM(IIF(T1.Segment = 'SME' AND T2.Date LIKE '2012%', T2.Consumption, 0)), CAST(SUM(IIF(T1.Segment = 'LAM' AND T2.Date LIKE '2013%', T2.Consumption, 0)) - SUM(IIF(T1.Segment = 'LAM' AND T2.Date LIKE '2012%', T2.Consumption, 0)) AS FLOAT) * 100 / SUM(IIF(T1.Segment = 'LAM' AND T2.Date LIKE '2012%', T2.Consumption, 0)) , CAST(SUM(IIF(T1.Segment = 'KAM' AND T2.Date LIKE '2013%', T2.Consumption, 0)) - SUM(IIF(T1.Segment = 'KAM' AND T2.Date LIKE '2012%', T2.Consumption, 0)) AS FLOAT) * 100 / SUM(IIF(T1.Segment = 'KAM' AND T2.Date LIKE '2012%', T2.Consumption, 0)) FROM customers AS T1 INNER JOIN yearmonth AS T2 ON T1.CustomerID = T2.CustomerID",
    "difficulty": "challenging"
  },
  {
    "question_id": 1483,
    "db_id": "debit_card_specializing",
    "question": "How much did customer 6 consume in total between August and November 2013?",
    "evidence": "Between August And November 2013 refers to Between 201308 And 201311; The first 4 strings of the Date values in the yearmonth table can represent year; The 5th and 6th string of the date can refer to month.",
    "SQL": "SELECT SUM(Consumption) FROM yearmonth WHERE CustomerID = 6 AND Date BETWEEN '201308' AND '201311'",
    "difficulty": "simple"
  },
  {
    "question_id": 1484,
    "db_id": "debit_card_specializing",
    "question": "How many more \"discount\" gas stations does the Czech Republic have compared to Slovakia?",
    "evidence": "Czech Republic can be represented as the Country value in gasstations table is 'CZE'; Slovakia can be represented as the Country value in the gasstations table is 'SVK';  Computation of more \"discount\" gas stations= Total no. of discount gas stations in Czech Republic - Total no. of discount gas stations in Slovakia",
    "SQL": "SELECT SUM(IIF(Country = 'CZE', 1, 0)) - SUM(IIF(Country = 'SVK', 1, 0)) FROM gasstations WHERE Segment = 'Discount'",
    "difficulty": "simple"
  },
  {
    "question_id": 1486,
    "db_id": "debit_card_specializing",
    "question": "Is it true that more SMEs pay in Czech koruna than in euros? If so, how many more?",
    "evidence": "Amount of more SMEs = Total of SMEs pay using Currency CZK - Total of SMEs pay using Currency EUR",
    "SQL": "SELECT SUM(Currency = 'CZK') - SUM(Currency = 'EUR') FROM customers WHERE Segment = 'SME'",
    "difficulty": "simple"
  },
  {
    "question_id": 1490,
    "db_id": "debit_card_specializing",
    "question": "How many percent of LAM customer consumed more than 46.73?",
    "evidence": "Percentage of LAM customer consumed more than 46.73 = (Total no. of LAM customers who consumed more than 46.73 / Total no. of LAM customers) * 100.",
    "SQL": "SELECT CAST(SUM(IIF(T2.Consumption > 46.73, 1, 0)) AS FLOAT) * 100 / COUNT(T1.CustomerID) FROM customers AS T1 INNER JOIN yearmonth AS T2 ON T1.CustomerID = T2.CustomerID WHERE T1.Segment = 'LAM'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1493,
    "db_id": "debit_card_specializing",
    "question": "In February 2012, what percentage of customers consumed more than 528.3?",
    "evidence": "February 2012 refers to '201202' in yearmonth.date; The first 4 strings of the Date values in the yearmonth table can represent year; The 5th and 6th string of the date can refer to month.",
    "SQL": "SELECT CAST(SUM(IIF(Consumption > 528.3, 1, 0)) AS FLOAT) * 100 / COUNT(CustomerID) FROM yearmonth WHERE Date = '201202'",
    "difficulty": "simple"
  },
  {
    "question_id": 1498,
    "db_id": "debit_card_specializing",
    "question": "What is the highest monthly consumption in the year 2012?",
    "evidence": "The first 4 strings of the Date values in the yearmonth table can represent year; The 5th and 6th string of the date can refer to month.",
    "SQL": "SELECT SUM(Consumption) FROM yearmonth WHERE SUBSTR(Date, 1, 4) = '2012' GROUP BY SUBSTR(Date, 5, 2) ORDER BY SUM(Consumption) DESC LIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 1500,
    "db_id": "debit_card_specializing",
    "question": "Please list the product description of the products consumed in September, 2013.",
    "evidence": "September 2013 refers to 201309; The first 4 strings of the Date values in the yearmonth table can represent year; The 5th and 6th string of the date can refer to month.",
    "SQL": "SELECT T3.Description FROM transactions_1k AS T1 INNER JOIN yearmonth AS T2 ON T1.CustomerID = T2.CustomerID INNER JOIN products AS T3 ON T1.ProductID = T3.ProductID WHERE T2.Date = '201309'",
    "difficulty": "simple"
  },
  {
    "question_id": 1501,
    "db_id": "debit_card_specializing",
    "question": "Please list the countries of the gas stations with transactions taken place in June, 2013.",
    "evidence": "June 2013 refers to '201306'; The first 4 strings of the Date values in the yearmonth table can represent year; The 5th and 6th string of the date can refer to month;",
    "SQL": "SELECT DISTINCT T2.Country FROM transactions_1k AS T1 INNER JOIN gasstations AS T2 ON T1.GasStationID = T2.GasStationID INNER JOIN yearmonth AS T3 ON T1.CustomerID = T3.CustomerID WHERE T3.Date = '201306'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1505,
    "db_id": "debit_card_specializing",
    "question": "Among the customers who paid in euro, how many of them have a monthly consumption of over 1000?",
    "evidence": "Pays in euro = Currency = 'EUR'.",
    "SQL": "SELECT COUNT(*) FROM yearmonth AS T1 INNER JOIN customers AS T2 ON T1.CustomerID = T2.CustomerID WHERE T2.Currency = 'EUR' AND T1.Consumption > 1000.00",
    "difficulty": "simple"
  },
  {
    "question_id": 1506,
    "db_id": "debit_card_specializing",
    "question": "Please list the product descriptions of the transactions taken place in the gas stations in the Czech Republic.",
    "evidence": "Czech Republic can be represented as the Country value in the gasstations table is 'CZE'; ",
    "SQL": "SELECT DISTINCT T3.Description FROM transactions_1k AS T1 INNER JOIN gasstations AS T2 ON T1.GasStationID = T2.GasStationID INNER JOIN products AS T3 ON T1.ProductID = T3.ProductID WHERE T2.Country = 'CZE'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1507,
    "db_id": "debit_card_specializing",
    "question": "Please list the disparate time of the transactions taken place in the gas stations from chain no. 11.",
    "evidence": "",
    "SQL": "SELECT DISTINCT T1.Time FROM transactions_1k AS T1 INNER JOIN gasstations AS T2 ON T1.GasStationID = T2.GasStationID WHERE T2.ChainID = 11",
    "difficulty": "simple"
  },
  {
    "question_id": 1509,
    "db_id": "debit_card_specializing",
    "question": "Among the transactions made in the gas stations in the Czech Republic, how many of them are taken place after 2012/1/1?",
    "evidence": "Czech Republic can be represented as the Country value in the gasstations table is 'CZE'",
    "SQL": "SELECT COUNT(T1.TransactionID) FROM transactions_1k AS T1 INNER JOIN gasstations AS T2 ON T1.GasStationID = T2.GasStationID WHERE T2.Country = 'CZE' AND STRFTIME('%Y', T1.Date) >= '2012'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1514,
    "db_id": "debit_card_specializing",
    "question": "What kind of currency did the customer paid at 16:25:00 in 2012/8/24?",
    "evidence": "'2012/8/24' can be represented by '2012-08-24'; ",
    "SQL": "SELECT DISTINCT T3.Currency FROM transactions_1k AS T1 INNER JOIN gasstations AS T2 ON T1.GasStationID = T2.GasStationID INNER JOIN customers AS T3 ON T1.CustomerID = T3.CustomerID WHERE T1.Date = '2012-08-24' AND T1.Time = '16:25:00'",
    "difficulty": "simple"
  },
  {
    "question_id": 1515,
    "db_id": "debit_card_specializing",
    "question": "What segment did the customer have at 2012/8/23 21:20:00?",
    "evidence": "'2012/8/23' can be represented by '2012-08-23'",
    "SQL": "SELECT T2.Segment FROM transactions_1k AS T1 INNER JOIN customers AS T2 ON T1.CustomerID = T2.CustomerID WHERE T1.date = '2012-08-23' AND T1.time = '21:20:00'",
    "difficulty": "simple"
  },
  {
    "question_id": 1521,
    "db_id": "debit_card_specializing",
    "question": "For all the transactions happened during 8:00-9:00 in 2012/8/26, how many happened in CZE?",
    "evidence": "Czech Republic can be represented as the Country value in the gasstations table is 'CZE'; '2012/8/26' can be represented by '2012-08-26'; during 8:00-9:00 can be represented as Time BETWEEN '08:00:00' AND '09:00:00'",
    "SQL": "SELECT COUNT(T1.TransactionID) FROM transactions_1k AS T1 INNER JOIN gasstations AS T2 ON T1.GasStationID = T2.GasStationID WHERE T1.Date = '2012-08-26' AND T1.Time BETWEEN '08:00:00' AND '09:00:00' AND T2.Country = 'CZE'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1524,
    "db_id": "debit_card_specializing",
    "question": "What's the nationality of the customer who spent 548.4 in 2012/8/24?",
    "evidence": "'2012/8/24' can be represented by '2012-08-24'",
    "SQL": "SELECT T2.Country FROM transactions_1k AS T1 INNER JOIN gasstations AS T2 ON T1.GasStationID = T2.GasStationID WHERE T1.Date = '2012-08-24' AND T1.Price  = 548.4",
    "difficulty": "simple"
  },
  {
    "question_id": 1525,
    "db_id": "debit_card_specializing",
    "question": "What is the percentage of the customers who used EUR in 2012/8/25?",
    "evidence": "'2012/8/25' can be represented by '2012-08-25'",
    "SQL": "SELECT CAST(SUM(IIF(T2.Currency = 'EUR', 1, 0)) AS FLOAT) * 100 / COUNT(T1.CustomerID) FROM transactions_1k AS T1 INNER JOIN customers AS T2 ON T1.CustomerID = T2.CustomerID WHERE T1.Date = '2012-08-25'",
    "difficulty": "simple"
  },
  {
    "question_id": 1526,
    "db_id": "debit_card_specializing",
    "question": "For the customer who paid 634.8 in 2012/8/25, what was the consumption decrease rate from Year 2012 to 2013?",
    "evidence": "'2012/8/24' can be represented by '2012-08-24'; Consumption decrease rate = (consumption_2012 - consumption_2013) / consumption_2012",
    "SQL": "SELECT CAST(SUM(IIF(SUBSTRING(Date, 1, 4) = '2012', Consumption, 0)) - SUM(IIF(SUBSTRING(Date, 1, 4) = '2013', Consumption, 0)) AS FLOAT) / SUM(IIF(SUBSTRING(Date, 1, 4) = '2012', Consumption, 0)) FROM yearmonth WHERE CustomerID = ( SELECT T1.CustomerID FROM transactions_1k AS T1 INNER JOIN gasstations AS T2 ON T1.GasStationID = T2.GasStationID WHERE T1.Date = '2012-08-25' AND T1.Price  = 1513.12 )",
    "difficulty": "challenging"
  },
  {
    "question_id": 1528,
    "db_id": "debit_card_specializing",
    "question": "What is the percentage of \"premium\" against the overall segment in Country = \"SVK\"?",
    "evidence": "",
    "SQL": "SELECT CAST(SUM(IIF(Country = 'SVK' AND Segment = 'Premium', 1, 0)) AS FLOAT) * 100 / SUM(IIF(Country = 'SVK', 1, 0)) FROM gasstations",
    "difficulty": "simple"
  },
  {
    "question_id": 1529,
    "db_id": "debit_card_specializing",
    "question": "What is the amount spent by customer \"38508\" at the gas stations? How much had the customer spent in January 2012?",
    "evidence": "January 2012 refers to the Date value = '201201'",
    "SQL": "SELECT SUM(T1.Price ) , SUM(IIF(T3.Date = '201201', T1.Price, 0)) FROM transactions_1k AS T1 INNER JOIN gasstations AS T2 ON T1.GasStationID = T2.GasStationID INNER JOIN yearmonth AS T3 ON T1.CustomerID = T3.CustomerID WHERE T1.CustomerID = '38508'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1531,
    "db_id": "debit_card_specializing",
    "question": "Who is the top spending customer and how much is the average price per single item purchased by this customer? What currency was being used?",
    "evidence": "average price per single item = Total(price) / Total(amount)",
    "SQL": "SELECT T2.CustomerID, SUM(T2.Price / T2.Amount), T1.Currency FROM customers AS T1 INNER JOIN transactions_1k AS T2 ON T1.CustomerID = T2.CustomerID WHERE T2.CustomerID = ( SELECT CustomerID FROM yearmonth ORDER BY Consumption DESC LIMIT 1 ) GROUP BY T2.CustomerID, T1.Currency",
    "difficulty": "moderate"
  },
  {
    "question_id": 1533,
    "db_id": "debit_card_specializing",
    "question": "For all the people who paid more than 29.00 per unit of product id No.5. Give their consumption status in the August of 2012.",
    "evidence": "August of 2012 refers to the Date value = '201208' ; Price per unit of product = Price / Amount;",
    "SQL": "SELECT T2.Consumption FROM transactions_1k AS T1 INNER JOIN yearmonth AS T2 ON T1.CustomerID = T2.CustomerID WHERE T1.Price / T1.Amount > 29.00 AND T1.ProductID = 5 AND T2.Date = '201208'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1312,
    "db_id": "student_club",
    "question": "What's Angela Sanders's major?",
    "evidence": "Angela Sanders is the full name; full name refers to first_name, last_name; major refers to major_name.",
    "SQL": "SELECT T2.major_name FROM member AS T1 INNER JOIN major AS T2 ON T1.link_to_major = T2.major_id WHERE T1.first_name = 'Angela' AND T1.last_name = 'Sanders'",
    "difficulty": "simple"
  },
  {
    "question_id": 1317,
    "db_id": "student_club",
    "question": "Among the students from the Student_Club who attended the event \"Women's Soccer\", how many of them want a T-shirt that's in medium size?",
    "evidence": "Women's Soccer is an event name; T-shirt that is in medium size refers to t_shirt_size = 'Medium'",
    "SQL": "SELECT COUNT(T1.event_id) FROM event AS T1 INNER JOIN attendance AS T2 ON T1.event_id = T2.link_to_event INNER JOIN member AS T3 ON T2.link_to_member = T3.member_id WHERE T1.event_name = 'Women''s Soccer' AND T3.t_shirt_size = 'Medium'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1322,
    "db_id": "student_club",
    "question": "Among the events attended by more than 10 members of the Student_Club, how many of them are meetings?",
    "evidence": "meetings events refers to type = 'Meeting'; attended by more than 10 members refers to COUNT(event_id) > 10",
    "SQL": "SELECT T1.event_name FROM event AS T1  INNER JOIN attendance AS T2 ON T1.event_id = T2.link_to_event GROUP BY T1.event_id  HAVING COUNT(T2.link_to_event) > 10 EXCEPT SELECT T1.event_name  FROM event AS T1  WHERE T1.type = 'Meeting'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1323,
    "db_id": "student_club",
    "question": "List all the names of events that had an attendance of over 20 students but were not fundraisers.",
    "evidence": "name of events refers to event_name; attendance of over 20 students COUNT(event_id) > 20.",
    "SQL": "SELECT T1.event_name FROM event AS T1 INNER JOIN attendance AS T2 ON T1.event_id = T2.link_to_event GROUP BY T1.event_id HAVING COUNT(T2.link_to_event) > 20 EXCEPT SELECT T1.event_name FROM event AS T1  WHERE T1.type = 'Fundraiser'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1331,
    "db_id": "student_club",
    "question": "What is the amount of the funds that the Vice President received?",
    "evidence": "'Vice President' is a position of Student Club; funds received refers to amount.",
    "SQL": "SELECT T2.amount FROM member AS T1 INNER JOIN income AS T2 ON T1.member_id = T2.link_to_member WHERE T1.position = 'Vice President'",
    "difficulty": "simple"
  },
  {
    "question_id": 1334,
    "db_id": "student_club",
    "question": "List the full name of the Student_Club members that grew up in Illinois state.",
    "evidence": "full name of member refers to first_name, last_name",
    "SQL": "SELECT T1.first_name, T1.last_name FROM member AS T1 INNER JOIN zip_code AS T2 ON T1.zip = T2.zip_code WHERE T2.state = 'Illinois'",
    "difficulty": "simple"
  },
  {
    "question_id": 1338,
    "db_id": "student_club",
    "question": "Was each expense in October Meeting on October 8, 2019 approved?",
    "evidence": "event_name = 'October Meeting' where event_date = '2019-10-08'; approved = True means expenses was approved; approved = False means expenses was not approved",
    "SQL": "SELECT T3.approved FROM event AS T1 INNER JOIN budget AS T2 ON T1.event_id = T2.link_to_event INNER JOIN expense AS T3 ON T2.budget_id = T3.link_to_budget WHERE T1.event_name = 'October Meeting' AND T1.event_date LIKE '2019-10-08%'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1339,
    "db_id": "student_club",
    "question": "Calculate the total average cost that Elijah Allen spent in the events on September and October.",
    "evidence": "Elijah Allen is the full name; full name refers to first_name, last_name; The 5th and 6th string of the expense_date in the expense table can refer to month; events in September and October refers to month(expense_date) = 9 OR month(expense_date) = 10",
    "SQL": "SELECT AVG(T2.cost) FROM member AS T1 INNER JOIN expense AS T2 ON T1.member_id = T2.link_to_member WHERE T1.last_name = 'Allen' AND T1.first_name = 'Elijah' AND (SUBSTR(T2.expense_date, 6, 2) = '09' OR SUBSTR(T2.expense_date, 6, 2) = '10')",
    "difficulty": "challenging"
  },
  {
    "question_id": 1340,
    "db_id": "student_club",
    "question": "Calculate the difference of the total amount spent in all events by the Student_Club in year 2019 and 2020.",
    "evidence": "The first 4 strings of the event_date values in the event table can represent year; The difference of the total amount spent = SUBTRACT(spent where YEAR(event_date) = 2019, spent where YEAR(event_date) = 2020)",
    "SQL": "SELECT SUM(CASE WHEN SUBSTR(T1.event_date, 1, 4) = '2019' THEN T2.spent ELSE 0 END) - SUM(CASE WHEN SUBSTR(T1.event_date, 1, 4) = '2020' THEN T2.spent ELSE 0 END) AS num FROM event AS T1 INNER JOIN budget AS T2 ON T1.event_id = T2.link_to_event",
    "difficulty": "moderate"
  },
  {
    "question_id": 1344,
    "db_id": "student_club",
    "question": "What was the notes of the fundraising on 2019/9/14?",
    "evidence": "fundraising on 2019/9/14 refers to source = 'Fundraising' where date_received = '2019-09-14'",
    "SQL": "SELECT notes FROM income WHERE source = 'Fundraising' AND date_received = '2019-09-14'",
    "difficulty": "simple"
  },
  {
    "question_id": 1346,
    "db_id": "student_club",
    "question": "Tell the phone number of \"Carlo Jacobs\".",
    "evidence": "Carlo Jacobs is the full name; full name refers to first_name, last_name;",
    "SQL": "SELECT phone FROM member WHERE first_name = 'Carlo' AND last_name = 'Jacobs'",
    "difficulty": "simple"
  },
  {
    "question_id": 1350,
    "db_id": "student_club",
    "question": "What is the status of the event which bought \"Post Cards, Posters\" on 2019/8/20?",
    "evidence": "'Post Cards, Posters' is an expense description; on 2019/8/20 refers to expense_date = '2019-8-20'; status of event refers to event_status",
    "SQL": "SELECT T1.event_status FROM budget AS T1 INNER JOIN expense AS T2 ON T1.budget_id = T2.link_to_budget WHERE T2.expense_description = 'Post Cards, Posters' AND T2.expense_date = '2019-08-20'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1351,
    "db_id": "student_club",
    "question": "What was Brent Thomason's major?",
    "evidence": "Brent Thomason is the full name; full name refers to first_name, last_name; major refers to major_name",
    "SQL": "SELECT T2.major_name FROM member AS T1 INNER JOIN major AS T2 ON T1.link_to_major = T2.major_id WHERE T1.first_name = 'Brent' AND T1.last_name = 'Thomason'",
    "difficulty": "simple"
  },
  {
    "question_id": 1352,
    "db_id": "student_club",
    "question": "For all the club members from \"Business\" major, how many of them wear medium size t-shirt?",
    "evidence": "'Business' is a major name; wear medium size t-shirt refers to t_shirt_size = 'Medium'",
    "SQL": "SELECT COUNT(T1.member_id) FROM member AS T1 INNER JOIN major AS T2 ON T1.link_to_major = T2.major_id WHERE T2.major_name = 'Business' AND T1.t_shirt_size = 'Medium'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1356,
    "db_id": "student_club",
    "question": "Which department was the President of the club in?",
    "evidence": "'President' is a position of Student Club",
    "SQL": "SELECT T2.department FROM member AS T1 INNER JOIN major AS T2 ON T1.link_to_major = T2.major_id WHERE T1.position = 'President'",
    "difficulty": "simple"
  },
  {
    "question_id": 1357,
    "db_id": "student_club",
    "question": "State the date Connor Hilton paid his/her dues.",
    "evidence": "Connor Hilton is the full name; full name refers to first_name, last_name; date the dues was paid refers to date_received where source = 'Dues';",
    "SQL": "SELECT T2.date_received FROM member AS T1 INNER JOIN income AS T2 ON T1.member_id = T2.link_to_member WHERE T1.first_name = 'Connor' AND T1.last_name = 'Hilton' AND T2.source = 'Dues'",
    "difficulty": "simple"
  },
  {
    "question_id": 1359,
    "db_id": "student_club",
    "question": "How many times was the budget in Advertisement for \"Yearly Kickoff\" meeting more than \"October Meeting\"?",
    "evidence": "budget in Advertisement refer to category = 'Advertisement' in the budget table; DIVIDE(SUM(amount when event_name = 'Yearly Kickoff'), SUM(amount when event_name = 'October Meeting'))",
    "SQL": "SELECT CAST(SUM(CASE WHEN T2.event_name = 'Yearly Kickoff' THEN T1.amount ELSE 0 END) AS REAL) / SUM(CASE WHEN T2.event_name = 'October Meeting' THEN T1.amount ELSE 0 END) FROM budget AS T1 INNER JOIN event AS T2 ON T1.link_to_event = T2.event_id WHERE T1.category = 'Advertisement' AND T2.type = 'Meeting'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1361,
    "db_id": "student_club",
    "question": "What is the total cost of the pizzas for all the events?",
    "evidence": "total cost of the pizzas refers to SUM(cost) where expense_description = 'Pizza'",
    "SQL": "SELECT SUM(cost) FROM expense WHERE expense_description = 'Pizza'",
    "difficulty": "simple"
  },
  {
    "question_id": 1362,
    "db_id": "student_club",
    "question": "How many cities are there in Orange County, Virginia?",
    "evidence": "Orange County is the county name, Virginia is the state name",
    "SQL": "SELECT COUNT(city) FROM zip_code WHERE county = 'Orange County' AND state = 'Virginia'",
    "difficulty": "simple"
  },
  {
    "question_id": 1368,
    "db_id": "student_club",
    "question": "What does the person with the phone number \"809-555-3360\" major in?",
    "evidence": "major in refers to major_name",
    "SQL": "SELECT T2.major_name FROM member AS T1 INNER JOIN major AS T2 ON T1.link_to_major = T2.major_id WHERE T1.phone = '809-555-3360'",
    "difficulty": "simple"
  },
  {
    "question_id": 1371,
    "db_id": "student_club",
    "question": "How many members attended the \"Women's Soccer\" event?",
    "evidence": "'Women's Soccer' is the event name;",
    "SQL": "SELECT COUNT(T2.link_to_member) FROM event AS T1 INNER JOIN attendance AS T2 ON T1.event_id = T2.link_to_event WHERE T1.event_name = 'Women''s Soccer'",
    "difficulty": "simple"
  },
  {
    "question_id": 1375,
    "db_id": "student_club",
    "question": "List all the members of the \"School of Applied Sciences, Technology and Education\" department.",
    "evidence": "list all members means to list all the full name; full name refers to first_name, last_name;",
    "SQL": "SELECT T1.first_name, T1.last_name FROM member AS T1 INNER JOIN major AS T2 ON T1.link_to_major = T2.major_id WHERE T2.department = 'School of Applied Sciences, Technology and Education'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1376,
    "db_id": "student_club",
    "question": "Among all the closed events, which event has the highest spend-to-budget ratio?",
    "evidence": "closed events refers to event_name where status = 'Closed'; highest spend-to budget ratio refers to MAX(DIVIDE(spent, amount))",
    "SQL": "SELECT T2.event_name FROM budget AS T1 INNER JOIN event AS T2 ON T1.link_to_event = T2.event_id WHERE T2.status = 'Closed' ORDER BY T1.spent / T1.amount DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1378,
    "db_id": "student_club",
    "question": "What is the highest amount of budget spend for an event?",
    "evidence": "highest amount of budget spend refers to MAX(spent)",
    "SQL": "SELECT MAX(spent) FROM budget",
    "difficulty": "simple"
  },
  {
    "question_id": 1380,
    "db_id": "student_club",
    "question": "What is the total amount of money spent for food?",
    "evidence": "total amount of money spent refers to SUM(spent); spent for food refers to category = 'Food'",
    "SQL": "SELECT SUM(spent) FROM budget WHERE category = 'Food'",
    "difficulty": "simple"
  },
  {
    "question_id": 1381,
    "db_id": "student_club",
    "question": "List the name of students that have attended more than 7 events.",
    "evidence": "name of students means the full name; full name refers to first_name, last_name; attended more than 7 events refers to COUNT(link_to_event) > 7",
    "SQL": "SELECT T1.first_name, T1.last_name FROM member AS T1 INNER JOIN attendance AS T2 ON T1.member_id = T2.link_to_member GROUP BY T2.link_to_member HAVING COUNT(T2.link_to_event) > 7",
    "difficulty": "moderate"
  },
  {
    "question_id": 1387,
    "db_id": "student_club",
    "question": "Which student has been entrusted to manage the budget for the Yearly Kickoff?",
    "evidence": "name of students means the full name; full name refers to first_name, last_name;'Yearly Kickoff' is an event name;",
    "SQL": "SELECT T4.first_name, T4.last_name FROM event AS T1 INNER JOIN budget AS T2 ON T1.event_id = T2.link_to_event INNER JOIN expense AS T3 ON T2.budget_id = T3.link_to_budget INNER JOIN member AS T4 ON T3.link_to_member = T4.member_id WHERE T1.event_name = 'Yearly Kickoff'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1389,
    "db_id": "student_club",
    "question": "Which event has the lowest cost?",
    "evidence": "event refers to event_name; lowest cost means MIN(cost)",
    "SQL": "SELECT T1.event_name FROM event AS T1 INNER JOIN budget AS T2 ON T1.event_id = T2.link_to_event INNER JOIN expense AS T3 ON T2.budget_id = T3.link_to_budget ORDER BY T3.cost LIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 1390,
    "db_id": "student_club",
    "question": "Based on the total cost for all event, what is the percentage of cost for Yearly Kickoff event?",
    "evidence": "percentage = DIVIDE(SUM(cost where event_name = 'Yearly Kickoff'), SUM(cost)) * 100",
    "SQL": "SELECT CAST(SUM(CASE WHEN T1.event_name = 'Yearly Kickoff' THEN T3.cost ELSE 0 END) AS REAL) * 100 / SUM(T3.cost) FROM event AS T1 INNER JOIN budget AS T2 ON T1.event_id = T2.link_to_event INNER JOIN expense AS T3 ON T2.budget_id = T3.link_to_budget",
    "difficulty": "moderate"
  },
  {
    "question_id": 1392,
    "db_id": "student_club",
    "question": "Indicate the top source of funds received in September 2019 based on their amount.",
    "evidence": "top source funds refers to MAX(source); September 2019 means date_received BETWEEN '2019-09-01' and '2019-09-30'",
    "SQL": "SELECT source FROM income WHERE date_received BETWEEN '2019-09-01' and '2019-09-30' ORDER BY source DESC LIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 1394,
    "db_id": "student_club",
    "question": "How many members of the Student_Club have major in 'Physics Teaching'?",
    "evidence": "'Physics Teaching' is the major_name;",
    "SQL": "SELECT COUNT(T2.member_id) FROM major AS T1 INNER JOIN member AS T2 ON T1.major_id = T2.link_to_major WHERE T1.major_name = 'Physics Teaching'",
    "difficulty": "simple"
  },
  {
    "question_id": 1398,
    "db_id": "student_club",
    "question": "Name the event with the highest amount spent on advertisement.",
    "evidence": "Name of event refers to event_name; highest amount spent on advertisement refers to MAX(spent) where category = 'Advertisement'",
    "SQL": "SELECT T2.event_name FROM budget AS T1 INNER JOIN event AS T2 ON T1.link_to_event = T2.event_id WHERE T1.category = 'Advertisement' ORDER BY T1.spent DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1399,
    "db_id": "student_club",
    "question": "Did Maya Mclean attend the 'Women's Soccer' event?",
    "evidence": "Maya Mclean is the full name; full name refers to first_name, last_name; 'Women's Soccer' is an event_name",
    "SQL": "SELECT CASE WHEN T3.event_name = 'Women''s Soccer' THEN 'YES' END AS result FROM member AS T1 INNER JOIN attendance AS T2 ON T1.member_id = T2.link_to_member INNER JOIN event AS T3 ON T2.link_to_event = T3.event_id WHERE T1.first_name = 'Maya' AND T1.last_name = 'Mclean'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1401,
    "db_id": "student_club",
    "question": "Indicate the cost of posters for 'September Speaker' event.",
    "evidence": "'Posters' is the expense description; 'September Speaker' is an event name",
    "SQL": "SELECT T3.cost FROM event AS T1 INNER JOIN budget AS T2 ON T1.event_id = T2.link_to_event INNER JOIN expense AS T3 ON T2.budget_id = T3.link_to_budget WHERE T1.event_name = 'September Speaker' AND T3.expense_description = 'Posters'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1403,
    "db_id": "student_club",
    "question": "Indicate the name of the closed event whose cost has exceeded the budget the most.",
    "evidence": "closed events refers to event_name where status = 'Closed'; exceed the budget the most refers to MIN(remaining) where remaining < 0",
    "SQL": "SELECT T2.event_name FROM budget AS T1 INNER JOIN event AS T2 ON T2.event_id = T1.link_to_event WHERE T1.event_status = 'Closed' AND T1.remaining < 0 ORDER BY T1.remaining LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1404,
    "db_id": "student_club",
    "question": "Identify the type of expenses and their total value approved for 'October Meeting' event.",
    "evidence": "total value refers to SUM(cost); 'October Meeting' is an event name;",
    "SQL": "SELECT T1.type, SUM(T3.cost) FROM event AS T1 INNER JOIN budget AS T2 ON T1.event_id = T2.link_to_event INNER JOIN expense AS T3 ON T2.budget_id = T3.link_to_budget WHERE T1.event_name = 'October Meeting'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1405,
    "db_id": "student_club",
    "question": "Calculate the amount budgeted for 'April Speaker' event. List all the budgeted categories for said event in an ascending order based on their amount budgeted.",
    "evidence": "'April Speaker' is an event name; amount budgeted refers to SUM(amount); budget categories refers to category",
    "SQL": "SELECT T2.category, SUM(T2.amount) FROM event AS T1 JOIN budget AS T2 ON T1.event_id = T2.link_to_event WHERE T1.event_name = 'April Speaker' GROUP BY T2.category ORDER BY SUM(T2.amount) ASC",
    "difficulty": "moderate"
  },
  {
    "question_id": 1409,
    "db_id": "student_club",
    "question": "Mention the total expense used on 8/20/2019.",
    "evidence": "total expense refers SUM(cost) where expense_date = '2019-08-20'",
    "SQL": "SELECT SUM(cost) FROM expense WHERE expense_date = '2019-08-20'",
    "difficulty": "simple"
  },
  {
    "question_id": 1410,
    "db_id": "student_club",
    "question": "List out the full name and total cost that member id \"rec4BLdZHS2Blfp4v\" incurred?",
    "evidence": "full name refers to first_name, last name",
    "SQL": "SELECT T1.first_name, T1.last_name, SUM(T2.cost) FROM member AS T1 INNER JOIN expense AS T2 ON T1.member_id = T2.link_to_member WHERE T1.member_id = 'rec4BLdZHS2Blfp4v'",
    "difficulty": "simple"
  },
  {
    "question_id": 1411,
    "db_id": "student_club",
    "question": "State what kind of expenses that Sacha Harrison incurred?",
    "evidence": "kind of expenses refers to expense_description; Sacha Harrison is the full name; full name refers to first_name, last_name;",
    "SQL": "SELECT T2.expense_description FROM member AS T1 INNER JOIN expense AS T2 ON T1.member_id = T2.link_to_member WHERE T1.first_name = 'Sacha' AND T1.last_name = 'Harrison'",
    "difficulty": "simple"
  },
  {
    "question_id": 1422,
    "db_id": "student_club",
    "question": "State the category of events were held at MU 215.",
    "evidence": "'MU 215' is the location of event; ",
    "SQL": "SELECT DISTINCT T2.category FROM event AS T1 INNER JOIN budget AS T2 ON T1.event_id = T2.link_to_event WHERE T1.location = 'MU 215'",
    "difficulty": "simple"
  },
  {
    "question_id": 1426,
    "db_id": "student_club",
    "question": "List the last name of members with a major in environmental engineering and include its department and college name.",
    "evidence": "'Environmental Engineering' is the major_name;",
    "SQL": "SELECT T2.last_name, T1.department, T1.college FROM major AS T1 INNER JOIN member AS T2 ON T1.major_id = T2.link_to_major WHERE T2.position = 'Member' AND T1.major_name = 'Environmental Engineering'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1427,
    "db_id": "student_club",
    "question": "What are the budget category of the events located at MU 215 and a guest speaker type with a 0 budget spent?",
    "evidence": "budget category refers to category; events located at refers to location; type = 'Guest Speaker'; 0 budget spent refers to spent = 0; ",
    "SQL": "SELECT DISTINCT T2.category, T1.type FROM event AS T1 INNER JOIN budget AS T2 ON T1.event_id = T2.link_to_event WHERE T1.location = 'MU 215' AND T2.spent = 0 AND T1.type = 'Guest Speaker'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1432,
    "db_id": "student_club",
    "question": "Among the members with t-shirt size of medium, what is the percentage of the amount 50 received by the Student_Club?",
    "evidence": "t_shirt_size = 'Medium' where position = 'Member'; percentage = DIVIDE(COUNT(amount = 50), COUNT(member_id)) * 100",
    "SQL": "SELECT CAST(SUM(CASE WHEN T2.amount = 50 THEN 1.0 ELSE 0 END) AS REAL) * 100 / COUNT(T2.income_id) FROM member AS T1 INNER JOIN income AS T2 ON T1.member_id = T2.link_to_member WHERE T1.position = 'Member' AND T1.t_shirt_size = 'Medium'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1435,
    "db_id": "student_club",
    "question": "List the names of closed event as \"game\" that was closed from 3/15/2019 to 3/20/2020.",
    "evidence": "name of events refers event_name; game event that was closed refers to type = 'Game' where status = 'Closed'; event_date BETWEEN '2019-03-15' and '2020-03-20'; ",
    "SQL": "SELECT DISTINCT event_name FROM event WHERE type = 'Game' AND date(SUBSTR(event_date, 1, 10)) BETWEEN '2019-03-15' AND '2020-03-20' AND status = 'Closed'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1457,
    "db_id": "student_club",
    "question": "Give the full name and contact number of members who had to spend more than average on each expense.",
    "evidence": "full name refers to first_name, last_name; contact number refers to phone; had spent more than average on each expense refers to cost > AVG(cost)",
    "SQL": "SELECT DISTINCT T3.first_name, T3.last_name, T3.phone FROM expense AS T1 INNER JOIN budget AS T2 ON T1.link_to_budget = T2.budget_id INNER JOIN member AS T3 ON T3.member_id = T1.link_to_member WHERE T1.cost > ( SELECT AVG(T1.cost) FROM expense AS T1 INNER JOIN budget AS T2 ON T1.link_to_budget = T2.budget_id INNER JOIN member AS T3 ON T3.member_id = T1.link_to_member )",
    "difficulty": "challenging"
  },
  {
    "question_id": 1460,
    "db_id": "student_club",
    "question": "Write the full name of the member who spent money for water, veggie tray and supplies and include the cost of it.",
    "evidence": "full name refers to first_name, last name; spent money for refers expense description; expense_description = 'Water, Veggie tray, supplies'",
    "SQL": "SELECT T2.first_name, T2.last_name, T1.cost FROM expense AS T1 INNER JOIN member AS T2 ON T1.link_to_member = T2.member_id WHERE T1.expense_description = 'Water, Veggie tray, supplies'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1464,
    "db_id": "student_club",
    "question": "Write the full names of students who received funds on the date of 9/9/2019 and include the amount received.",
    "evidence": "full name refers to first_name, last_name, amount of funds received refers to amount, received funds on date refers to date_received",
    "SQL": "SELECT DISTINCT T3.first_name, T3.last_name, T4.amount FROM event AS T1 INNER JOIN attendance AS T2 ON T1.event_id = T2.link_to_event INNER JOIN member AS T3 ON T3.member_id = T2.link_to_member INNER JOIN income AS T4 ON T4.link_to_member = T3.member_id WHERE T4.date_received = '2019-09-09'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1149,
    "db_id": "thrombosis_prediction",
    "question": "Are there more in-patient or outpatient who were male? What is the deviation in percentage?",
    "evidence": "male refers to SEX = 'M'; in-patient refers to Admission = '+'; outpatient refers to Admission = '-'; percentage = DIVIDE(COUNT(ID) where SEX = 'M' and Admission = '+', COUNT(ID) where SEX  = 'M' and Admission = '-')",
    "SQL": "SELECT CAST(SUM(CASE WHEN Admission = '+' THEN 1 ELSE 0 END) AS REAL) * 100 / SUM(CASE WHEN Admission = '-' THEN 1 ELSE 0 END) FROM Patient WHERE SEX = 'M'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1150,
    "db_id": "thrombosis_prediction",
    "question": "What is the percentage of female patient were born after 1930?",
    "evidence": "female refers to Sex = 'F'; patient who were born after 1930 refers to year(Birthday) > '1930'; calculation = DIVIDE(COUNT(ID) where year(Birthday) > '1930' and SEX = 'F'), (COUNT(ID) where SEX = 'F')",
    "SQL": "SELECT CAST(SUM(CASE WHEN STRFTIME('%Y', Birthday) > '1930' THEN 1 ELSE 0 END) AS REAL) * 100 / COUNT(*) FROM Patient WHERE SEX = 'F'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1152,
    "db_id": "thrombosis_prediction",
    "question": "What is the ratio of outpatient to inpatient followed up treatment among all the 'SLE' diagnosed patient?",
    "evidence": "'SLE' diagnosed patient means Diagnosis = 'SLE'; inpatient refers to Admission = '+'; outpatient refers to Admission = '-'; calculation =  DIVIDE(COUNT(ID) where Diagnosis = 'SLE' and Admission = '+', COUNT(ID) where Diagnosis = 'SLE' and Admission = '-')",
    "SQL": "SELECT SUM(CASE WHEN Admission = '+' THEN 1.0 ELSE 0 END) / SUM(CASE WHEN Admission = '-' THEN 1 ELSE 0 END) FROM Patient WHERE Diagnosis = 'SLE'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1153,
    "db_id": "thrombosis_prediction",
    "question": "What is the disease patient '30609' diagnosed with. List all the date of laboratory tests done for this patient.",
    "evidence": "'30609' is the Patient ID; disease means Diagnosis",
    "SQL": "SELECT T1.Diagnosis, T2.Date FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T1.ID = 30609",
    "difficulty": "simple"
  },
  {
    "question_id": 1155,
    "db_id": "thrombosis_prediction",
    "question": "List the patient ID, sex and birthday of patient with LDH beyond normal range.",
    "evidence": "LDH beyond normal range refers to LDH > '500';",
    "SQL": "SELECT DISTINCT T1.ID, T1.SEX, T1.Birthday FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.LDH > 500",
    "difficulty": "simple"
  },
  {
    "question_id": 1156,
    "db_id": "thrombosis_prediction",
    "question": "State the ID and age of patient with positive degree of coagulation.",
    "evidence": "age refers to SUBTRACT(year(current_timestamp), year(Birthday)); positive degree of coagulation refers to RVVT = '+';",
    "SQL": "SELECT DISTINCT T1.ID, STRFTIME('%Y', CURRENT_TIMESTAMP) - STRFTIME('%Y', T1.Birthday) FROM Patient AS T1 INNER JOIN Examination AS T2 ON T1.ID = T2.ID WHERE T2.RVVT = '+'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1157,
    "db_id": "thrombosis_prediction",
    "question": "For patients with severe degree of thrombosis, list their ID, sex and disease the patient is diagnosed with.",
    "evidence": "severe degree of thrombosis refers to thrombosis = 2; disease refers to diagnosis;",
    "SQL": "SELECT DISTINCT T1.ID, T1.SEX, T1.Diagnosis FROM Patient AS T1 INNER JOIN Examination AS T2 ON T1.ID = T2.ID WHERE T2.Thrombosis = 2",
    "difficulty": "simple"
  },
  {
    "question_id": 1162,
    "db_id": "thrombosis_prediction",
    "question": "How many female patients who came at the hospital in 1997 was immediately followed at the outpatient clinic?",
    "evidence": "female refers to sex = 'F'; came at the hospital in 1997 refers to year(Description) = '1997'; immediately followed at the outpatient clinic refers to Admission = '-'",
    "SQL": "SELECT COUNT(*) FROM Patient WHERE STRFTIME('%Y', Description) = '1997' AND SEX = 'F' AND Admission = '-'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1164,
    "db_id": "thrombosis_prediction",
    "question": "How many of the patients with the most serious thrombosis cases examined in 1997 are women?",
    "evidence": "the most serious thrombosis refers to Thrombosis = '1' (the most severe one); women refers to sex = 'F'",
    "SQL": "SELECT  COUNT(*) FROM Patient AS T1 INNER JOIN Examination AS T2 ON T1.ID = T2.ID WHERE T1.SEX = 'F' AND STRFTIME('%Y', T2.`Examination Date`) = '1997' AND T2.Thrombosis = 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1166,
    "db_id": "thrombosis_prediction",
    "question": "What are the symptoms observed by the youngest patient to ever did a medical examination? Identify their diagnosis.",
    "evidence": "The larger the birthday value, the younger the person is, and vice versa; symptoms observed refers to the symptoms is not NULL",
    "SQL": "SELECT T2.Symptoms, T1.Diagnosis FROM Patient AS T1 INNER JOIN Examination AS T2 ON T1.ID = T2.ID WHERE T2.Symptoms IS NOT NULL ORDER BY T1.Birthday DESC LIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 1168,
    "db_id": "thrombosis_prediction",
    "question": "The oldest SJS patient's medical laboratory work was completed on what date, and what age was the patient when they initially arrived at the hospital?",
    "evidence": "The larger the birthday value, the younger the person is, and vice versa; 'SJS' refers to diagnosis; (SUBTRACT(year(`First Date`)), year(Birthday)); age of the patients when they initially arrived at the hospital refers to year(Birthday)",
    "SQL": "SELECT T1.Date, STRFTIME('%Y', T2.`First Date`) - STRFTIME('%Y', T2.Birthday),T2.Birthday FROM Laboratory AS T1 INNER JOIN Patient AS T2 ON T1.ID = T2.ID WHERE T2.Diagnosis = 'SJS' AND T2.Birthday IS NOT NULL ORDER BY T2.Birthday ASC LIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 1169,
    "db_id": "thrombosis_prediction",
    "question": "What is the ratio of male to female patients among all those with abnormal uric acid counts?",
    "evidence": "male refers to SEX = 'M'; female refers to SEX = 'F'; abnormal uric acid refers to UA < = '8.0' where SEX = 'M', UA < = '6.5' where SEX = 'F'; calculation = DIVIDE(SUM(UA <= '8.0' and SEX = 'M'), SUM(UA <= '6.5 and SEX = 'F'))",
    "SQL": "SELECT CAST(SUM(CASE WHEN T2.UA <= 8.0 AND T1.SEX = 'M' THEN 1 ELSE 0 END) AS REAL) / SUM(CASE WHEN T2.UA <= 6.5 AND T1.SEX = 'F' THEN 1 ELSE 0 END) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID",
    "difficulty": "challenging"
  },
  {
    "question_id": 1171,
    "db_id": "thrombosis_prediction",
    "question": "How many underage patients were examined during the course of the three-year period from 1990 to 1993?",
    "evidence": "underage patients refers to year(Birthday) < 18; three-year period from 1990 to 1993 refers to year(`Examination Date`) between '1990' and '1993'",
    "SQL": "SELECT COUNT(T1.ID) FROM Patient AS T1 INNER JOIN Examination AS T2 ON T1.ID = T2.ID WHERE STRFTIME('%Y', T2.`Examination Date`) BETWEEN '1990' AND '1993' AND STRFTIME('%Y', T2.`Examination Date`) - STRFTIME('%Y', T1.Birthday) < 18",
    "difficulty": "challenging"
  },
  {
    "question_id": 1175,
    "db_id": "thrombosis_prediction",
    "question": "How old was the patient who had the highest hemoglobin count at the time of the examination, and what is the doctor's diagnosis?",
    "evidence": "How old the patient refers to SUBTRACT(year(`Examination Date`), year(Birthday)); the highest hemoglobin count refers to MAX(HGB)",
    "SQL": "SELECT STRFTIME('%Y', T2.Date) - STRFTIME('%Y', T1.Birthday), T1.Diagnosis FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID ORDER BY T2.HGB DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1179,
    "db_id": "thrombosis_prediction",
    "question": "For the patient who was diagnosed with SLE on 1994/2/19, what was his/her anti-Cardiolipin antibody concentration status on 1993/11/12?",
    "evidence": "diagnosed with SLE refers to Diagnosis = 'SLE'; 1994/2/19 refers to Description = '1994-02-19'; anti-Cardiolipin refers to aCL IgM; 1993/11/12 refers to Examination Date = '1993/11/12'",
    "SQL": "SELECT `aCL IgA`, `aCL IgG`, `aCL IgM` FROM Examination WHERE ID IN ( SELECT ID FROM Patient WHERE Diagnosis = 'SLE' AND Description = '1994-02-19' ) AND `Examination Date` = '1993-11-12'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1185,
    "db_id": "thrombosis_prediction",
    "question": "For the patient who was born on 1959/2/18, what is the decrease rate for his/her total cholesterol from November to December in 1981?",
    "evidence": "born on 1959/2/18 refers to Birthday = '1959-02-18'; calculation = DIVISION(SUBTRACT(SUM(Birthday = '1959-02-18' and Date like '1981-11-%' THEN `T-CHO`), SUM(Birthday = '1959-02-18' and Date like '1981-12-%' THEN `T-CHO`)), SUM(Birthday = '1959-02-18' and Date like '1981-12-%' THEN `T-CHO`))",
    "SQL": "SELECT CAST((SUM(CASE WHEN T2.Date LIKE '1981-11-%' THEN T2.`T-CHO` ELSE 0 END) - SUM(CASE WHEN T2.Date LIKE '1981-12-%' THEN T2.`T-CHO` ELSE 0 END)) AS REAL) / SUM(CASE WHEN T2.Date LIKE '1981-12-%' THEN T2.`T-CHO` ELSE 0 END) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T1.Birthday = '1959-02-18'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1187,
    "db_id": "thrombosis_prediction",
    "question": "How many patients who were examined between 1987/7/6 and 1996/1/31 had a GPT level greater than 30 and an ALB level less than 4? List them by their ID.",
    "evidence": "examined between 1987/7/6 and 1996/1/31 refers to Date BETWEEN '1987-07-06' AND '1996-01-31'; GPT level greater than 30 refers to GPT > 30; ALB level less than 4 ALB < 4",
    "SQL": "SELECT DISTINCT ID FROM Laboratory WHERE Date BETWEEN '1987-07-06' AND '1996-01-31' AND GPT > 30 AND ALB < 4",
    "difficulty": "moderate"
  },
  {
    "question_id": 1189,
    "db_id": "thrombosis_prediction",
    "question": "What number of patients with a degree of thrombosis level 2 and ANA pattern of only S, have a level of anti-Cardiolip in antibody (IgM) 20% higher than average?",
    "evidence": "thrombosis level 2 refers to Thrombosis = 2; ANA pattern of only S refers to ANA = 'S'; average anti-Cardiolip in antibody (IgM) refers to AVG(`aCL IgM`); calculation = MULTIPLY(AVG + AVG, 0.2)",
    "SQL": "SELECT COUNT(*) FROM Examination WHERE Thrombosis = 2 AND `ANA Pattern` = 'S' AND `aCL IgM` > (SELECT AVG(`aCL IgM`) * 1.2 FROM Examination WHERE Thrombosis = 2 AND `ANA Pattern` = 'S')",
    "difficulty": "challenging"
  },
  {
    "question_id": 1192,
    "db_id": "thrombosis_prediction",
    "question": "List all patients who were followed up at the outpatient clinic who underwent a laboratory test in October 1991 and had a total blood bilirubin level within the normal range.",
    "evidence": "followed up at the outpatient clinic refers to Admission = '-'; laboratory test in April 1981 refers to Date like '1991-10%'; blood bilirubin level within the normal range refers to T-BIL < 2.0; ",
    "SQL": "SELECT DISTINCT T1.ID FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T1.Admission = '-' AND T2.`T-BIL` < 2.0 AND T2.Date LIKE '1991-10-%'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1195,
    "db_id": "thrombosis_prediction",
    "question": "What is the average blood albumin level for female patients with a PLT greater than 400 who have been diagnosed with SLE?",
    "evidence": "average blood albumin level refers to AVG(ALB); female refers to SEX = 'F'; PLT greater than 400 refers to PLT > 400; diagnosed with SLE refers to Diagnosis= 'SLE'",
    "SQL": "SELECT AVG(T2.ALB) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.PLT > 400 AND T1.Diagnosis = 'SLE' AND T1.SEX = 'F'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1198,
    "db_id": "thrombosis_prediction",
    "question": "How many female patients were given an APS diagnosis?",
    "evidence": "female refers to SEX = 'F'; APS diagnosis refers to Diagnosis='APS'",
    "SQL": "SELECT COUNT(ID) FROM Patient WHERE SEX = 'F' AND Diagnosis = 'APS'",
    "difficulty": "simple"
  },
  {
    "question_id": 1201,
    "db_id": "thrombosis_prediction",
    "question": "What percentage of patients who were born in 1980 and were diagnosed with RA are women?",
    "evidence": "born in 1980 refers to YEAR(BIRTHDAY) = '1980'; 'RA' refers to Diagnosis='RA' ; women refers to SEX = 'F'; calculation = DIVIDE(SUM(SEX = 'F'), COUNT(SEX)) * 100",
    "SQL": "SELECT CAST(SUM(CASE WHEN SEX = 'F' THEN 1 ELSE 0 END) AS REAL) * 100 / COUNT(ID) FROM Patient WHERE Diagnosis = 'RA' AND STRFTIME('%Y', Birthday) = '1980'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1205,
    "db_id": "thrombosis_prediction",
    "question": "Was the patient with the number 57266's uric acid within a normal range?",
    "evidence": "uric acid within a normal range refers to UA > 8.0 and SEX = 'M'OR UA > 6.5 and SEX = 'F'",
    "SQL": "SELECT CASE WHEN (T1.SEX = 'F' AND T2.UA > 6.5) OR (T1.SEX = 'M' AND T2.UA > 8.0) THEN true ELSE false END FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T1.ID = 57266",
    "difficulty": "moderate"
  },
  {
    "question_id": 1208,
    "db_id": "thrombosis_prediction",
    "question": "Provide IDs for male patients with ALT glutamic pylvic transaminase (GPT) that have history of ALT glutamic pylvic transaminase (GPT) exceed the normal range.",
    "evidence": "male refers to SEX = 'M'; ALT glutamic pylvic transaminase (GPT) exceed the normal range refers to GPT > = 60",
    "SQL": "SELECT DISTINCT T1.ID FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T1.SEX = 'M' AND T2.GPT >= 60",
    "difficulty": "moderate"
  },
  {
    "question_id": 1209,
    "db_id": "thrombosis_prediction",
    "question": "Please provide the diagnosis of patients with ALT glutamic pylvic transaminase beyond the normal range by ascending order of their date of birth.",
    "evidence": "ALT glutamic pylvic transaminase beyond the normal range refers to GPT > 60; The larger the birthday value, the younger the person is, and vice versa; ",
    "SQL": "SELECT DISTINCT T1.Diagnosis FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.GPT > 60 ORDER BY T1.Birthday ASC",
    "difficulty": "moderate"
  },
  {
    "question_id": 1220,
    "db_id": "thrombosis_prediction",
    "question": "Provide all ID, sex and birthday of patients whose urea nitrogen (UN) just within the borderline of passing?",
    "evidence": "urea nitrogen (UN) just within the borderline of passing refers to UN = 29; ",
    "SQL": "SELECT DISTINCT T1.ID, T1.SEX, T1.Birthday FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.UN = 29",
    "difficulty": "simple"
  },
  {
    "question_id": 1225,
    "db_id": "thrombosis_prediction",
    "question": "List and group all patients by sex for total bilirubin (T-BIL) level not within the normal range.",
    "evidence": "List refers to GROUP_CONCAT(DISTINCT ID); total bilirubin (T-BIL) not within normal range refers to T-BIL > = 2.0",
    "SQL": "SELECT T1.ID,T1.SEX FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.`T-BIL` >= 2.0 GROUP BY T1.SEX,T1.ID",
    "difficulty": "moderate"
  },
  {
    "question_id": 1227,
    "db_id": "thrombosis_prediction",
    "question": "What is the average age of the male patient with high cholesterol?",
    "evidence": "average age = DIVIDE(SUM(SUBTRACT(YEAR(NOW()), YEAR(birthday))), COUNT(ID)); male patient refers to sex = 'M'; high cholesterol refers to `T-CHO` > = 250;",
    "SQL": "SELECT AVG(STRFTIME('%Y', date('NOW')) - STRFTIME('%Y', T1.Birthday)) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.`T-CHO` >= 250 AND T1.SEX = 'M'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1229,
    "db_id": "thrombosis_prediction",
    "question": "For all patients with triglyceride (TG) level beyond the normal range, how many are age more than 50 years?",
    "evidence": "triglyceride (TG) level beyond the normal range refers to TG > = 200; more than 50 years of age = SUBTRACT(year(current_timestamp), year(Birthday)) > 50; Should consider DISTINCT in the final result;",
    "SQL": "SELECT COUNT(DISTINCT T1.ID) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.TG >= 200 AND STRFTIME('%Y', CURRENT_TIMESTAMP) - STRFTIME('%Y', T1.Birthday) > 50",
    "difficulty": "moderate"
  },
  {
    "question_id": 1231,
    "db_id": "thrombosis_prediction",
    "question": "For patient born between 1936-1956, how many male patients have creatinine phosphokinase beyond the normal range?",
    "evidence": "born between 1936-1956 refers to year(Birthday) BETWEEN '1936' AND '1956'; male patients refers to sex = 'M'; creatinine phosphokinase beyond the normal range refers to CPK > = 250; Should consider DISTINCT in the final result;",
    "SQL": "SELECT COUNT(DISTINCT T1.ID) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE STRFTIME('%Y', T1.Birthday) BETWEEN '1936' AND '1956' AND T1.SEX = 'M' AND T2.CPK >= 250",
    "difficulty": "challenging"
  },
  {
    "question_id": 1232,
    "db_id": "thrombosis_prediction",
    "question": "Provide ID, sex and age of patient who has blood glucose (GLU) not within normal range but with total cholesterol(T-CHO) within normal range.",
    "evidence": "age = SUBTRACT(year(current_timestamp), year(Birthday)); blood glucose (GLU) not within normal range refers to GLU > = 180; total cholesterol(T-CHO) within normal range refers to `T-CHO` < 250; ",
    "SQL": "SELECT DISTINCT T1.ID, T1.SEX , STRFTIME('%Y', CURRENT_TIMESTAMP) - STRFTIME('%Y', T1.Birthday) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.GLU >= 180 AND T2.`T-CHO` < 250",
    "difficulty": "challenging"
  },
  {
    "question_id": 1235,
    "db_id": "thrombosis_prediction",
    "question": "What are the patient's diagnosis for those who has lower red blood blood cell? State their ID and age.",
    "evidence": "patient's diagnosis refers to Diagnosis; lower red blood cell refers to RBC < 3.5; age = SUBTRACT(year(current_timestamp), year(Birthday)); ",
    "SQL": "SELECT DISTINCT T1.Diagnosis, T1.ID , STRFTIME('%Y', CURRENT_TIMESTAMP) -STRFTIME('%Y', T1.Birthday) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.RBC < 3.5",
    "difficulty": "moderate"
  },
  {
    "question_id": 1238,
    "db_id": "thrombosis_prediction",
    "question": "Among the patients who were diagnosed with SLE, who is the oldest with normal hemoglobin level. Provide the ID and sex.",
    "evidence": "diagnosed with SLE refers to Diagnosis = 'SLE'; The larger the birthday value, the younger the person is, and vice versa; normal hemoglobin level refers to 10 < HGB < 17;",
    "SQL": "SELECT T1.ID, T1.SEX FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T1.Diagnosis = 'SLE' AND T2.HGB > 10 AND T2.HGB < 17 ORDER BY T1.Birthday ASC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1239,
    "db_id": "thrombosis_prediction",
    "question": "Name the ID and age of patient with two or more laboratory examinations which show their hematoclit level exceeded the normal range.",
    "evidence": "age = SUBTRACT(year(current_timestamp), year(Birthday)); patient with two or more laboratory examinations refers to COUNT(ID) > 2; hematoclit level exceeded the normal range refers to HCT > = 52;",
    "SQL": "SELECT DISTINCT T1.ID, STRFTIME('%Y', CURRENT_TIMESTAMP) - STRFTIME('%Y', T1.Birthday) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T1.ID IN ( SELECT ID FROM Laboratory WHERE HCT >= 52 GROUP BY ID HAVING COUNT(ID) >= 2 )",
    "difficulty": "challenging"
  },
  {
    "question_id": 1241,
    "db_id": "thrombosis_prediction",
    "question": "For patients with abnormal platelet level, state the number of patients with lower than normal range. How is it compare to the number of patients with higher than normal range?",
    "evidence": "abnormal platelet level refers to PLT <= 100 or PLT >= 400; platelet level lower than normal range refers to PLT < 100; calculation = SUBTRACT(SUM(PLT < 100), SUM(PLT > 400)); platelet level higher than normal range refers to PLT > 400;",
    "SQL": "SELECT SUM(CASE WHEN T2.PLT <= 100 THEN 1 ELSE 0 END) - SUM(CASE WHEN T2.PLT >= 400 THEN 1 ELSE 0 END) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID",
    "difficulty": "challenging"
  },
  {
    "question_id": 1242,
    "db_id": "thrombosis_prediction",
    "question": "For laboratory examinations take in 1984, list all patients below 50 years old with normal platelet level.",
    "evidence": "laboratory examinations take in 1984 refers to YEAR(Date) = '1984'; below 50 years old = SUBTRACT(year(current_timestamp), year(Birthday)) < 50; normal platelet level refers to PLT between 100 and 400; ",
    "SQL": "SELECT DISTINCT T1.ID FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.PLT BETWEEN 100 AND 400 AND STRFTIME('%Y', T2.Date) - STRFTIME('%Y', T1.Birthday) < 50 AND STRFTIME('%Y', T2.Date) = '1984'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1243,
    "db_id": "thrombosis_prediction",
    "question": "For all patients who are older than 55 years old, what is the percentage of female who has abnormal prothrombin time (PT)?",
    "evidence": "older than 55 years old = SUBTRACT(year(current_timestamp), year(Birthday)) > 55; abnormal prothrombin time (PT) refers to PT > = 14; percentage = DIVIDE(SUM(PT > = 14 AND SEX = 'F'), SUM(PT > = 14)) * 100; female refers to sex = 'F'; ",
    "SQL": "SELECT CAST(SUM(CASE WHEN T2.PT >= 14 AND T1.SEX = 'F' THEN 1 ELSE 0 END) AS REAL) * 100 / COUNT(CASE WHEN T2.PT >= 14 THEN 1 ELSE 0 END) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE STRFTIME('%Y', CURRENT_TIMESTAMP) - STRFTIME('%Y', T1.Birthday) > 55",
    "difficulty": "challenging"
  },
  {
    "question_id": 1247,
    "db_id": "thrombosis_prediction",
    "question": "Among the male patients who have a normal level of white blood cells, how many of them have an abnormal fibrinogen level?",
    "evidence": "male patients refers to Sex = 'M'; normal level of white blood cells refers to WBC > 3.5 and WBC <9.0; abnormal fibrinogen level refers to FG < = 150 or FG > = 450; Don't compute repetitive ones.",
    "SQL": "SELECT COUNT(DISTINCT T1.ID) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.FG <= 150 OR T2.FG >= 450 AND T2.WBC > 3.5 AND T2.WBC < 9.0 AND T1.SEX = 'M'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1251,
    "db_id": "thrombosis_prediction",
    "question": "How many patients with an Ig G higher than normal?",
    "evidence": "Ig G higher than normal refers to IGG >= 2000; Should consider DISTINCT in the final result;",
    "SQL": "SELECT COUNT(DISTINCT T1.ID) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID INNER JOIN Examination AS T3 ON T3.ID = T2.ID WHERE T2.IGG >= 2000",
    "difficulty": "simple"
  },
  {
    "question_id": 1252,
    "db_id": "thrombosis_prediction",
    "question": "Among the patients with a normal Ig G level, how many of them have symptoms?",
    "evidence": "normal Ig G level refers to IGG > 900 and IGG < 2000; have symptoms refers to Symptoms IS NOT NULL;",
    "SQL": "SELECT COUNT(T1.ID) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID INNER JOIN Examination AS T3 ON T3.ID = T2.ID WHERE T2.IGG BETWEEN 900 AND 2000 AND T3.Symptoms IS NOT NULL",
    "difficulty": "moderate"
  },
  {
    "question_id": 1254,
    "db_id": "thrombosis_prediction",
    "question": "How many patients with a normal Ig A level came to the hospital after 1990/1/1?",
    "evidence": "normal Ig A level refers to IGA > 80 AND IGA < 500; came to the hospital after 1990/1/1 refers to YEAR(`First Date`) > = 1990;",
    "SQL": "SELECT COUNT(T1.ID) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.IGA BETWEEN 80 AND 500 AND  strftime('%Y',  T1.`First Date`) > '1990'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1255,
    "db_id": "thrombosis_prediction",
    "question": "For the patients with an abnormal Ig M level, what is the most common disease they are diagnosed with?",
    "evidence": "abnormal Ig M level refers to IGM <=40 OR IGM >= 400; most common disease refers to MAX(COUNT(Diagnosis));",
    "SQL": "SELECT T1.Diagnosis FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.IGM NOT BETWEEN 40 AND 400 GROUP BY T1.Diagnosis ORDER BY COUNT(T1.Diagnosis) DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1256,
    "db_id": "thrombosis_prediction",
    "question": "How many patients with a abnormal C-reactive protein don't have their data recorded?",
    "evidence": "abnormal C-reactive protein refers to CRP ='+'; don't have data recorded refers to Description IS NULL;",
    "SQL": "SELECT COUNT(T1.ID) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE (T2.CRP = '+' ) AND T1.Description IS NULL",
    "difficulty": "moderate"
  },
  {
    "question_id": 1257,
    "db_id": "thrombosis_prediction",
    "question": "Among the patients whose creatinine level is abnormal, how many of them aren't 70 yet?",
    "evidence": "creatinine level is abnormal refers to CRE >= 1.5; aren't 70 yet refers to SUBTRACT((YEAR(CURDATE()), YEAR(Birthday))) < 70; ",
    "SQL": "SELECT COUNT(DISTINCT T1.ID) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.CRE >= 1.5 AND STRFTIME('%Y', Date('now')) - STRFTIME('%Y', T1.Birthday) < 70",
    "difficulty": "challenging"
  },
  {
    "question_id": 1265,
    "db_id": "thrombosis_prediction",
    "question": "How many patients have a normal level of anti-ribonuclear protein and have been admitted to the hospital?",
    "evidence": "normal level of anti-ribonuclear protein refers to RNP = '-', '+-'; And'-' means 'negative'; '+-' refers to '0'; admitted to the hospital refers to Admission = '+'; Should consider DISTINCT in the final result;",
    "SQL": "SELECT COUNT(DISTINCT T1.ID) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.RNP = 'negative' OR T2.RNP = '0' AND T1.Admission = '+'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1267,
    "db_id": "thrombosis_prediction",
    "question": "Among the patients with normal anti-SM, how many of them does not have thrombosis?",
    "evidence": "normal anti-SM refers to SM IN('-', '+-'); SM = 'negative' means '-'; SM = '0' means '+-'; SM = '1' means '+'; does not have thrombosis refers to Thrombosis = 0;",
    "SQL": "SELECT COUNT(T1.ID) FROM Examination AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.SM IN ('negative','0') AND T1.Thrombosis = 0",
    "difficulty": "moderate"
  },
  {
    "question_id": 1270,
    "db_id": "thrombosis_prediction",
    "question": "Among the patients who has a normal anti-scl70, how many of them are female and does not have any symptom?",
    "evidence": "normal anti-scl70 refers to SC170 IN('negative', '0'); female refers to Sex = 'F'; does not have any symptom refers to symptoms IS NULL; Should consider DISTINCT in the final result;",
    "SQL": "SELECT COUNT(DISTINCT T1.ID) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID INNER JOIN Examination AS T3 ON T3.ID = T2.ID WHERE (T2.SC170 = 'negative' OR T2.SC170 = '0') AND T1.SEX = 'F' AND T3.Symptoms IS NULL",
    "difficulty": "challenging"
  },
  {
    "question_id": 1275,
    "db_id": "thrombosis_prediction",
    "question": "Among the patients who has a normal level of anti-centromere and a normal level of anti-SSB, how many of them are male?",
    "evidence": "normal level of anti-centromere refers to CENTROMEA IN('-', '+-'); normal level of anti-SSB refers to SSB IN('-', '+-'); male refers to Sex = 'M'; Should consider DISTINCT in the final result;",
    "SQL": "SELECT COUNT(DISTINCT T1.ID) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.CENTROMEA IN ('negative', '0') AND T2.SSB IN ('negative', '0') AND T1.SEX = 'M'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1281,
    "db_id": "thrombosis_prediction",
    "question": "Among the patients who have an abnormal level of glutamic oxaloacetic transaminase, when was the youngest of them born?",
    "evidence": "abnormal level of glutamic oxaloacetic transaminase refers to GOT > = 60; The larger the birthday value, the younger the person is, and vice versa;",
    "SQL": "SELECT T1.Birthday FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID WHERE T2.GOT >= 60 ORDER BY T1.Birthday DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1302,
    "db_id": "thrombosis_prediction",
    "question": "For the patients with a normal range of creatinine phosphokinase, how many of them have a positive measure of degree of coagulation?",
    "evidence": "normal range of creatinine phosphokinase refers to CPK < 250; positive measure of degree of coagulation refers to KCT = '+' or RVVT = '+' or LAC = '+' ;",
    "SQL": "SELECT COUNT(T1.ID) FROM Patient AS T1 INNER JOIN Laboratory AS T2 ON T1.ID = T2.ID INNER JOIN Examination AS T3 ON T1.ID = T3.ID WHERE T2.CPK < 250 AND (T3.KCT = '+' OR T3.RVVT = '+' OR T3.LAC = '+')",
    "difficulty": "challenging"
  },
  {
    "question_id": 1025,
    "db_id": "european_football_2",
    "question": "Give the name of the league had the most goals in the 2016 season?",
    "evidence": "league that had the most goals refers to MAX(SUM(home_team_goal, away_team_goal)); 2016 season refers to season = '2015/2016';",
    "SQL": "SELECT t2.name FROM Match AS t1 INNER JOIN League AS t2 ON t1.league_id = t2.id WHERE t1.season = '2015/2016' GROUP BY t2.name ORDER BY SUM(t1.home_team_goal + t1.away_team_goal) DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1028,
    "db_id": "european_football_2",
    "question": "In Scotland Premier League, which away team won the most during the 2010 season?",
    "evidence": "Final result should return the Team.team_long_name; Scotland Premier League refers to League.name = 'Scotland Premier League'; away team refers to away_team_api_id; away team that won the most refers to MAX(SUBTRACT(away_team_goal, home_team_goal) > 0); 2010 season refers to season = '2009/2010'; won the most refers to MAX(COUNT(*));",
    "SQL": "SELECT teamInfo.team_long_name FROM League AS leagueData INNER JOIN Match AS matchData ON leagueData.id = matchData.league_id INNER JOIN Team AS teamInfo ON matchData.away_team_api_id = teamInfo.team_api_id WHERE leagueData.name = 'Scotland Premier League' AND matchData.season = '2009/2010' AND matchData.away_team_goal - matchData.home_team_goal > 0 GROUP BY matchData.away_team_api_id ORDER BY COUNT(*) DESC LIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 1029,
    "db_id": "european_football_2",
    "question": "What are the speed in which attacks are put together of the top 4 teams with the highest build Up Play Speed?",
    "evidence": "speed in which attacks are put together refers to buildUpPlaySpeed;highest build up play speed refers to MAX(buildUpPlaySpeed)",
    "SQL": "SELECT t1.buildUpPlaySpeed FROM Team_Attributes AS t1 INNER JOIN Team AS t2 ON t1.team_api_id = t2.team_api_id ORDER BY t1.buildUpPlaySpeed ASC LIMIT 4",
    "difficulty": "moderate"
  },
  {
    "question_id": 1030,
    "db_id": "european_football_2",
    "question": "Give the name of the league had the most matches end as draw in the 2016 season?",
    "evidence": "most matches end as draw refers to MAX(SUM(home_team_goal = away_team_goal)); 2016 season refers to season = '2015/2016';",
    "SQL": "SELECT t2.name FROM Match AS t1 INNER JOIN League AS t2 ON t1.league_id = t2.id WHERE t1.season = '2015/2016' AND t1.home_team_goal = t1.away_team_goal GROUP BY t2.name ORDER BY COUNT(t1.id) DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1031,
    "db_id": "european_football_2",
    "question": "At present, calculate for the player's age who have a sprint speed of no less than 97 between 2013 to 2015.",
    "evidence": "players age at present = SUBTRACT((DATETIME(), birthday)); sprint speed of no less than 97 refers to sprint_speed > = 97; between 2013 to 2015 refers to YEAR(date) > = '2013' AND YEAR(date) < = '2015'; ",
    "SQL": "SELECT DISTINCT DATETIME() - T2.birthday age FROM Player_Attributes AS t1 INNER JOIN Player AS t2 ON t1.player_api_id = t2.player_api_id WHERE STRFTIME('%Y',t1.`date`) >= '2013' AND STRFTIME('%Y',t1.`date`) <= '2015' AND t1.sprint_speed >= 97",
    "difficulty": "challenging"
  },
  {
    "question_id": 1032,
    "db_id": "european_football_2",
    "question": "Give the name of the league with the highest matches of all time and how many matches were played in the said league.",
    "evidence": " league with highest matches of all time refers to MAX(COUNT(league_id));",
    "SQL": "SELECT t2.name, t1.max_count FROM League AS t2 JOIN (SELECT league_id, MAX(cnt) AS max_count FROM (SELECT league_id, COUNT(id) AS cnt FROM Match GROUP BY league_id) AS subquery) AS t1 ON t1.league_id = t2.id",
    "difficulty": "moderate"
  },
  {
    "question_id": 1035,
    "db_id": "european_football_2",
    "question": "Give the team_fifa_api_id of teams with more than 50 but less than 60 build-up play speed.",
    "evidence": "teams with more than 50 but less than 60 build-up play speed refers to buildUpPlaySpeed >50 AND buildUpPlaySpeed <60; ",
    "SQL": "SELECT DISTINCT team_fifa_api_id FROM Team_Attributes WHERE buildUpPlaySpeed > 50 AND buildUpPlaySpeed < 60",
    "difficulty": "simple"
  },
  {
    "question_id": 1036,
    "db_id": "european_football_2",
    "question": "List the long name of teams with above-average build-up play passing in 2012.",
    "evidence": "long name of teams refers to team_long_name; build-up play passing refers to buildUpPlayPassing; above-average build-up play passing = buildUpPlayPassing > DIVIDE(SUM(buildUpPlayPassing), COUNT(team_long_name) WHERE buildUpPlayPassing IS NOT NULL); in 2012 refers to strftime('%Y', date) = '2012'; ",
    "SQL": "SELECT DISTINCT t4.team_long_name FROM Team_Attributes AS t3 INNER JOIN Team AS t4 ON t3.team_api_id = t4.team_api_id WHERE SUBSTR(t3.`date`, 1, 4) = '2012' AND t3.buildUpPlayPassing > ( SELECT CAST(SUM(t2.buildUpPlayPassing) AS REAL) / COUNT(t1.id) FROM Team AS t1 INNER JOIN Team_Attributes AS t2 ON t1.team_api_id = t2.team_api_id WHERE STRFTIME('%Y',t2.`date`) = '2012')",
    "difficulty": "challenging"
  },
  {
    "question_id": 1037,
    "db_id": "european_football_2",
    "question": "Calculate the percentage of players who prefer left foot, who were born between 1987 and 1992.",
    "evidence": "players who prefer left foot refers to preferred_foot = 'left'; percentage of players who prefer left foot = DIVIDE(MULTIPLY((SUM(preferred_foot = 'left'), 100)), COUNT(player_fifa_api_id)); born between 1987 and 1992 refers to YEAR(birthday) BETWEEN '1987' AND '1992';",
    "SQL": "SELECT CAST(COUNT(CASE WHEN t2.preferred_foot = 'left' THEN t1.id ELSE NULL END) AS REAL) * 100 / COUNT(t1.id) percent FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_api_id = t2.player_api_id WHERE SUBSTR(t1.birthday, 1, 4) BETWEEN '1987' AND '1992'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1039,
    "db_id": "european_football_2",
    "question": "Find the average number of long-shot done by Ahmed Samir Farag.",
    "evidence": "average number of long shot = DIVIDE(SUM(long_shots), COUNT(player_fifa_api_id));",
    "SQL": "SELECT CAST(SUM(t2.long_shots) AS REAL) / COUNT(t2.`date`) FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_api_id = t2.player_api_id WHERE t1.player_name = 'Ahmed Samir Farag'",
    "difficulty": "simple"
  },
  {
    "question_id": 1040,
    "db_id": "european_football_2",
    "question": "List the top 10 players' names whose heights are above 180 in descending order of average heading accuracy.",
    "evidence": "heights are above 180 refers to Player.height > 180; average heading accuracy = DIVIDE(SUM(heading_accuracy), COUNT(player_fifa_api_id));",
    "SQL": "SELECT t1.player_name FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_api_id = t2.player_api_id WHERE t1.height > 180 GROUP BY t1.id ORDER BY CAST(SUM(t2.heading_accuracy) AS REAL) / COUNT(t2.`player_fifa_api_id`) DESC LIMIT 10",
    "difficulty": "moderate"
  },
  {
    "question_id": 1042,
    "db_id": "european_football_2",
    "question": "List the name of leagues in which the average goals by the home team is higher than the away team in the 2009/2010 season.",
    "evidence": "name of league refers to League.name; average goals by the home team is higher than the away team = AVG(home_team_goal) > AVG(away_team_goal); AVG(xx_goal) = SUM(xx_goal) / COUNT(DISTINCT Match.id); 2009/2010 season refers to season = '2009/2010'",
    "SQL": "SELECT t1.name FROM League AS t1 INNER JOIN Match AS t2 ON t1.id = t2.league_id WHERE t2.season = '2009/2010' GROUP BY t1.name HAVING (CAST(SUM(t2.home_team_goal) AS REAL) / COUNT(DISTINCT t2.id)) - (CAST(SUM(t2.away_team_goal) AS REAL) / COUNT(DISTINCT t2.id)) > 0",
    "difficulty": "challenging"
  },
  {
    "question_id": 1044,
    "db_id": "european_football_2",
    "question": "List the football players with a birthyear of 1970 and a birthmonth of October.",
    "evidence": "players with a birthyear of 1970 and a birthmonth of October refers to substr(birthday,1,7) AS 'year-month',WHERE year = '1970' AND month = '10';",
    "SQL": "SELECT player_name FROM Player WHERE SUBSTR(birthday, 1, 7) = '1970-10'",
    "difficulty": "simple"
  },
  {
    "question_id": 1048,
    "db_id": "european_football_2",
    "question": "What is the overall rating of the football player Gabriel Tamas in year 2011?",
    "evidence": "in year 2011 refers to strftime('%Y', date) = '2011';",
    "SQL": "SELECT t2.overall_rating FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_api_id = t2.player_api_id WHERE t1.player_name = 'Gabriel Tamas' AND strftime('%Y', t2.date) = '2011'",
    "difficulty": "simple"
  },
  {
    "question_id": 1057,
    "db_id": "european_football_2",
    "question": "Calculate the average home team goal in the 2010/2011 season in the country of Poland.",
    "evidence": "average home team goal = AVG(home_team_goal)= SUM(home_team_goal) / COUNT(DISTINCT Match.id) WHERE name = 'Poland' and season = '2010/2011';",
    "SQL": "SELECT CAST(SUM(t2.home_team_goal) AS REAL) / COUNT(t2.id) FROM Country AS t1 INNER JOIN Match AS t2 ON t1.id = t2.country_id WHERE t1.name = 'Poland' AND t2.season = '2010/2011'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1058,
    "db_id": "european_football_2",
    "question": "Who has the highest average finishing rate between the highest and shortest football player?",
    "evidence": "finishing rate refers to finishing; highest average finishing rate = MAX(AVG(finishing)); highest football player refers to MAX(height); shortest football player refers to MIN(height);",
    "SQL": "SELECT A FROM ( SELECT AVG(finishing) result, 'Max' A FROM Player AS T1 INNER JOIN Player_Attributes AS T2 ON T1.player_api_id = T2.player_api_id WHERE T1.height = ( SELECT MAX(height) FROM Player ) UNION SELECT AVG(finishing) result, 'Min' A FROM Player AS T1 INNER JOIN Player_Attributes AS T2 ON T1.player_api_id = T2.player_api_id WHERE T1.height = ( SELECT MIN(height) FROM Player ) ) ORDER BY result DESC LIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 1068,
    "db_id": "european_football_2",
    "question": "From 2010 to 2015, what was the average overall rating of players who are higher than 170?",
    "evidence": "from 2010 to 2015 refers to strftime('%Y', date) >= '2010' AND <= '2015'; average overall rating = SUM(t2.overall_rating)/ COUNT(t2.id); higher than 170 refers to Player.height > 170;",
    "SQL": "SELECT CAST(SUM(t2.overall_rating) AS REAL) / COUNT(t2.id) FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_api_id = t2.player_api_id WHERE t1.height > 170 AND STRFTIME('%Y',t2.`date`) >= '2010' AND STRFTIME('%Y',t2.`date`) <= '2015'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1076,
    "db_id": "european_football_2",
    "question": "What is the difference of the average ball control score between Abdou Diallo and Aaron Appindangoye\n?",
    "evidence": "difference of the average ball control = SUBTRACT(AVG(ball_control WHERE player_name = 'Abdou Diallo'), AVG(ball_control WHERE player_name = 'Aaron Appindangoye')); AVG(ball_control WHERE player_name = 'XX XX') = SUM(CASE WHEN player_name = 'XX XX' THEN ball_control ELSE 0 END) / COUNT(CASE WHEN player_name = 'XX XX' THEN id ELSE NULL END)",
    "SQL": "SELECT CAST(SUM(CASE WHEN t1.player_name = 'Abdou Diallo' THEN t2.ball_control ELSE 0 END) AS REAL) / COUNT(CASE WHEN t1.player_name = 'Abdou Diallo' THEN t2.id ELSE NULL END) - CAST(SUM(CASE WHEN t1.player_name = 'Aaron Appindangoye' THEN t2.ball_control ELSE 0 END) AS REAL) / COUNT(CASE WHEN t1.player_name = 'Aaron Appindangoye' THEN t2.id ELSE NULL END) FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_api_id = t2.player_api_id",
    "difficulty": "challenging"
  },
  {
    "question_id": 1078,
    "db_id": "european_football_2",
    "question": "Which player is older, Aaron Lennon or Abdelaziz Barrada?",
    "evidence": "The larger the birthday value, the younger the person is, and vice versa;",
    "SQL": "SELECT player_name FROM Player WHERE player_name IN ('Aaron Lennon', 'Abdelaziz Barrada') ORDER BY birthday ASC LIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 1079,
    "db_id": "european_football_2",
    "question": "Which player is the tallest?",
    "evidence": "tallest player refers to MAX(height);",
    "SQL": "SELECT player_name FROM Player ORDER BY height DESC LIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 1080,
    "db_id": "european_football_2",
    "question": "Among the players whose preferred foot was the left foot when attacking, how many of them would remain in his position when the team attacked?",
    "evidence": "preferred foot when attacking was the left refers to preferred_foot = 'left'; players who would remain in his position when the team attacked refers to attacking_work_rate = 'low';",
    "SQL": "SELECT COUNT(player_api_id) FROM Player_Attributes WHERE preferred_foot = 'left' AND attacking_work_rate = 'low'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1084,
    "db_id": "european_football_2",
    "question": "Among the players born before the year 1986, how many of them would remain in his position and defense while the team attacked?",
    "evidence": "players born before the year 1986 refers to strftime('%Y', birthday)<'1986'; players who would remain in his position and defense while the team attacked refers to defensive_work_rate = 'high'; Should consider DISTINCT in the final result;",
    "SQL": "SELECT COUNT(DISTINCT t1.player_name) FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_api_id = t2.player_api_id WHERE STRFTIME('%Y',t1.birthday) < '1986' AND t2.defensive_work_rate = 'high'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1088,
    "db_id": "european_football_2",
    "question": "Please list the names of the players whose volley score and dribbling score are over 70.",
    "evidence": "volley score are over 70 refers to volleys > 70; dribbling score refers to dribbling are over 70 refers to dribbling > 70;",
    "SQL": "SELECT DISTINCT t1.player_name FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_api_id = t2.player_api_id WHERE t2.volleys > 70 AND t2.dribbling > 70",
    "difficulty": "moderate"
  },
  {
    "question_id": 1091,
    "db_id": "european_football_2",
    "question": "How many matches were held in the Belgium Jupiler League in April, 2009?",
    "evidence": "Belgium Jupiler League refers to League.name = 'Belgium Jupiler League'; in April, 2009 refers to SUBSTR(`date`, 1, 7);",
    "SQL": "SELECT COUNT(t2.id) FROM League AS t1 INNER JOIN Match AS t2 ON t1.id = t2.league_id WHERE t1.name = 'Belgium Jupiler League' AND SUBSTR(t2.`date`, 1, 7) = '2009-04'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1092,
    "db_id": "european_football_2",
    "question": "Give the name of the league had the most matches in the 2008/2009 season?",
    "evidence": "league that had the most matches in the 2008/2009 season refers to MAX(league_name WHERE season = '2008/2009');",
    "SQL": "SELECT t1.name FROM League AS t1 JOIN Match AS t2 ON t1.id = t2.league_id WHERE t2.season = '2008/2009' GROUP BY t1.name HAVING COUNT(t2.id) = (SELECT MAX(match_count) FROM (SELECT COUNT(t2.id) AS match_count FROM Match AS t2 WHERE t2.season = '2008/2009' GROUP BY t2.league_id))",
    "difficulty": "simple"
  },
  {
    "question_id": 1094,
    "db_id": "european_football_2",
    "question": "How much higher in percentage is Ariel Borysiuk's overall rating than that of Paulin Puel?",
    "evidence": "how much higher in percentage = MULTIPLY(DIVIDE(SUBTRACT(overall_rating WHERE player_name = 'Ariel Borysiuk', overall_rating WHERE player_name = 'Paulin Puel'), overall_rating WHERE player_name = 'Paulin Puel'), 100);",
    "SQL": "SELECT (SUM(CASE WHEN t1.player_name = 'Ariel Borysiuk' THEN t2.overall_rating ELSE 0 END) * 1.0 - SUM(CASE WHEN t1.player_name = 'Paulin Puel' THEN t2.overall_rating ELSE 0 END)) * 100 / SUM(CASE WHEN t1.player_name = 'Paulin Puel' THEN t2.overall_rating ELSE 0 END) FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_api_id = t2.player_api_id",
    "difficulty": "challenging"
  },
  {
    "question_id": 1096,
    "db_id": "european_football_2",
    "question": "Calculate the average overall rating of Pietro Marino.",
    "evidence": "Pietro Marino refers to player_name = 'Pietro Marino'; average overall rating AVG(T1.overall_rating)",
    "SQL": "SELECT CAST(SUM(t2.overall_rating) AS REAL) / COUNT(t2.id) FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_api_id = t2.player_api_id WHERE t1.player_name = 'Pietro Marino'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1098,
    "db_id": "european_football_2",
    "question": "What is Ajax's highest chance creation passing score and what is it classified as?",
    "evidence": "Ajax's refers to team_long_name = 'Ajax'; chance creation passing score refers to MAX(chanceCreationPassing); classified refer to chanceCreationPassingClass",
    "SQL": "SELECT t2.chanceCreationPassing, t2.chanceCreationPassingClass FROM Team AS t1 INNER JOIN Team_Attributes AS t2 ON t1.team_api_id = t2.team_api_id WHERE t1.team_long_name = 'Ajax' ORDER BY t2.chanceCreationPassing DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1102,
    "db_id": "european_football_2",
    "question": "For the players who had a 77 points overall rating on 2016/6/23, who was the oldest? Give the name of the player.",
    "evidence": "77 points overall rating refers to overall_rating = 77; on 2016/6/23 refers to date LIKE '2016-06-23%'; The larger the birthday value, the younger the person is, and vice versa;",
    "SQL": "SELECT t1.player_name FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_api_id = t2.player_api_id WHERE SUBSTR(t2.`date`, 1, 10) = '2016-06-23' AND t2.overall_rating = 77 ORDER BY t1.birthday ASC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1103,
    "db_id": "european_football_2",
    "question": "What was the overall rating for Aaron Mooy on 2016/2/4?",
    "evidence": "Aaron Mooy refers to player_name = 'Aaron Mooy'; on 2016/2/4 refers to date LIKE '2016-02-04%';",
    "SQL": "SELECT t2.overall_rating FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_api_id = t2.player_api_id WHERE SUBSTR(t2.`date`, 1, 10) = '2016-02-04' AND t1.player_name = 'Aaron Mooy'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1105,
    "db_id": "european_football_2",
    "question": "How was Francesco Migliore's attacking work rate on 2015/5/1?",
    "evidence": "Francesco Migliore refers to player_name = 'Francesco Migliore'; on 2015/5/1 refers to date LIKE '2015-05-01%';",
    "SQL": "SELECT t2.attacking_work_rate FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_api_id = t2.player_api_id WHERE t2.`date` LIKE '2015-05-01%' AND t1.player_name = 'Francesco Migliore'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1107,
    "db_id": "european_football_2",
    "question": "When was the first time did Kevin Constant have his highest crossing score? Give the date.",
    "evidence": "Kevin Constant refers to player_name = 'Kevin Constant'; highest crossing score refers to MAX(crossing)",
    "SQL": "SELECT `date` FROM ( SELECT t2.crossing, t2.`date` FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_fifa_api_id = t2.player_fifa_api_id WHERE t1.player_name = 'Kevin Constant' ORDER BY t2.crossing DESC) ORDER BY date DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1110,
    "db_id": "european_football_2",
    "question": "Tell the build Up play passing class for \"FC Lorient\" on 2010/2/22.",
    "evidence": "\"FC Lorient\" refers to team_long_name = 'FC Lorient'; on 2010/2/22 refers to date LIKE '2010-02-22%';",
    "SQL": "SELECT t2.buildUpPlayPassingClass FROM Team AS t1 INNER JOIN Team_Attributes AS t2 ON t1.team_api_id = t2.team_api_id WHERE t1.team_long_name = 'FC Lorient' AND t2.`date` LIKE '2010-02-22%'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1113,
    "db_id": "european_football_2",
    "question": "For the team \"Hannover 96\", what was its defence aggression class on 2015/9/10?",
    "evidence": "\"Hannover 96\" refers to team_long_name = 'Hannover 96'; on 2015/9/10 refers to date LIKE '2015-09-10%';",
    "SQL": "SELECT t2.defenceAggressionClass FROM Team AS t1 INNER JOIN Team_Attributes AS t2 ON t1.team_api_id = t2.team_api_id WHERE t1.team_long_name = 'Hannover 96' AND t2.`date` LIKE '2015-09-10%'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1114,
    "db_id": "european_football_2",
    "question": "What was the average overall rating for Marko Arnautovic from 2007/2/22 to 2016/4/21?",
    "evidence": "average overall rating refers to avg(overall_rating); Marko Arnautovic refers to player_name = 'Marko Arnautovic'; from 2007/2/22 to 2016/4/21 refers to the first 10 characters of date BETWEEN '2007-02-22' and '2016-04-21'",
    "SQL": "SELECT CAST(SUM(t2.overall_rating) AS REAL) / COUNT(t2.id) FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_fifa_api_id = t2.player_fifa_api_id WHERE t1.player_name = 'Marko Arnautovic' AND SUBSTR(t2.`date`, 1, 10) BETWEEN '2007-02-22' AND '2016-04-21'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1115,
    "db_id": "european_football_2",
    "question": "What percentage is Landon Donovan's overall rating higher than Jordan Bowery on 2013/7/12?",
    "evidence": "Landon Donovan's refers to player_name = 'Landon Donovan'; Jordan Bowery refers to player_name = 'Jordan Bowery'; percentage refers to DIVIDE(SUBTRACT(player_name = 'Landon Donovan' overall_rating; player_name = 'Jordan Bowery' overall_rating), player_name = 'Landon Donovan' overall_rating)*100",
    "SQL": "SELECT (SUM(CASE WHEN t1.player_name = 'Landon Donovan' THEN t2.overall_rating ELSE 0 END) * 1.0 - SUM(CASE WHEN t1.player_name = 'Jordan Bowery' THEN t2.overall_rating ELSE 0 END)) * 100 / SUM(CASE WHEN t1.player_name = 'Landon Donovan' THEN t2.overall_rating ELSE 0 END) LvsJ_percent FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_fifa_api_id = t2.player_fifa_api_id WHERE SUBSTR(t2.`date`, 1, 10) = '2013-07-12'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1116,
    "db_id": "european_football_2",
    "question": "List down most tallest players' name.",
    "evidence": "tallest refers to rank based on the height in descending order; Most tallest players refers to rank = 1 ",
    "SQL": "SELECT player_name FROM (SELECT player_name, height, DENSE_RANK() OVER (ORDER BY height DESC) as rank FROM Player) WHERE rank = 1",
    "difficulty": "simple"
  },
  {
    "question_id": 1122,
    "db_id": "european_football_2",
    "question": "State the name of the most strongest player.",
    "evidence": "strongest players refers to player has MAX(overall_rating)",
    "SQL": "SELECT DISTINCT t1.player_name FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_api_id = t2.player_api_id WHERE t2.overall_rating = (SELECT MAX(overall_rating) FROM Player_Attributes)",
    "difficulty": "simple"
  },
  {
    "question_id": 1124,
    "db_id": "european_football_2",
    "question": "Who are the players that tend to be attacking when their mates were doing attack moves? List down their name.",
    "evidence": "tend to be attacking when their mates were doing attack moves refers to attacking_work_rate = 'high';",
    "SQL": "SELECT DISTINCT t1.player_name FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_api_id = t2.player_api_id WHERE t2.attacking_work_rate = 'high'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1130,
    "db_id": "european_football_2",
    "question": "What are the short name of team who played safe while creating chance of passing?",
    "evidence": "played safe while creating chance of passing refers to chanceCreationPassingClass = 'Safe'; short name of team refers to team_short_name",
    "SQL": "SELECT DISTINCT t1.team_short_name FROM Team AS t1 INNER JOIN Team_Attributes AS t2 ON t1.team_api_id = t2.team_api_id WHERE t2.chanceCreationPassingClass = 'Safe'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1133,
    "db_id": "european_football_2",
    "question": "How many football players born after the 1990s have the first name \"Aaron\"?",
    "evidence": "first name \"Aaron\" refers to player_name LIKE 'Aaron%'; born after the 1990s refers to birthday > '1990'",
    "SQL": "SELECT COUNT(id) FROM Player WHERE birthday > '1990' AND player_name LIKE 'Aaron%'",
    "difficulty": "simple"
  },
  {
    "question_id": 1134,
    "db_id": "european_football_2",
    "question": "What is the difference between players 6 and 23's jumping scores?",
    "evidence": "difference between players 6 and 23's jumping scores refers to SUBTRACT(jumping AND id = 6,jumping AND id = 23)",
    "SQL": "SELECT SUM(CASE WHEN t1.id = 6 THEN t1.jumping ELSE 0 END) - SUM(CASE WHEN t1.id = 23 THEN t1.jumping ELSE 0 END) FROM Player_Attributes AS t1",
    "difficulty": "simple"
  },
  {
    "question_id": 1135,
    "db_id": "european_football_2",
    "question": "Please provide top four football players' IDs who are among the lowest potential players and prefer to use the right foot when attacking.",
    "evidence": "lowest potential players refers to MIN(potential); prefer to use the right foot when attacking refers to preferred_foot = 'right'",
    "SQL": "SELECT id FROM Player_Attributes WHERE preferred_foot = 'right' ORDER BY potential ASC LIMIT 4",
    "difficulty": "moderate"
  },
  {
    "question_id": 1136,
    "db_id": "european_football_2",
    "question": "How many players had the highest potential score for crossing that preferred to use their left foots while attacking?",
    "evidence": "highest potential score for crossing refers to MAX(crossing); preferred to use their left foots while attacking refers to preferred_foot = 'left'",
    "SQL": "SELECT COUNT(t1.id) FROM Player_Attributes AS t1 WHERE t1.preferred_foot = 'left' AND t1.crossing = ( SELECT MAX(crossing) FROM Player_Attributes)",
    "difficulty": "moderate"
  },
  {
    "question_id": 1139,
    "db_id": "european_football_2",
    "question": "What was the final score for the match on September 24, 2008, in the Belgian Jupiler League between the home team and the away team?",
    "evidence": "September 24, 2008 refers to date like '2008-09-24%'; in the Belgian Jupiler League refers to League.name = 'Belgium Jupiler League'; final score for home team refers to home_team_goal; final score for away team refers to away_team_goal",
    "SQL": "SELECT t2.home_team_goal, t2.away_team_goal FROM League AS t1 INNER JOIN Match AS t2 ON t1.id = t2.league_id WHERE t1.name = 'Belgium Jupiler League' AND t2.`date` LIKE '2008-09-24%'",
    "difficulty": "challenging"
  },
  {
    "question_id": 1141,
    "db_id": "european_football_2",
    "question": "Does the KSV Cercle Brugge team have a slow, balanced or fast speed class?",
    "evidence": "KSV Cercle Brugge refers to team_long_name = 'KSV Cercle Brugge'; speed class refers to buildUpPlaySpeedClass",
    "SQL": "SELECT DISTINCT t1.buildUpPlaySpeedClass FROM Team_Attributes AS t1 INNER JOIN Team AS t2 ON t1.team_api_id = t2.team_api_id WHERE t2.team_long_name = 'KSV Cercle Brugge'",
    "difficulty": "moderate"
  },
  {
    "question_id": 1144,
    "db_id": "european_football_2",
    "question": "Please state the finishing rate and curve score of the player who has the heaviest weight.",
    "evidence": "finishing rate refer to finishing; curve score refer to curve; heaviest weight refers to MAX(weight)",
    "SQL": "SELECT id, finishing, curve FROM Player_Attributes WHERE player_api_id = ( SELECT player_api_id FROM Player ORDER BY weight DESC LIMIT 1 ) LIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 1145,
    "db_id": "european_football_2",
    "question": "Which top 4 leagues had the most games in the 2015-2016 season?",
    "evidence": "in the 2015-2016 season refers to season = '2015/2016'; league with most games refers to League.name where MAX(COUNT(id))",
    "SQL": "SELECT t1.name FROM League AS t1 INNER JOIN Match AS t2 ON t1.id = t2.league_id WHERE t2.season = '2015/2016' GROUP BY t1.name ORDER BY COUNT(t2.id) DESC LIMIT 4",
    "difficulty": "simple"
  },
  {
    "question_id": 1146,
    "db_id": "european_football_2",
    "question": "Please provide the full name of the away team that scored the most goals.",
    "evidence": "full name refers to team_long_name; away team refers to away_team_api_id; scored the most goals refers to MAX(away_team_goal)",
    "SQL": "SELECT t2.team_long_name FROM Match AS t1 INNER JOIN Team AS t2 ON t1.away_team_api_id = t2.team_api_id ORDER BY t1.away_team_goal DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1147,
    "db_id": "european_football_2",
    "question": "Please name one player whose overall strength is the greatest.",
    "evidence": "overall strength is the greatest refers to MAX(overall_rating)",
    "SQL": "SELECT DISTINCT t1.player_name FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_api_id = t2.player_api_id WHERE t2.overall_rating = ( SELECT MAX(overall_rating) FROM Player_Attributes)",
    "difficulty": "simple"
  },
  {
    "question_id": 1148,
    "db_id": "european_football_2",
    "question": "What is the percentage of players that are under 180 cm who have an overall strength of more than 70?",
    "evidence": "percentage refers to DIVIDE(COUNT(height < 180 AND overall_rating > 70),COUNT(id)) * 100",
    "SQL": "SELECT CAST(COUNT(CASE WHEN t2.overall_rating > 70  AND t1.height < 180 THEN t1.id ELSE NULL END) AS REAL) * 100 / COUNT(t1.id) percent FROM Player AS t1 INNER JOIN Player_Attributes AS t2 ON t1.player_api_id = t2.player_api_id ",
    "difficulty": "moderate"
  },
  {
    "question_id": 846,
    "db_id": "formula_1",
    "question": "Please list the reference names of the drivers who are eliminated in the first period in race number 20.",
    "evidence": "driver reference name refers to driverRef; first qualifying period refers to q1; drivers who are eliminated in the first qualifying period refers to 5 drivers with MAX(q1); race number refers to raceId;",
    "SQL": "SELECT T2.driverRef FROM qualifying AS T1 INNER JOIN drivers AS T2 ON T2.driverId = T1.driverId WHERE T1.raceId = 20 ORDER BY T1.q1 DESC LIMIT 5",
    "difficulty": "moderate"
  },
  {
    "question_id": 847,
    "db_id": "formula_1",
    "question": "What is the surname of the driver with the best lap time in race number 19 in the second qualifying period?",
    "evidence": "race number refers to raceId; second qualifying period refers to q2; best lap time refers to MIN(q2);",
    "SQL": "SELECT T2.surname FROM qualifying AS T1 INNER JOIN drivers AS T2 ON T2.driverId = T1.driverId WHERE T1.raceId = 19 ORDER BY T1.q2 ASC LIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 850,
    "db_id": "formula_1",
    "question": "Please give the name of the race held on the circuits in Germany.",
    "evidence": "Germany is a name of country;",
    "SQL": "SELECT DISTINCT T2.name FROM circuits AS T1 INNER JOIN races AS T2 ON T2.circuitID = T1.circuitId WHERE T1.country = 'Germany'",
    "difficulty": "simple"
  },
  {
    "question_id": 854,
    "db_id": "formula_1",
    "question": "What is the coordinates location of the circuits for Australian grand prix?",
    "evidence": "coordinate position/location refers to lat, lng; circuits for Australian grand prix refers to races.name = 'Australian Grand Prix'",
    "SQL": "SELECT DISTINCT T1.lat, T1.lng FROM circuits AS T1 INNER JOIN races AS T2 ON T2.circuitID = T1.circuitId WHERE T2.name = 'Australian Grand Prix'",
    "difficulty": "simple"
  },
  {
    "question_id": 857,
    "db_id": "formula_1",
    "question": "Give the coordinate position for Abu Dhabi Grand Prix.",
    "evidence": "coordinate position/location refers to lat, lng; Abu Dhabi Grand Prix refers to races.name = 'Abu Dhabi Grand Prix'",
    "SQL": "SELECT DISTINCT T1.lat, T1.lng FROM circuits AS T1 INNER JOIN races AS T2 ON T2.circuitID = T1.circuitId WHERE T2.name = 'Abu Dhabi Grand Prix'",
    "difficulty": "simple"
  },
  {
    "question_id": 859,
    "db_id": "formula_1",
    "question": "What's Bruno Senna's Q1 result in the qualifying race No. 354?",
    "evidence": "race number refers to raceId; Bruno Senna refers to the full name of the driver; Full name of the driver refers to drivers.forename and drivers.surname;",
    "SQL": "SELECT T1.q1 FROM qualifying AS T1 INNER JOIN drivers AS T2 ON T2.driverId = T1.driverId WHERE T1.raceId = 354 AND T2.forename = 'Bruno' AND T2.surname = 'Senna'",
    "difficulty": "simple"
  },
  {
    "question_id": 861,
    "db_id": "formula_1",
    "question": "What is his number of the driver who finished 0:01:54 in the Q3 of qualifying race No.903?",
    "evidence": "race number refers to raceId; finished 0:0M:SS in the Q3 refers to q3 LIKE 'M:SS%'",
    "SQL": "SELECT T2.number FROM qualifying AS T1 INNER JOIN drivers AS T2 ON T2.driverId = T1.driverId WHERE T1.raceId = 903 AND T1.q3 LIKE '1:54%'",
    "difficulty": "simple"
  },
  {
    "question_id": 862,
    "db_id": "formula_1",
    "question": "For the Bahrain Grand Prix in 2007, how many drivers not finished the game?",
    "evidence": "Bahrain Grand Prix refers to races.name = 'Bahrain Grand Prix'; drivers who finished the race refers to time is not empty (i.e. time IS NOT NULL);",
    "SQL": "SELECT COUNT(T3.driverId) FROM races AS T1 INNER JOIN results AS T2 ON T2.raceId = T1.raceId INNER JOIN drivers AS T3 ON T3.driverId = T2.driverId WHERE T1.year = 2007 AND T1.name = 'Bahrain Grand Prix' AND T2.time IS NULL",
    "difficulty": "simple"
  },
  {
    "question_id": 865,
    "db_id": "formula_1",
    "question": "For all the drivers who finished the game in race No. 592, who is the oldest?",
    "evidence": "drivers who finished the race refers to time is not empty (i.e. time IS NOT NULL); race number refers to raceId; date of birth refers to drivers.dob; The larger the birthday value, the younger the person is, and vice versa;",
    "SQL": "SELECT T1.forename, T1.surname FROM drivers AS T1 INNER JOIN results AS T2 ON T2.driverId = T1.driverId WHERE T2.raceId = 592 AND T2.time IS NOT NULL AND T1.dob IS NOT NULL ORDER BY T1.dob ASC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 866,
    "db_id": "formula_1",
    "question": "Who was the player that got the lap time of 0:01:27 in the race No. 161? Show his introduction website.",
    "evidence": "player and driver are synonyms; the lap time of 0:0M:SS refers to lapTime.time LIKE 'M:SS%';race number refers to raceId; introduction website of the drivers refers to url;",
    "SQL": "SELECT DISTINCT T2.forename, T2.surname, T2.url FROM lapTimes AS T1 INNER JOIN drivers AS T2 ON T2.driverId = T1.driverId WHERE T1.raceId = 161 AND T1.time LIKE '1:27%'",
    "difficulty": "moderate"
  },
  {
    "question_id": 868,
    "db_id": "formula_1",
    "question": "Where is Malaysian Grand Prix held? Give the location coordinates.",
    "evidence": "location coordinates refers to (lat, lng); Malaysian Grand Prix refers to races.name = 'Malaysian Grand Prix'",
    "SQL": "SELECT DISTINCT T1.lat, T1.lng FROM circuits AS T1 INNER JOIN races AS T2 ON T2.circuitID = T1.circuitId WHERE T2.name = 'Malaysian Grand Prix'",
    "difficulty": "simple"
  },
  {
    "question_id": 869,
    "db_id": "formula_1",
    "question": "For the constructor which got the highest point in the race No. 9 , what is its introduction website?",
    "evidence": "race number refers to raceId; constructor which got the highest point refers to MAX(constructorResults.points); introduction website of the constructor refers to url;",
    "SQL": "SELECT T2.url FROM constructorResults AS T1 INNER JOIN constructors AS T2 ON T2.constructorId = T1.constructorId WHERE T1.raceId = 9 ORDER BY T1.points DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 872,
    "db_id": "formula_1",
    "question": "In the race No. 45, for the driver who had the Q3 time as 0:01:33, what is his abbreviated code?",
    "evidence": "race number refers to raceId; had the Q3 time as 0:0M:SS refers to q3 LIKE 'M:SS%'",
    "SQL": "SELECT T2.code FROM qualifying AS T1 INNER JOIN drivers AS T2 ON T2.driverId = T1.driverId WHERE T1.raceId = 45 AND T1.q3 LIKE '1:33%'",
    "difficulty": "simple"
  },
  {
    "question_id": 875,
    "db_id": "formula_1",
    "question": "Show me the season page of year when the race No. 901 took place.",
    "evidence": "the season page refers to url; race number refers to raceId;",
    "SQL": "SELECT T2.url FROM races AS T1 INNER JOIN seasons AS T2 ON T2.year = T1.year WHERE T1.raceId = 901",
    "difficulty": "simple"
  },
  {
    "question_id": 877,
    "db_id": "formula_1",
    "question": "For all the drivers who finished the game in race No. 872, who is the youngest?",
    "evidence": "race number refers to raceId; drivers who finished the race refers to time has value; the youngest is a driver where MAX(dob);",
    "SQL": "SELECT T1.forename, T1.surname FROM drivers AS T1 INNER JOIN results AS T2 ON T2.driverId = T1.driverId WHERE T2.raceId = 872 AND T2.time IS NOT NULL ORDER BY T1.dob DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 879,
    "db_id": "formula_1",
    "question": "For the driver who set the fastest lap speed, what is his nationality?",
    "evidence": "the fastest lap speed refers to (MAX) fastestLapSpeed;",
    "SQL": "SELECT T1.nationality FROM drivers AS T1 INNER JOIN results AS T2 ON T2.driverId = T1.driverId ORDER BY T2.fastestLapSpeed DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 880,
    "db_id": "formula_1",
    "question": "Paul di Resta was in the No. 853 race, what percent faster did he finish in the 853rd race than the next race for the fastest lap speed?",
    "evidence": "Paul di Resta refers to the full name of the driver; Full name of the driver refers to drivers.forename ='Paul' and drivers.surname = 'di Resta'; race number refers to raceId; percentage = DIVIDE(SUBTRACT(fastestLapSpeed(raceId = 853), (fastestLapSpeed (raceId = 854)) * 100 , (fastestLapSpeed(raceId = 853))",
    "SQL": "SELECT (SUM(IIF(T2.raceId = 853, T2.fastestLapSpeed, 0)) - SUM(IIF(T2.raceId = 854, T2.fastestLapSpeed, 0))) * 100 / SUM(IIF(T2.raceId = 853, T2.fastestLapSpeed, 0)) FROM drivers AS T1 INNER JOIN results AS T2 ON T2.driverId = T1.driverId WHERE T1.forename = 'Paul' AND T1.surname = 'di Resta'",
    "difficulty": "challenging"
  },
  {
    "question_id": 881,
    "db_id": "formula_1",
    "question": "For the drivers who took part in the race in 1983/7/16, what's their race completion rate?",
    "evidence": "DIVIDE(COUNT(driverid when time has value ), (COUNT(driverid )) as percentage; in 1983/7/16 refers to when date = '1983-07-16'",
    "SQL": "SELECT CAST(COUNT(CASE WHEN T2.time IS NOT NULL THEN T2.driverId END) AS REAL) * 100 / COUNT(T2.driverId) FROM races AS T1 INNER JOIN results AS T2 ON T2.raceId = T1.raceId WHERE T1.date = '1983-07-16'",
    "difficulty": "moderate"
  },
  {
    "question_id": 884,
    "db_id": "formula_1",
    "question": "List the names of all races that occurred in the earliest recorded year and month.",
    "evidence": "earliest recorded year and month refers to year = year(min(date)) and month = month(min(date));",
    "SQL": "SELECT name FROM races WHERE STRFTIME('%Y', date) = ( SELECT STRFTIME('%Y', date) FROM races ORDER BY date ASC LIMIT 1 ) AND STRFTIME('%m', date) = ( SELECT STRFTIME('%m', date) FROM races ORDER BY date ASC LIMIT 1 )",
    "difficulty": "moderate"
  },
  {
    "question_id": 892,
    "db_id": "formula_1",
    "question": "State the driver with the most points scored. Find his full name with that points.",
    "evidence": "the most points scored refers to max(points); Full name of the driver refers to drivers.forename and drivers.surname;",
    "SQL": "SELECT T3.forename, T3.surname, T2.points FROM races AS T1 INNER JOIN driverStandings AS T2 ON T2.raceId = T1.raceId INNER JOIN drivers AS T3 ON T3.driverId = T2.driverId ORDER BY T2.points DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 894,
    "db_id": "formula_1",
    "question": "What is the best lap time recorded? List the driver and race with such recorded lap time.",
    "evidence": "the best lap time refers to min(milliseconds); List the driver refers to drivers.forename and drivers.surname; List the race refers to races.name",
    "SQL": "SELECT T2.milliseconds, T1.forename, T1.surname, T3.name FROM drivers AS T1 INNER JOIN lapTimes AS T2 ON T1.driverId = T2.driverId INNER JOIN races AS T3 ON T2.raceId = T3.raceId ORDER BY T2.milliseconds ASC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 895,
    "db_id": "formula_1",
    "question": "What is the average lap time for Lewis Hamilton in the 2009 Malaysian Grand Prix?",
    "evidence": "average lap time = AVG(milliseconds); 'Lewis Hamilton' refers to the full name of the driver; Full name of the driver refers to drivers.forename and drivers.surname; 'Malaysian Grand Prix' refers to races.name = 'Malaysian Grand Prix'",
    "SQL": "SELECT AVG(T2.milliseconds) FROM races AS T1 INNER JOIN lapTimes AS T2 ON T2.raceId = T1.raceId INNER JOIN drivers AS T3 ON T3.driverId = T2.driverId WHERE T3.forename = 'Lewis' AND T3.surname = 'Hamilton' AND T1.year = 2009 AND T1.name = 'Malaysian Grand Prix'",
    "difficulty": "moderate"
  },
  {
    "question_id": 896,
    "db_id": "formula_1",
    "question": "Calculate the percentage whereby Hamilton was not at the 1st track of the the f1 circuit since 2010.",
    "evidence": "percentage = DIVIDE(COUNT(raceId) where surname = 'Hamilton' and position>1), (COUNT(raceId) where surname = 'Hamilton'); since 2010 refers to year >= 2010",
    "SQL": "SELECT CAST(COUNT(CASE WHEN T2.position <> 1 THEN T2.position END) AS REAL) * 100 / COUNT(T2.driverStandingsId) FROM races AS T1 INNER JOIN driverStandings AS T2 ON T2.raceId = T1.raceId INNER JOIN drivers AS T3 ON T3.driverId = T2.driverId WHERE T3.surname = 'Hamilton' AND T1.year >= 2010",
    "difficulty": "challenging"
  },
  {
    "question_id": 897,
    "db_id": "formula_1",
    "question": "Name the driver with the most winning. Mention his nationality and what is his maximum point scores.",
    "evidence": "Full name of the driver refers to drivers.forename and drivers.surname; the most winning refers to MAX(COUNT(wins)); average point scores refers to MAX(points);",
    "SQL": "SELECT T1.forename, T1.surname, T1.nationality, MAX(T2.points) FROM drivers AS T1 INNER JOIN driverStandings AS T2 ON T2.driverId = T1.driverId WHERE T2.wins >= 1 GROUP BY T1.forename, T1.surname, T1.nationality ORDER BY COUNT(T2.wins) DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 898,
    "db_id": "formula_1",
    "question": "How old is the youngest Japanese driver? What is his name?",
    "evidence": "date of birth refers to drivers.dob; The larger the birthday value, the younger the person is, and vice versa; Japanese refers to nationality = 'Japanese'; age = YEAR(CURRENT_TIMESTAMP) - YEAR(dob);",
    "SQL": "SELECT STRFTIME('%Y', CURRENT_TIMESTAMP) - STRFTIME('%Y', dob), forename , surname FROM drivers WHERE nationality = 'Japanese' ORDER BY dob DESC LIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 901,
    "db_id": "formula_1",
    "question": "Name the races along with its circuit name and location for f1 races hosted in September 2005.",
    "evidence": "in September 2005 refers to MONTH(date) = 9 and YEAR(date) = 2005",
    "SQL": "SELECT DISTINCT T2.name, T1.name, T1.location FROM circuits AS T1 INNER JOIN races AS T2 ON T2.circuitID = T1.circuitId WHERE T2.year = 2005 AND STRFTIME('%m', T2.date) = '09'",
    "difficulty": "simple"
  },
  {
    "question_id": 902,
    "db_id": "formula_1",
    "question": "Which race was Alex Yoong in when he was in track number less than 20?",
    "evidence": "Alex Yoong refers to the full name of the driver; Full name of the driver refers to drivers.forename and drivers.surname;track number less than 10 refers to position < 20",
    "SQL": "SELECT T1.name FROM races AS T1 INNER JOIN driverStandings AS T2 ON T2.raceId = T1.raceId INNER JOIN drivers AS T3 ON T3.driverId = T2.driverId WHERE T3.forename = 'Alex' AND T3.surname = 'Yoong' AND T2.position < 20",
    "difficulty": "simple"
  },
  {
    "question_id": 904,
    "db_id": "formula_1",
    "question": "State the race and year of race in which Michael Schumacher had his fastest lap.",
    "evidence": "fastest lap refers to min(milliseconds); Alex Yoong refers to the full name of the driver; Full name of the driver refers to drivers.forename and drivers.surname;",
    "SQL": "SELECT T1.name, T1.year FROM races AS T1 INNER JOIN lapTimes AS T2 ON T2.raceId = T1.raceId INNER JOIN drivers AS T3 ON T3.driverId = T2.driverId WHERE T3.forename = 'Michael' AND T3.surname = 'Schumacher' ORDER BY T2.milliseconds ASC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 906,
    "db_id": "formula_1",
    "question": "Which was Lewis Hamilton first race? What was his points recorded for his first race event?",
    "evidence": "first race refers to min(Year); Lewis Hamiltonrefers to the full name of the driver; Full name of the driver refers to drivers.forename and drivers.surname;",
    "SQL": "SELECT T1.name, T2.points FROM races AS T1 INNER JOIN driverStandings AS T2 ON T2.raceId = T1.raceId INNER JOIN drivers AS T3 ON T3.driverId = T2.driverId WHERE T3.forename = 'Lewis' AND T3.surname = 'Hamilton' ORDER BY T1.year ASC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 909,
    "db_id": "formula_1",
    "question": "Among all European Grand Prix races, what is the percentage of the races were hosted in Germany?",
    "evidence": "European Grand Prix races refers to races.name = 'European Grand Prix';percentage = divide(COUNT(races where country = Germany and name = 'Europearn Grand Prix'),COUNT(races where name = 'Europearn Grand Prix'))*100",
    "SQL": "SELECT CAST(COUNT(CASE WHEN T1.country = 'Germany' THEN T2.circuitID END) AS REAL) * 100 / COUNT(T2.circuitId) FROM circuits AS T1 INNER JOIN races AS T2 ON T2.circuitID = T1.circuitId WHERE T2.name = 'European Grand Prix'",
    "difficulty": "moderate"
  },
  {
    "question_id": 910,
    "db_id": "formula_1",
    "question": "What's the location coordinates of Silverstone Circuit?",
    "evidence": "location coordinates refers to (lat, lng); Silverstone Circuit refers to circuits.name = 'Silverstone Circuit'",
    "SQL": "SELECT lat, lng FROM circuits WHERE name = 'Silverstone Circuit'",
    "difficulty": "simple"
  },
  {
    "question_id": 912,
    "db_id": "formula_1",
    "question": "What's the reference name of Marina Bay Street Circuit?",
    "evidence": "reference name refers to circuitRef; Marina Bay Street Circuit refers to circuits.name = 'Marina Bay Street Circuit'",
    "SQL": "SELECT circuitRef FROM circuits WHERE name = 'Marina Bay Street Circuit'",
    "difficulty": "simple"
  },
  {
    "question_id": 915,
    "db_id": "formula_1",
    "question": "Which country is the oldest driver from?",
    "evidence": "date of birth refers to drivers.dob; The larger the birthday value, the younger the person is, and vice versa;",
    "SQL": "SELECT nationality FROM drivers WHERE dob IS NOT NULL ORDER BY dob ASC LIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 928,
    "db_id": "formula_1",
    "question": "Which driver ranked the first in the Canadian Grand Prix in 2007? Please give his reference name.",
    "evidence": "reference name refers to driverRef; Canadian Grand Prix refers to races.name = 'Canadian Grand Prix';",
    "SQL": "SELECT T3.forename, T3.surname, T3.driverRef FROM races AS T1 INNER JOIN results AS T2 ON T2.raceId = T1.raceId INNER JOIN drivers AS T3 ON T3.driverId = T2.driverId WHERE T1.name = 'Canadian Grand Prix' AND T2.rank = 1 AND T1.year = 2007",
    "difficulty": "moderate"
  },
  {
    "question_id": 930,
    "db_id": "formula_1",
    "question": "In which Formula_1 race did Lewis Hamilton rank the highest?",
    "evidence": "rank the highest refers to min(rank); Lewis Hamilton refers to the full name of the driver; Full name of the driver refers to drivers.forename and drivers.surname;",
    "SQL": "SELECT name FROM races WHERE raceId IN ( SELECT raceId FROM results WHERE rank = 1 AND driverId = ( SELECT driverId FROM drivers WHERE forename = 'Lewis' AND surname = 'Hamilton' ) )",
    "difficulty": "simple"
  },
  {
    "question_id": 931,
    "db_id": "formula_1",
    "question": "What was the fastest lap speed among all drivers in the 2009 Spanish Grand Prix?",
    "evidence": "the fastest lap speed among all refers to max(fastestLapSpeed); Spanish Grand Prix refers to races.name = 'Spanish Grand Prix';",
    "SQL": "SELECT T2.fastestLapSpeed FROM races AS T1 INNER JOIN results AS T2 ON T2.raceId = T1.raceId WHERE T1.name = 'Spanish Grand Prix' AND T1.year = 2009 AND T2.fastestLapSpeed IS NOT NULL ORDER BY T2.fastestLapSpeed DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 933,
    "db_id": "formula_1",
    "question": "What was Lewis Hamilton's final rank in the 2008 Chinese Grand Prix?",
    "evidence": "Lewis Hamilton refers to the full name of the driver; Full name of the driver refers to drivers.forename and drivers.surname; final rank refers to positionOrder; Chinese Grand Prix refers to races.name = 'Chinese Grand Prix';",
    "SQL": "SELECT T2.positionOrder FROM races AS T1 INNER JOIN results AS T2 ON T2.raceId = T1.raceId INNER JOIN drivers AS T3 ON T3.driverId = T2.driverId WHERE T3.forename = 'Lewis' AND T3.surname = 'Hamilton' AND T1.name = 'Chinese Grand Prix' AND T1.year = 2008",
    "difficulty": "moderate"
  },
  {
    "question_id": 937,
    "db_id": "formula_1",
    "question": "What's the finish time for the driver who ranked second in 2008's AustChineseralian Grand Prix?",
    "evidence": "finish time refers to time; Chinese Grand Prix refers to races.name = 'Chinese Grand Prix';",
    "SQL": "SELECT T1.time FROM results AS T1 INNER JOIN races AS T2 on T1.raceId = T2.raceId WHERE T1.rank = 2 AND T2.name = 'Chinese Grand Prix' AND T2.year = 2008",
    "difficulty": "simple"
  },
  {
    "question_id": 940,
    "db_id": "formula_1",
    "question": "Among the drivers that finished the race in the 2008 Chinese Grand Prix, how many of them have participated in Formula_1 races?",
    "evidence": "COUNT(raceID) > 0 reveals that this driver participated in races; drivers who finished the race refers to time has value.",
    "SQL": "SELECT COUNT(*) FROM ( SELECT T1.driverId FROM results AS T1 INNER JOIN races AS T2 on T1.raceId = T2.raceId WHERE T2.name = 'Chinese Grand Prix' AND T2.year = 2008 AND T1.time IS NOT NULL GROUP BY T1.driverId HAVING COUNT(T2.raceId) > 0 )",
    "difficulty": "moderate"
  },
  {
    "question_id": 944,
    "db_id": "formula_1",
    "question": "How much faster in percentage is the champion than the driver who finished the race last in the 2008 Australian Grand Prix?",
    "evidence": "how much faster in percentage = divide(subtract(incremental time, champion time), last_driver time) * 100; last driver finished time = incremental time + champion time; only champion's finished time is represented by 'HH:MM:SS.mmm'; finished the game refers to time is not null",
    "SQL": "WITH time_in_seconds AS ( SELECT T1.positionOrder, CASE WHEN T1.positionOrder = 1 THEN (CAST(SUBSTR(T1.time, 1, 1) AS REAL) * 3600) + (CAST(SUBSTR(T1.time, 3, 2) AS REAL) * 60) + CAST(SUBSTR(T1.time, 6) AS REAL) ELSE CAST(SUBSTR(T1.time, 2) AS REAL) END AS time_seconds FROM results AS T1 INNER JOIN races AS T2 ON T1.raceId = T2.raceId WHERE T2.name = 'Australian Grand Prix' AND T1.time IS NOT NULL AND T2.year = 2008 ), champion_time AS ( SELECT time_seconds FROM time_in_seconds WHERE positionOrder = 1), last_driver_incremental AS ( SELECT time_seconds FROM time_in_seconds WHERE positionOrder = (SELECT MAX(positionOrder) FROM time_in_seconds) ) SELECT (CAST((SELECT time_seconds FROM last_driver_incremental) AS REAL) * 100) / (SELECT time_seconds + (SELECT time_seconds FROM last_driver_incremental) FROM champion_time)",
    "difficulty": "challenging"
  },
  {
    "question_id": 945,
    "db_id": "formula_1",
    "question": "How many circuits are there in Adelaide, Australia?",
    "evidence": "Australia is the country; Melbourne is the location of circuit;",
    "SQL": "SELECT COUNT(circuitId) FROM circuits WHERE location = 'Adelaide' AND country = 'Australia'",
    "difficulty": "simple"
  },
  {
    "question_id": 948,
    "db_id": "formula_1",
    "question": "What are the maximum points of British constructors?",
    "evidence": "maximum points = MAX(points); British is a nationality",
    "SQL": "SELECT MAX(T1.points) FROM constructorStandings AS T1 INNER JOIN constructors AS T2 on T1.constructorId = T2.constructorId WHERE T2.nationality = 'British'",
    "difficulty": "simple"
  },
  {
    "question_id": 950,
    "db_id": "formula_1",
    "question": "Please list the constructor names with 0 points at race 291.",
    "evidence": "race at 291 refers to raceID = 291;",
    "SQL": "SELECT T2.name FROM constructorStandings AS T1 INNER JOIN constructors AS T2 on T1.constructorId = T2.constructorId WHERE T1.points = 0 AND T1.raceId = 291",
    "difficulty": "simple"
  },
  {
    "question_id": 951,
    "db_id": "formula_1",
    "question": "How many Japanese constructors have 0 points in 2 races?",
    "evidence": "2 races refers to COUNT(raceID) = 2; Japanese refers to constructors.nationality = 'Japanese';",
    "SQL": "SELECT COUNT(T1.raceId) FROM constructorStandings AS T1 INNER JOIN constructors AS T2 on T1.constructorId = T2.constructorId WHERE T1.points = 0 AND T2.nationality = 'Japanese' GROUP BY T1.constructorId HAVING COUNT(raceId) = 2",
    "difficulty": "simple"
  },
  {
    "question_id": 954,
    "db_id": "formula_1",
    "question": "Please calculate the race completion percentage of Japanese drivers from 2007 to 2009.",
    "evidence": "from 2007 to 2009 refers to year between 2007 and 2009; race completion refers to time is not null; percentage = Divide(COUNT(DriverID where time is not null and year between 2007 and 2009),Count (DriverID where year between 2007 and 2009))*100; ",
    "SQL": "SELECT CAST(SUM(IIF(T1.time IS NOT NULL, 1, 0)) AS REAL) * 100 / COUNT(T1.raceId) FROM results AS T1 INNER JOIN races AS T2 on T1.raceId = T2.raceId INNER JOIN drivers AS T3 on T1.driverId = T3.driverId WHERE T3.nationality = 'Japanese' AND T2.year BETWEEN 2007 AND 2009",
    "difficulty": "challenging"
  },
  {
    "question_id": 955,
    "db_id": "formula_1",
    "question": "What is the average time in seconds of champion for each year, before year 1975?",
    "evidence": "only champion's finished time is represented by 'HH:MM:SS.mmm'; finished the game refers to time is not null; before year 1975 refers to year < 1975;",
    "SQL": "WITH time_in_seconds AS ( SELECT T2.year, T2.raceId, T1.positionOrder, CASE WHEN T1.positionOrder = 1 THEN (CAST(SUBSTR(T1.time, 1, 1) AS REAL) * 3600) + (CAST(SUBSTR(T1.time, 3, 2) AS REAL) * 60) + CAST(SUBSTR(T1.time, 6,2) AS REAL )   + CAST(SUBSTR(T1.time, 9) AS REAL)/1000 ELSE 0 END AS time_seconds FROM results AS T1 INNER JOIN races AS T2 ON T1.raceId = T2.raceId WHERE T1.time IS NOT NULL ), champion_time AS ( SELECT year, raceId, time_seconds FROM time_in_seconds WHERE positionOrder = 1 ) SELECT year, AVG(time_seconds) FROM champion_time WHERE year < 1975 GROUP BY year HAVING AVG(time_seconds) IS NOT NULL",
    "difficulty": "challenging"
  },
  {
    "question_id": 959,
    "db_id": "formula_1",
    "question": "What is the fastest lap number of the champion in 2009?",
    "evidence": "in 2009 refers to year = 2009; Only the time of the champion shows in the format of \"hour: minutes: seconds.millionsecond\"",
    "SQL": "SELECT T1.fastestLap FROM results AS T1 INNER JOIN races AS T2 on T1.raceId = T2.raceId WHERE T2.year = 2009 AND T1.time LIKE '_:%:__.___'",
    "difficulty": "simple"
  },
  {
    "question_id": 960,
    "db_id": "formula_1",
    "question": "What is the average of fastest lap speed in the 2009 Spanish Grand Prix race?",
    "evidence": "Spanish Grand Prix is the name of race refers to name = 'Spanish Grand Prix'; average fastest lap speed refers to avg(fastestLapSpeed);",
    "SQL": "SELECT AVG(T1.fastestLapSpeed) FROM results AS T1 INNER JOIN races AS T2 on T1.raceId = T2.raceId WHERE T2.year = 2009 AND T2.name = 'Spanish Grand Prix'",
    "difficulty": "moderate"
  },
  {
    "question_id": 962,
    "db_id": "formula_1",
    "question": "From 2000 to 2005, what percentage of drivers who were born before 1985 and the lap numbers were over 50?",
    "evidence": "born before 1985 refers to year(dob)<1985; in 2000 to 2005 refers to year between 2000 and 2005; percentage = Divide(COUNT(driverId where year (dob) <1985 and laps >50),COUNT(DriverID where year between 2000 and 2005) *100;",
    "SQL": "SELECT CAST(SUM(IIF(STRFTIME('%Y', T3.dob) < '1985' AND T1.laps > 50, 1, 0)) AS REAL) * 100 / COUNT(*) FROM results AS T1 INNER JOIN races AS T2 on T1.raceId = T2.raceId INNER JOIN drivers AS T3 on T1.driverId = T3.driverId WHERE T2.year BETWEEN 2000 AND 2005",
    "difficulty": "challenging"
  },
  {
    "question_id": 963,
    "db_id": "formula_1",
    "question": "How many French drivers who obtain the laptime less than 02:00.00?",
    "evidence": "lap time less than 02:00.00 refers to seconds < 120;",
    "SQL": "SELECT COUNT(T1.driverId) FROM drivers AS T1 INNER JOIN lapTimes AS T2 on T1.driverId = T2.driverId WHERE T1.nationality = 'French' AND (CAST(SUBSTR(T2.time, 1, 2) AS INTEGER) * 60 + CAST(SUBSTR(T2.time, 4, 2) AS INTEGER) + CAST(SUBSTR(T2.time, 7, 2) AS REAL) / 1000) < 120",
    "difficulty": "moderate"
  },
  {
    "question_id": 964,
    "db_id": "formula_1",
    "question": "List out the code for drivers who have nationality in American.",
    "evidence": "nationality = 'American'",
    "SQL": "SELECT code FROM drivers WHERE Nationality = 'American'",
    "difficulty": "simple"
  },
  {
    "question_id": 967,
    "db_id": "formula_1",
    "question": "State code numbers of top 3 yougest drivers. How many Netherlandic drivers among them?",
    "evidence": "youngest driver refers to Max (year(dob)); Netherlandic and Dutch refer to the same country",
    "SQL": "SELECT COUNT(*) FROM ( SELECT T1.nationality FROM drivers AS T1 ORDER BY JULIANDAY(T1.dob) DESC LIMIT 3) AS T3 WHERE T3.nationality = 'Dutch'",
    "difficulty": "simple"
  },
  {
    "question_id": 971,
    "db_id": "formula_1",
    "question": "Please state the reference name of the oldest German driver.",
    "evidence": "oldest refers to MIN(year(dob)); reference names appear in drverRef.",
    "SQL": "SELECT driverRef FROM drivers WHERE nationality = 'German' ORDER BY JULIANDAY(dob) ASC LIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 972,
    "db_id": "formula_1",
    "question": "Which drivers who were born in 1971 and has the fastest lap time on the race? Give id and code of these drivers.",
    "evidence": "born in 1971 refers to year(dob) = 1971; has the fastest lap time refers to fastestLapTime has values",
    "SQL": "SELECT T2.driverId, T2.code FROM results AS T1 INNER JOIN drivers AS T2 on T1.driverId = T2.driverId WHERE STRFTIME('%Y', T2.dob) = '1971' AND T1.fastestLapTime IS NOT NULL",
    "difficulty": "moderate"
  },
  {
    "question_id": 977,
    "db_id": "formula_1",
    "question": "From race no. 50 to 100, how many finishers have been disqualified?",
    "evidence": "disqualified refers to statusID = 2, finisher refers to time! = null; race no. refers to raceId; raceId > 50 and raceId < 100;",
    "SQL": "SELECT SUM(IIF(time IS NOT NULL, 1, 0)) FROM results WHERE statusId = 2 AND raceID < 100 AND raceId > 50",
    "difficulty": "simple"
  },
  {
    "question_id": 978,
    "db_id": "formula_1",
    "question": "How many times the circuits were held in Austria? Please give their location and coordinates.",
    "evidence": "location coordinates refers to (lat,lng); Austria refers to country = 'Austria';",
    "SQL": "SELECT DISTINCT location, lat, lng FROM circuits WHERE country = 'Austria'",
    "difficulty": "simple"
  },
  {
    "question_id": 981,
    "db_id": "formula_1",
    "question": "On what year did the youngest driver had his first qualifying race? Also state the name, date and time of the race.",
    "evidence": "date of birth refers to drivers.dob; The larger the birthday value, the younger the person is, and vice versa; first qualifying race refers to MIN(races.date);",
    "SQL": "SELECT T3.year, T3.name, T3.date, T3.time FROM qualifying AS T1 INNER JOIN drivers AS T2 on T1.driverId = T2.driverId INNER JOIN races AS T3 on T1.raceId = T3.raceId WHERE T1.driverId = ( SELECT driverId FROM drivers ORDER BY dob DESC LIMIT 1 ) ORDER BY T3.date ASC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 988,
    "db_id": "formula_1",
    "question": "List down top 3 German drivers who has the shortest average pit stop duration and were born between 1980-1985.",
    "evidence": "Full name of the driver refers to drivers.forename and drivers.surname; born between 1980-1985 refers to 1980<= year(dob) <=1985; Average pitstop duration refers to Divide(SUM(duration),COUNT(duration)); shortest average refers to Min(avg(duration));",
    "SQL": "SELECT T2.forename, T2.surname FROM pitStops AS T1 INNER JOIN drivers AS T2 on T1.driverId = T2.driverId WHERE T2.nationality = 'German' AND STRFTIME('%Y', T2.dob) BETWEEN '1980' AND '1985' GROUP BY T2.forename, T2.surname ORDER BY AVG(T1.duration) LIMIT 3",
    "difficulty": "challenging"
  },
  {
    "question_id": 989,
    "db_id": "formula_1",
    "question": "Who is the champion of the Canadian Grand Prix in 2008? Indicate his finish time.",
    "evidence": "Only the time of the champion shows in the format of \"hour: minutes: seconds.millionsecond\";",
    "SQL": "SELECT T1.time FROM results AS T1 INNER JOIN races AS T2 ON T1.raceId = T2.raceId WHERE T2.name = 'Canadian Grand Prix' AND T2.year = 2008 AND T1.time LIKE '_:%:__.___'",
    "difficulty": "moderate"
  },
  {
    "question_id": 990,
    "db_id": "formula_1",
    "question": "What is the constructor reference name of the champion in the 2009 Singapore Grand Prix? Please give its website.",
    "evidence": "the time of the champion shows in the format of \"minutes: seconds.millionsecond\" in which Max(time); constructor reference name refers to constructorRef; website refers to url",
    "SQL": "SELECT T3.constructorRef, T3.url FROM results AS T1 INNER JOIN races AS T2 on T1.raceId = T2.raceId INNER JOIN constructors AS T3 on T1.constructorId = T3.constructorId WHERE T2.name = 'Singapore Grand Prix' AND T2.year = 2009 AND T1.time LIKE '_:%:__.___'",
    "difficulty": "challenging"
  },
  {
    "question_id": 717,
    "db_id": "superhero",
    "question": "Please list all the superpowers of 3-D Man.",
    "evidence": "3-D Man refers to superhero_name = '3-D Man'; superpowers refers to power_name",
    "SQL": "SELECT T3.power_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id WHERE T1.superhero_name = '3-D Man'",
    "difficulty": "simple"
  },
  {
    "question_id": 994,
    "db_id": "formula_1",
    "question": "Which constructor scored most points from Monaco Grand Prix between 1980 and 2010? List the score, name and nationality of this team.",
    "evidence": "Monaco Grand Priz refers to the race; race in year between 1980 and 2010",
    "SQL": "SELECT SUM(T1.points), T2.name, T2.nationality FROM constructorResults AS T1 INNER JOIN constructors AS T2 ON T1.constructorId = T2.constructorId INNER JOIN races AS T3 ON T3.raceid = T1.raceid WHERE T3.name = 'Monaco Grand Prix' AND T3.year BETWEEN 1980 AND 2010 GROUP BY T2.name ORDER BY SUM(T1.points) DESC LIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 1001,
    "db_id": "formula_1",
    "question": "What is full name of the racer who ranked 1st in the 3rd qualifying race held in the Marina Bay Street Circuit in 2008?",
    "evidence": "Ranked 1st in the 3rd qualifying race refer to MIN(q3); 2008 is the year of race; full name of racer = forename, surname",
    "SQL": "SELECT T2.forename, T2.surname FROM qualifying AS T1 INNER JOIN drivers AS T2 on T1.driverId = T2.driverId INNER JOIN races AS T3 ON T1.raceid = T3.raceid WHERE q3 IS NOT NULL AND T3.year = 2008 AND T3.circuitId IN ( SELECT circuitId FROM circuits WHERE name = 'Marina Bay Street Circuit' ) ORDER BY CAST(SUBSTR(q3, 1, INSTR(q3, ':') - 1) AS INTEGER) * 60 + CAST(SUBSTR(q3, INSTR(q3, ':') + 1, INSTR(q3, '.') - INSTR(q3, ':') - 1) AS REAL) + CAST(SUBSTR(q3, INSTR(q3, '.') + 1) AS REAL) / 1000 ASC LIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 1002,
    "db_id": "formula_1",
    "question": "As of the present, what is the full name of the youngest racer? Indicate her nationality and the name of the race to which he/she first joined.",
    "evidence": "full name refers to forename+surname; Youngest racer = MAX(dob)",
    "SQL": "SELECT T1.forename, T1.surname, T1.nationality, T3.name FROM drivers AS T1 INNER JOIN driverStandings AS T2 on T1.driverId = T2.driverId INNER JOIN races AS T3 on T2.raceId = T3.raceId ORDER BY JULIANDAY(T1.dob) DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1003,
    "db_id": "formula_1",
    "question": "How many accidents did the driver who had the highest number accidents in the Canadian Grand Prix have?",
    "evidence": "number of accidents refers to the number where statusid = 3; Canadian Grand Prix refers to the race of name\n",
    "SQL": "SELECT COUNT(T1.driverId) FROM results AS T1 INNER JOIN races AS T2 on T1.raceId = T2.raceId INNER JOIN status AS T3 on T1.statusId = T3.statusId WHERE T3.statusId = 3 AND T2.name = 'Canadian Grand Prix' GROUP BY T1.driverId ORDER BY COUNT(T1.driverId) DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 1011,
    "db_id": "formula_1",
    "question": "Which top 20 driver created the shortest lap time ever record in a Formula_1 race? Please give them full names.",
    "evidence": "shortest lap time refers to MIN(time); the time format for the shortest lap time is 'MM:SS.mmm' or 'M:SS.mmm'; full name of the driver refers to forename, surname",
    "SQL": "WITH lap_times_in_seconds AS (SELECT driverId, (CASE WHEN SUBSTR(time, 1, INSTR(time, ':') - 1) <> '' THEN CAST(SUBSTR(time, 1, INSTR(time, ':') - 1) AS REAL) * 60 ELSE 0 END + CASE WHEN SUBSTR(time, INSTR(time, ':') + 1, INSTR(time, '.') - INSTR(time, ':') - 1) <> '' THEN CAST(SUBSTR(time, INSTR(time, ':') + 1, INSTR(time, '.') - INSTR(time, ':') - 1) AS REAL) ELSE 0 END + CASE WHEN SUBSTR(time, INSTR(time, '.') + 1) <> '' THEN CAST(SUBSTR(time, INSTR(time, '.') + 1) AS REAL) / 1000 ELSE 0 END) AS time_in_seconds FROM lapTimes) SELECT T2.forename, T2.surname, T1.driverId FROM (SELECT driverId, MIN(time_in_seconds) AS min_time_in_seconds FROM lap_times_in_seconds GROUP BY driverId) AS T1 INNER JOIN drivers AS T2 ON T1.driverId = T2.driverId ORDER BY T1.min_time_in_seconds ASC LIMIT 20",
    "difficulty": "challenging"
  },
  {
    "question_id": 1014,
    "db_id": "formula_1",
    "question": "Please list the lap records for the circuits in Italy.",
    "evidence": "lap record means the fastest time recorded which refers to time",
    "SQL": "WITH fastest_lap_times AS (SELECT T1.raceId, T1.FastestLapTime, (CAST(SUBSTR(T1.FastestLapTime, 1, INSTR(T1.FastestLapTime, ':') - 1) AS REAL) * 60) + (CAST(SUBSTR(T1.FastestLapTime, INSTR(T1.FastestLapTime, ':') + 1, INSTR(T1.FastestLapTime, '.') - INSTR(T1.FastestLapTime, ':') - 1) AS REAL)) + (CAST(SUBSTR(T1.FastestLapTime, INSTR(T1.FastestLapTime, '.') + 1) AS REAL) / 1000) as time_in_seconds FROM results AS T1 WHERE T1.FastestLapTime IS NOT NULL ) SELECT T1.FastestLapTime as lap_record FROM results AS T1 INNER JOIN races AS T2 on T1.raceId = T2.raceId INNER JOIN circuits AS T3 on T2.circuitId = T3.circuitId INNER JOIN (SELECT MIN(fastest_lap_times.time_in_seconds) as min_time_in_seconds FROM fastest_lap_times INNER JOIN races AS T2 on fastest_lap_times.raceId = T2.raceId INNER JOIN circuits AS T3 on T2.circuitId = T3.circuitId WHERE T3.country = 'Italy' ) AS T4 ON (CAST(SUBSTR(T1.FastestLapTime, 1, INSTR(T1.FastestLapTime, ':') - 1) AS REAL) * 60) + (CAST(SUBSTR(T1.FastestLapTime, INSTR(T1.FastestLapTime, ':') + 1, INSTR(T1.FastestLapTime, '.') - INSTR(T1.FastestLapTime, ':') - 1) AS REAL)) + (CAST(SUBSTR(T1.FastestLapTime, INSTR(T1.FastestLapTime, '.') + 1) AS REAL) / 1000) = T4.min_time_in_seconds LIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 719,
    "db_id": "superhero",
    "question": "Among the superheroes with the super power of \"Super Strength\", how many of them have a height of over 200cm?",
    "evidence": "super power of \"Super Strength\" refers to power_name = 'Super Strength'; a height of over 200cm refers to height_cm > 200",
    "SQL": "SELECT COUNT(T1.id) FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id WHERE T3.power_name = 'Super Strength' AND T1.height_cm > 200",
    "difficulty": "moderate"
  },
  {
    "question_id": 723,
    "db_id": "superhero",
    "question": "Among the superheroes with blue eyes, how many of them have the super power of \"Agility\"?",
    "evidence": "blue eyes refers to colour = 'Blue' and eye_colour_id = colour.id; super power of \"Agility\" refers to power_name = 'Agility'",
    "SQL": "SELECT COUNT(T1.id) FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id INNER JOIN colour AS T4 ON T1.eye_colour_id = T4.id WHERE T3.power_name = 'Agility' AND T4.colour = 'Blue'",
    "difficulty": "moderate"
  },
  {
    "question_id": 724,
    "db_id": "superhero",
    "question": "Please list the superhero names of all the superheroes that have blue eyes and blond hair.",
    "evidence": "blue eyes refers to colour = 'Blue' and eye_colour_id = colour.id; blond hair refers to colour = 'Blond' and hair_colour_id = colour.id; super power of \"Agility\" refers to power_name = 'Agility'",
    "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id INNER JOIN colour AS T3 ON T1.hair_colour_id = T3.id WHERE T2.colour = 'Blue' AND T3.colour = 'Blond'",
    "difficulty": "challenging"
  },
  {
    "question_id": 726,
    "db_id": "superhero",
    "question": "Rank heroes published by Marvel Comics by their height in descending order.",
    "evidence": "name refers to superhero_name; the tallest hero refers to MAX(height_cm); published by Marvel Comics refers to publisher_name = 'Marvel Comics'",
    "SQL": "SELECT superhero_name, height_cm, RANK() OVER (ORDER BY height_cm DESC) AS HeightRank FROM superhero INNER JOIN publisher ON superhero.publisher_id = publisher.id WHERE publisher.publisher_name = 'Marvel Comics'",
    "difficulty": "moderate"
  },
  {
    "question_id": 728,
    "db_id": "superhero",
    "question": "Rank superheroes from Marvel Comics by their eye color popularity, starting with the most common color.",
    "evidence": "the superheroes from Marvel Comics refers to publisher_name = 'Marvel Comics'; most common color refers to COUNT(superhero.id) DESC;",
    "SQL": "SELECT colour.colour AS EyeColor, COUNT(superhero.id) AS Count, RANK() OVER (ORDER BY COUNT(superhero.id) DESC) AS PopularityRank FROM superhero INNER JOIN colour ON superhero.eye_colour_id = colour.id INNER JOIN publisher ON superhero.publisher_id = publisher.id WHERE publisher.publisher_name = 'Marvel Comics' GROUP BY colour.colour",
    "difficulty": "moderate"
  },
  {
    "question_id": 730,
    "db_id": "superhero",
    "question": "List the superheroes from Marvel Comics who have the super power of 'Super Strength'.",
    "evidence": "the superheroes from Marvel Comics refers to publisher_name = 'Marvel Comics'; super power of \"Super Strength\" refers to power_name = 'Super Strength';",
    "SQL": "SELECT superhero_name FROM superhero AS T1 WHERE EXISTS (SELECT 1 FROM hero_power AS T2 INNER JOIN superpower AS T3 ON T2.power_id = T3.id WHERE T3.power_name = 'Super Strength' AND T1.id = T2.hero_id)AND EXISTS (SELECT 1 FROM publisher AS T4 WHERE T4.publisher_name = 'Marvel Comics' AND T1.publisher_id = T4.id)",
    "difficulty": "challenging"
  },
  {
    "question_id": 732,
    "db_id": "superhero",
    "question": "Which publisher published the slowest superhero?",
    "evidence": "the slowest superhero refers to attribute_name = 'Speed' where MIN(attribute_value); publisher refers to publisher_name",
    "SQL": "SELECT T2.publisher_name FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id INNER JOIN hero_attribute AS T3 ON T1.id = T3.hero_id INNER JOIN attribute AS T4 ON T3.attribute_id = T4.id WHERE T4.attribute_name = 'Speed' ORDER BY T3.attribute_value LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 733,
    "db_id": "superhero",
    "question": "How many gold-eyed superheroes did Marvel Comics publish?",
    "evidence": "gold-eyed refers to colour = 'Gold' where eye_colour_id = colour.id; superheroes that Marvel Comics published refers to publisher_name = 'Marvel Comics'",
    "SQL": "SELECT COUNT(T1.id) FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id INNER JOIN colour AS T3 ON T1.eye_colour_id = T3.id WHERE T2.publisher_name = 'Marvel Comics' AND T3.colour = 'Gold'",
    "difficulty": "moderate"
  },
  {
    "question_id": 736,
    "db_id": "superhero",
    "question": "Who is the dumbest superhero?",
    "evidence": "the dumbest superhero refers to MIN(attribute_value) where attribute_name = 'Intelligence'",
    "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN hero_attribute AS T2 ON T1.id = T2.hero_id INNER JOIN attribute AS T3 ON T2.attribute_id = T3.id WHERE T3.attribute_name = 'Intelligence' ORDER BY T2.attribute_value LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 737,
    "db_id": "superhero",
    "question": "What is Copycat's race?",
    "evidence": "Copycat is the superhero_name;",
    "SQL": "SELECT T2.race FROM superhero AS T1 INNER JOIN race AS T2 ON T1.race_id = T2.id WHERE T1.superhero_name = 'Copycat'",
    "difficulty": "simple"
  },
  {
    "question_id": 738,
    "db_id": "superhero",
    "question": "Which superheroes have a durability attribute value of less than 50?",
    "evidence": "durability of less than 50 refers to attribute_name = 'Durability' AND attribute_value < 50",
    "SQL": "SELECT superhero_name FROM superhero AS T1 WHERE EXISTS (SELECT 1 FROM hero_attribute AS T2 INNER JOIN attribute AS T3 ON T2.attribute_id = T3.id WHERE T3.attribute_name = 'Durability' AND T2.attribute_value < 50 AND T1.id = T2.hero_id)",
    "difficulty": "simple"
  },
  {
    "question_id": 739,
    "db_id": "superhero",
    "question": "What are the names of the superheroes with the power of death touch?",
    "evidence": "name of superheroes refers to refers to superhero_name; the power of death touch refers to power_name = 'Death Touch'",
    "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id WHERE T3.power_name = 'Death Touch'",
    "difficulty": "moderate"
  },
  {
    "question_id": 740,
    "db_id": "superhero",
    "question": "How many female superheroes have a strength value of 100?",
    "evidence": "female refers to gender = 'Female'; strength value of 100 refers to attribute_name = 'Strength' AND attribute_value = 100",
    "SQL": "SELECT COUNT(T1.id) FROM superhero AS T1 INNER JOIN hero_attribute AS T2 ON T1.id = T2.hero_id INNER JOIN attribute AS T3 ON T2.attribute_id = T3.id INNER JOIN gender AS T4 ON T1.gender_id = T4.id WHERE T3.attribute_name = 'Strength' AND T2.attribute_value = 100 AND T4.gender = 'Female'",
    "difficulty": "moderate"
  },
  {
    "question_id": 743,
    "db_id": "superhero",
    "question": "What is the percentage of superheroes who act in their own self-interest or make decisions based on their own moral code? Indicate how many of the said superheroes were published by Marvel Comics.",
    "evidence": "published by Marvel Comics refers to publisher_name = 'Marvel Comics'; superheroes who act in their own self-interest or make decisions based on their own moral code refers to alignment = 'Bad'; calculation = MULTIPLY(DIVIDE(SUM(alignment = 'Bad); count(id)), 100)",
    "SQL": "SELECT (CAST(COUNT(*) AS REAL) * 100 / (SELECT COUNT(*) FROM superhero)), CAST(SUM(CASE WHEN T2.publisher_name = 'Marvel Comics' THEN 1 ELSE 0 END) AS REAL) FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id INNER JOIN alignment AS T3 ON T3.id = T1.alignment_id WHERE T3.alignment = 'Bad'",
    "difficulty": "challenging"
  },
  {
    "question_id": 744,
    "db_id": "superhero",
    "question": "Between DC and Marvel Comics, which publisher has published more superheroes? Find the difference in the number of superheroes they have published.",
    "evidence": "DC refers to publisher_name = 'DC Comics'; Marvel Comics refers to publisher_name = 'Marvel Comics'; calculation = SUBTRACT(SUM(publisher_name = 'Marvel Comics'), SUM(publisher_name = 'DC Comics'))",
    "SQL": "SELECT SUM(CASE WHEN T2.publisher_name = 'Marvel Comics' THEN 1 ELSE 0 END) - SUM(CASE WHEN T2.publisher_name = 'DC Comics' THEN 1 ELSE 0 END) FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id",
    "difficulty": "challenging"
  },
  {
    "question_id": 745,
    "db_id": "superhero",
    "question": "Give the publisher ID of Star Trek.",
    "evidence": "Star Trek is the publisher_name;",
    "SQL": "SELECT id FROM publisher WHERE publisher_name = 'Star Trek'",
    "difficulty": "simple"
  },
  {
    "question_id": 747,
    "db_id": "superhero",
    "question": "What is the total number of superheroes without full name?",
    "evidence": "superheroes without full name refers to full_name IS NULL",
    "SQL": "SELECT COUNT(id) FROM superhero WHERE full_name IS NULL",
    "difficulty": "simple"
  },
  {
    "question_id": 750,
    "db_id": "superhero",
    "question": "What is the average weight of all female superheroes?",
    "evidence": "female refers to gender = 'Female'; average weight refers to AVG(weight_kg)",
    "SQL": "SELECT AVG(T1.weight_kg) FROM superhero AS T1 INNER JOIN gender AS T2 ON T1.gender_id = T2.id WHERE T2.gender = 'Female'",
    "difficulty": "simple"
  },
  {
    "question_id": 751,
    "db_id": "superhero",
    "question": "List down at least five superpowers of male superheroes.",
    "evidence": "male refers to gender = 'Male'; superpowers refers to power_name;",
    "SQL": "SELECT T3.power_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T3.id = T2.power_id INNER JOIN gender AS T4 ON T4.id = T1.gender_id WHERE T4.gender = 'Male' LIMIT 5",
    "difficulty": "moderate"
  },
  {
    "question_id": 753,
    "db_id": "superhero",
    "question": "Among the superheroes with height from 170 to 190, list the names of the superheroes with no eye color.",
    "evidence": "height from 170 to 190 refers to height_cm BETWEEN 170 AND 190; no eye color refers to colour = 'No Colour'",
    "SQL": "SELECT DISTINCT T1.superhero_name FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id WHERE T1.height_cm BETWEEN 170 AND 190 AND T2.colour = 'No Colour'",
    "difficulty": "moderate"
  },
  {
    "question_id": 758,
    "db_id": "superhero",
    "question": "Provide the hair colour of the human superhero who is 185 cm tall.",
    "evidence": "185 cm tall refers to height_cm = 185; human superhero refers to race = 'human'; hair colour refers to colour where hair_colour_id = colour.id;",
    "SQL": "SELECT DISTINCT T3.colour FROM superhero AS T1 INNER JOIN race AS T2 ON T1.race_id = T2.id INNER JOIN colour AS T3 ON T1.hair_colour_id = T3.id WHERE T1.height_cm = 185 AND T2.race = 'Human'",
    "difficulty": "moderate"
  },
  {
    "question_id": 760,
    "db_id": "superhero",
    "question": "In superheroes with height between 150 to 180, what is the percentage of heroes published by Marvel Comics?",
    "evidence": "height between 150 to 180 refers to height_cm BETWEEN 150 AND 180; heroes published by Marvel Comics refers to publisher_name = 'Marvel Comics'; calculation = MULTIPLY(DIVIDE(SUM(publisher.id = 13)), COUNT(publisher.id), 100)",
    "SQL": "SELECT CAST(COUNT(CASE WHEN T2.publisher_name = 'Marvel Comics' THEN 1 ELSE NULL END) AS REAL) * 100 / COUNT(T1.id) FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id WHERE T1.height_cm BETWEEN 150 AND 180",
    "difficulty": "challenging"
  },
  {
    "question_id": 761,
    "db_id": "superhero",
    "question": "Among the male superheroes, list the super hero names of superheroes with weight greater than the 79% average weight of all superheroes.",
    "evidence": "super hero names refers to superhero_name;male superheros refers to gender = 'Male';Calculation = weight_kg > MULTIPLY(AVG(weight_kg), 0.79)",
    "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN gender AS T2 ON T1.gender_id = T2.id WHERE T2.gender = 'Male' AND T1.weight_kg * 100 > ( SELECT AVG(weight_kg) FROM superhero ) * 79",
    "difficulty": "moderate"
  },
  {
    "question_id": 764,
    "db_id": "superhero",
    "question": "What are the superpowers of heroes with ID 1?",
    "evidence": "superpowers refers to power_name; heroes with ID 1 refers to hero_id = 1;",
    "SQL": "SELECT DISTINCT T2.power_name FROM hero_power AS T1 INNER JOIN superpower AS T2 ON T1.power_id = T2.id WHERE T1.hero_id = 1",
    "difficulty": "simple"
  },
  {
    "question_id": 765,
    "db_id": "superhero",
    "question": "How many heroes have stealth power?",
    "evidence": "stealth power refers to power_name = 'Stealth';",
    "SQL": "SELECT COUNT(T1.hero_id) FROM hero_power AS T1 INNER JOIN superpower AS T2 ON T1.power_id = T2.id WHERE T2.power_name = 'Stealth'",
    "difficulty": "simple"
  },
  {
    "question_id": 766,
    "db_id": "superhero",
    "question": "What is the hero's full name with the highest attribute in strength?",
    "evidence": "highest attribute in strength refers to MAX(attribute_value) WHERE attribute_name = 'strength';",
    "SQL": "SELECT T1.full_name FROM superhero AS T1 INNER JOIN hero_attribute AS T2 ON T1.id = T2.hero_id INNER JOIN attribute AS T3 ON T2.attribute_id = T3.id WHERE T3.attribute_name = 'Strength' ORDER BY T2.attribute_value DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 769,
    "db_id": "superhero",
    "question": "Which superhero has the most durability published by Dark Horse Comics?",
    "evidence": "which superhero refers to superhero_name; most durability refers to MAX(attribute_value) WHERE attribute_name = 'durability'; published by Dark Horse Comics refers to publisher_name = 'Dark Horse Comics';",
    "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN hero_attribute AS T2 ON T1.id = T2.hero_id INNER JOIN attribute AS T3 ON T3.id = T2.attribute_id INNER JOIN publisher AS T4 ON T4.id = T1.publisher_id WHERE T4.publisher_name = 'Dark Horse Comics' AND T3.attribute_name = 'Durability' ORDER BY T2.attribute_value DESC LIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 772,
    "db_id": "superhero",
    "question": "List the eyes, hair and skin colour of all female superheroes published by Dark Horse Comics.",
    "evidence": "eyes refers to eye_colour_id; hair refers to hair_colour_id; skin colour refers to skin_colour_id; female superheroes refers to gender = 'Female'; published by Dark Horse Comics refers to publisher_name = 'Dark Horse Comics';",
    "SQL": "SELECT T1.eye_colour_id, T1.hair_colour_id, T1.skin_colour_id FROM superhero AS T1 INNER JOIN publisher AS T2 ON T2.id = T1.publisher_id INNER JOIN gender AS T3 ON T3.id = T1.gender_id WHERE T2.publisher_name = 'Dark Horse Comics' AND T3.gender = 'Female'",
    "difficulty": "challenging"
  },
  {
    "question_id": 773,
    "db_id": "superhero",
    "question": "Which superhero has the same eyes, hair and skin colour? Indicate the publisher of the superhero.",
    "evidence": "which superhero refers to superhero_name; the same eyes, hair and skin colour refers to hair_colour_id = skin_colour_id AND hair_colour_id = eye_colour_id; publisher refers to publisher_name;",
    "SQL": "SELECT T1.superhero_name, T2.publisher_name FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id WHERE T1.eye_colour_id = T1.hair_colour_id AND T1.eye_colour_id = T1.skin_colour_id",
    "difficulty": "challenging"
  },
  {
    "question_id": 775,
    "db_id": "superhero",
    "question": "What is the percentage of blue female superheroes among all female superheroes?",
    "evidence": "percentage = MULTIPLY(DIVIDE(SUM(colour = 'Blue' WHERE gender = 'Female'), COUNT(gender = 'Female')), 100); blue refers to the color = 'Blue' WHERE skin_colour_id = colour.id; female refers to gender = 'Female';",
    "SQL": "SELECT CAST(COUNT(CASE WHEN T3.colour = 'Blue' THEN T1.id ELSE NULL END) AS REAL) * 100 / COUNT(T1.id) FROM superhero AS T1 INNER JOIN gender AS T2 ON T1.gender_id = T2.id INNER JOIN colour AS T3 ON T1.skin_colour_id = T3.id WHERE T2.gender = 'Female'",
    "difficulty": "challenging"
  },
  {
    "question_id": 779,
    "db_id": "superhero",
    "question": "How many powers does Amazo hero have?",
    "evidence": "Amazo hero refers to superhero_name = 'Amazo';",
    "SQL": "SELECT COUNT(T1.power_id) FROM hero_power AS T1 INNER JOIN superhero AS T2 ON T1.hero_id = T2.id WHERE T2.superhero_name = 'Amazo'",
    "difficulty": "simple"
  },
  {
    "question_id": 781,
    "db_id": "superhero",
    "question": "Provide the heights of the heroes whose eye colours are amber.",
    "evidence": "heights of the heroes refers to height_cm; eye colours are amber refers to colour.colour = 'Amber' WHERE eye_colour_id = colour.id;",
    "SQL": "SELECT T1.height_cm FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id WHERE T2.colour = 'Amber'",
    "difficulty": "simple"
  },
  {
    "question_id": 782,
    "db_id": "superhero",
    "question": "List the heroes' names whose eyes and hair colours are both black.",
    "evidence": "heroes' names refers to superhero_name; eyes and hair colours are both black refers to eye_colour_id AND hair_colour_id WHERE colour.colour = 'Black';",
    "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id AND T1.hair_colour_id = T2.id WHERE T2.colour = 'Black'",
    "difficulty": "moderate"
  },
  {
    "question_id": 785,
    "db_id": "superhero",
    "question": "Describe the names of neutral alignment superheroes.",
    "evidence": "names of superheroes refers to superhero_name; neutral alignment refers to alignment = 'Neutral';",
    "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN alignment AS T2 ON T1.alignment_id = T2.id WHERE T2.alignment = 'Neutral'",
    "difficulty": "simple"
  },
  {
    "question_id": 786,
    "db_id": "superhero",
    "question": "How many heroes have the highest attribute value in strength?",
    "evidence": "highest attribute value in strength refers to MAX(attribute_value) WHERE attribute_name = 'Strength';",
    "SQL": "SELECT COUNT(T1.hero_id) FROM hero_attribute AS T1 INNER JOIN attribute AS T2 ON T1.attribute_id = T2.id WHERE T2.attribute_name = 'Strength' AND T1.attribute_value = ( SELECT MAX(attribute_value) FROM hero_attribute )",
    "difficulty": "moderate"
  },
  {
    "question_id": 788,
    "db_id": "superhero",
    "question": "How many percent of female heroes were published by Marvel Comics?",
    "evidence": "percent = MULTIPLY(DIVIDE(SUM(gender = 'Female' WHERE publisher_name = 'Marvel Comics'), COUNT(publisher_name = 'Marvel Comics')), 100); female heroes refers to gender = 'Female'; Marvel Comics refers to publisher_name = 'Marvel Comics';",
    "SQL": "SELECT CAST(COUNT(CASE WHEN T3.gender = 'Female' AND T2.publisher_name = 'Marvel Comics' THEN 1 ELSE NULL END) AS REAL) / COUNT(CASE WHEN T2.publisher_name = 'Marvel Comics' THEN 1 ELSE NULL END) * 100 FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id INNER JOIN gender AS T3 ON T1.gender_id = T3.id",
    "difficulty": "challenging"
  },
  {
    "question_id": 790,
    "db_id": "superhero",
    "question": "Calculate the difference between Emil Blonsky's weight and Charles Chandler's weight.",
    "evidence": "difference = SUBTRACT(SUM(weight_kg WHERE full_name = 'Emil Blonsky'), SUM(weight_kg WHERE full_name = 'Charles Chandler')); Emil Blonsky is the full name of superhero; Charles Chandler is the full name of superhero;",
    "SQL": "SELECT ( SELECT weight_kg FROM superhero WHERE full_name LIKE 'Emil Blonsky' ) - ( SELECT weight_kg FROM superhero WHERE full_name LIKE 'Charles Chandler' ) AS CALCULATE",
    "difficulty": "moderate"
  },
  {
    "question_id": 791,
    "db_id": "superhero",
    "question": "Calculate the average height for all superhero.",
    "evidence": "average = DIVIDE(SUM(height_cm), COUNT(all heros));",
    "SQL": "SELECT CAST(SUM(height_cm) AS REAL) / COUNT(id) FROM superhero",
    "difficulty": "simple"
  },
  {
    "question_id": 792,
    "db_id": "superhero",
    "question": "What is Abomination's superpower?",
    "evidence": "Abomination refers to superhero_name = 'Abomination'; superpower refers to power_name;",
    "SQL": "SELECT T3.power_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id WHERE T1.superhero_name = 'Abomination'",
    "difficulty": "simple"
  },
  {
    "question_id": 794,
    "db_id": "superhero",
    "question": "Which hero was the fastest?",
    "evidence": "which hero refers to superhero_name; fastest refers to MAX(attribute_value) WHERE attribute_name = 'Speed';",
    "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN hero_attribute AS T2 ON T1.id = T2.hero_id INNER JOIN attribute AS T3 ON T2.attribute_id = T3.id WHERE T3.attribute_name = 'Speed' ORDER BY T2.attribute_value DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 796,
    "db_id": "superhero",
    "question": "State all of 3-D Man's attributes along with their values.",
    "evidence": "3-D Man is the superhero_name. attributes refers to attribute_name; values refers to attribute_value;",
    "SQL": "SELECT T3.attribute_name, T2.attribute_value FROM superhero AS T1 INNER JOIN hero_attribute AS T2 ON T1.id = T2.hero_id INNER JOIN attribute AS T3 ON T2.attribute_id = T3.id WHERE T1.superhero_name = '3-D Man'",
    "difficulty": "moderate"
  },
  {
    "question_id": 797,
    "db_id": "superhero",
    "question": "Which superheroes have blue eyes with brown hair?",
    "evidence": "which superheroes refers to superhero_name; blue eyes refers to color = 'Blue' and color.id = eye_colour_id; brown hair refers to color = 'Brown' and color.id = hair_colour_id;",
    "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id INNER JOIN colour AS T3 ON T1.hair_colour_id = T3.id WHERE T2.colour = 'Blue' AND T3.colour = 'Brown'",
    "difficulty": "moderate"
  },
  {
    "question_id": 798,
    "db_id": "superhero",
    "question": "What is the publisher for Hawkman, Karate Kid and Speedy?",
    "evidence": "publisher refers to publisher_name; Hawkman refers to superhero_name = 'Hawkman'; Karate Kid refers to superhero_name = 'Karate Kid'; Speedy refers to superhero_name = 'Speedy';",
    "SQL": "SELECT T2.publisher_name FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id WHERE T1.superhero_name IN ('Hawkman', 'Karate Kid', 'Speedy')",
    "difficulty": "moderate"
  },
  {
    "question_id": 800,
    "db_id": "superhero",
    "question": "Calculate the percentage of superheroes with blue eyes.",
    "evidence": "percentage = MULTIPLY(DIVIDE(SUM(superhero_name WHERE color = 'Blue'), COUNT(superhero_name)), 100.0); blue eyes refers to color = 'Blue' and color.id =  eye_colour_id = 7;",
    "SQL": "SELECT CAST(COUNT(CASE WHEN T2.colour = 'Blue' THEN 1 ELSE NULL END) AS REAL) * 100 / COUNT(T1.id) FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id",
    "difficulty": "moderate"
  },
  {
    "question_id": 801,
    "db_id": "superhero",
    "question": "Find the ratio between male superheroes and female superheroes.",
    "evidence": "ratio = DIVIDE(SUM(gender_id = 1) / SUM(gender_id = 2)); male superheroes refers to gender = 'Female'; female superheroes refers to gender = 'Male';",
    "SQL": "SELECT CAST(COUNT(CASE WHEN T2.gender = 'Male' THEN T1.id ELSE NULL END) AS REAL) / COUNT(CASE WHEN T2.gender = 'Female' THEN T1.id ELSE NULL END) FROM superhero AS T1 INNER JOIN gender AS T2 ON T1.gender_id = T2.id",
    "difficulty": "moderate"
  },
  {
    "question_id": 806,
    "db_id": "superhero",
    "question": "Provide the eye colour of the superhero who has Karen Beecher-Duncan as their full name.",
    "evidence": "eye colour refers to colour.colour where eye_colour_id = colour.id; Karen Beecher-Duncan is the full name of superhero;",
    "SQL": "SELECT T2.colour FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id WHERE T1.full_name = 'Karen Beecher-Duncan'",
    "difficulty": "simple"
  },
  {
    "question_id": 819,
    "db_id": "superhero",
    "question": "In superheroes with missing weight data, calculate the difference between the number of superheroes with blue eyes and no eye color.",
    "evidence": "missing weight data refers to weight_kg = 0 OR T1.weight_kg = NULL; difference = SUBTRACT(SUM(colour.id = 7), SUM(colour.id = 1)); blue eyes refers to eye_colour_id WHERE colour.id = 7; no eye color refers to eye_colour_id WHERE colour.id = 1;",
    "SQL": "SELECT SUM(CASE WHEN T2.id = 7 THEN 1 ELSE 0 END) - SUM(CASE WHEN T2.id = 1 THEN 1 ELSE 0 END) FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id WHERE T1.weight_kg = 0 OR T1.weight_kg is NULL",
    "difficulty": "challenging"
  },
  {
    "question_id": 822,
    "db_id": "superhero",
    "question": "How many green-skinned villains are there in the superhero universe?",
    "evidence": "green-skinned refers to colour.colour = 'Green' WHERE skin_colour_id = colour.id; villains refers to alignment = 'Bad';",
    "SQL": "SELECT COUNT(T1.id) FROM superhero AS T1 INNER JOIN alignment AS T2 ON T1.alignment_id = T2.id INNER JOIN colour AS T3 ON T1.skin_colour_id = T3.id WHERE T2.alignment = 'Bad' AND T3.colour = 'Green'",
    "difficulty": "moderate"
  },
  {
    "question_id": 824,
    "db_id": "superhero",
    "question": "Identify superheroes who can control wind and list their names in alphabetical order.",
    "evidence": "superheroes refers to superhero_name; can control wind refers to power_name = 'Wind Control';",
    "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id WHERE T3.power_name = 'Wind Control' ORDER BY T1.superhero_name",
    "difficulty": "moderate"
  },
  {
    "question_id": 825,
    "db_id": "superhero",
    "question": "Identify the gender of the superhero who has the ability of Phoenix Force.",
    "evidence": "ability of Phoenix Force refers to power_name = 'Phoenix Force';",
    "SQL": "SELECT T4.gender FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id INNER JOIN gender AS T4 ON T1.gender_id = T4.id WHERE T3.power_name = 'Phoenix Force'",
    "difficulty": "moderate"
  },
  {
    "question_id": 829,
    "db_id": "superhero",
    "question": "Which publisher created more superheroes: DC or Marvel Comics? Find the difference in the number of superheroes.",
    "evidence": "DC refers to publisher_name = 'DC Comics'; Marvel Comics refers to publisher_name = 'Marvel Comics'; difference = SUBTRACT(SUM(publisher_name = 'DC Comics'), SUM(publisher_name = 'Marvel Comics'));",
    "SQL": "SELECT SUM(CASE WHEN T2.publisher_name = 'DC Comics' THEN 1 ELSE 0 END) - SUM(CASE WHEN T2.publisher_name = 'Marvel Comics' THEN 1 ELSE 0 END) FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id",
    "difficulty": "challenging"
  },
  {
    "question_id": 531,
    "db_id": "codebase_community",
    "question": "Which user has a higher reputation, Harlan or Jarrod Dixon?",
    "evidence": "\"Harlan\" and \"Jarrod Dixon\" are both DisplayName; highest reputation refers to Max(Reputation)",
    "SQL": "SELECT DisplayName FROM users WHERE DisplayName IN ('Harlan', 'Jarrod Dixon') AND Reputation = ( SELECT MAX(Reputation) FROM users WHERE DisplayName IN ('Harlan', 'Jarrod Dixon') )",
    "difficulty": "simple"
  },
  {
    "question_id": 532,
    "db_id": "codebase_community",
    "question": "Please list the display names of all the users whose accounts were created in the year 2011.",
    "evidence": "account created in the year 2011 refers to year(CreationDate) = 2011",
    "SQL": "SELECT DisplayName FROM users WHERE STRFTIME('%Y', CreationDate) = '2011'",
    "difficulty": "simple"
  },
  {
    "question_id": 533,
    "db_id": "codebase_community",
    "question": "How many users last accessed the website after 2014/9/1?",
    "evidence": "last accessed after 2014/9/1 refers to LastAccessDate > '2014-09-01'",
    "SQL": "SELECT COUNT(Id) FROM users WHERE date(LastAccessDate) > '2014-09-01'",
    "difficulty": "simple"
  },
  {
    "question_id": 539,
    "db_id": "codebase_community",
    "question": "Who is the owner of the post \"Eliciting priors from experts\"?",
    "evidence": "\"Eliciting priors from experts\" is the Title of post; owner refers to DisplayName",
    "SQL": "SELECT T2.DisplayName FROM posts AS T1 INNER JOIN users AS T2 ON T1.OwnerUserId = T2.Id WHERE T1.Title = 'Eliciting priors from experts'",
    "difficulty": "simple"
  },
  {
    "question_id": 537,
    "db_id": "codebase_community",
    "question": "How many posts does the user csgillespie own?",
    "evidence": "\"csgillespie\" is the DisplayName of user",
    "SQL": "SELECT COUNT(T1.id) FROM posts AS T1 INNER JOIN users AS T2 ON T1.OwnerUserId = T2.Id WHERE T2.DisplayName = 'csgillespie'",
    "difficulty": "simple"
  },
  {
    "question_id": 544,
    "db_id": "codebase_community",
    "question": "What is the display name of the user who last edited the post \"Examples for teaching: Correlation does not mean causation\"?",
    "evidence": "\"Examples for teaching: Correlation does not mean causation\" is the Title of post; user who last edited refers to LastEditorUserId",
    "SQL": "SELECT T2.DisplayName FROM posts AS T1 INNER JOIN users AS T2 ON T1.LastEditorUserId = T2.Id WHERE T1.Title = 'Examples for teaching: Correlation does not mean causation'",
    "difficulty": "moderate"
  },
  {
    "question_id": 547,
    "db_id": "codebase_community",
    "question": "Among the posts owned by an elder user, how many of them have a score of over 19?",
    "evidence": "elder users refers to Age > 65; Score of over 19 refers to Score > = 20",
    "SQL": "SELECT COUNT(T1.Id) FROM posts AS T1 INNER JOIN users AS T2 ON T1.OwnerUserId = T2.Id WHERE T1.Score >= 20 AND T2.Age > 65",
    "difficulty": "simple"
  },
  {
    "question_id": 549,
    "db_id": "codebase_community",
    "question": "From which post is the tag \"bayesian\" excerpted from? Please give the body of the post.",
    "evidence": "\"bayesian\" is the TagName; excerpt from refers to ExcerptPostId",
    "SQL": "SELECT T2.Body FROM tags AS T1 INNER JOIN posts AS T2 ON T2.Id = T1.ExcerptPostId WHERE T1.TagName = 'bayesian'",
    "difficulty": "simple"
  },
  {
    "question_id": 555,
    "db_id": "codebase_community",
    "question": "What is the average score of the posts owned by the user csgillespie?",
    "evidence": "\"csgillespie\" is the DisplayName of user; average score refers to AVG(Score)",
    "SQL": "SELECT AVG(T1.Score) FROM posts AS T1 INNER JOIN users AS T2 ON T1.OwnerUserId = T2.Id WHERE T2.DisplayName = 'csgillespie'",
    "difficulty": "simple"
  },
  {
    "question_id": 557,
    "db_id": "codebase_community",
    "question": "Among the posts with a score of over 5, what is the percentage of them being owned by an elder user?",
    "evidence": "score of over 5 refers to Score > 5; elder user refers to Age > 65; percentage = Divide (Count(Id where Age>65), Count(Id)) * 100",
    "SQL": "SELECT CAST(SUM(IIF(T2.Age > 65, 1, 0)) AS REAL) * 100 / COUNT(T1.Id) FROM posts AS T1 INNER JOIN users AS T2 ON T1.OwnerUserId = T2.Id WHERE T1.Score > 5",
    "difficulty": "moderate"
  },
  {
    "question_id": 563,
    "db_id": "codebase_community",
    "question": "User No.3025 gave a comment at 20:29:39 on 2014/4/23 to a post, how many favorite counts did that post get?",
    "evidence": "user no. 3025 refers to UserId = '3025'; comment at 20:29:39 on 2014/4/23 refers to CreationDate = '2014/4/23 20:29:39.0'",
    "SQL": "SELECT T1.FavoriteCount FROM posts AS T1 INNER JOIN comments AS T2 ON T1.Id = T2.PostId WHERE T2.CreationDate = '2014-04-23 20:29:39.0' AND T2.UserId = 3025",
    "difficulty": "moderate"
  },
  {
    "question_id": 565,
    "db_id": "codebase_community",
    "question": "User No.23853 gave a comment to a post at 9:08:18 on 2013/7/12, was that post well-finished?",
    "evidence": "user no. 23853 refers to UserId = '23853'; at 9:08:18 on 2013/7/12 refers to CreationDate = '2013-07-12 09:08:18.0'; not well-finished refers to ClosedDate IS NULL and vice versa",
    "SQL": "SELECT IIF(T2.ClosedDate IS NULL, 'NOT well-finished', 'well-finished') AS resylt FROM comments AS T1 INNER JOIN posts AS T2 ON T1.PostId = T2.Id WHERE T1.UserId = 23853 AND T1.CreationDate = '2013-07-12 09:08:18.0'",
    "difficulty": "moderate"
  },
  {
    "question_id": 567,
    "db_id": "codebase_community",
    "question": "For the user with the display name of \"Tiago Pasqualini\", how many posts did he/she own?",
    "evidence": "\"Tiago Pasqualini\" is the DisplayName;",
    "SQL": "SELECT COUNT(T1.Id) FROM users AS T1 INNER JOIN posts AS T2 ON T1.Id = T2.OwnerUserId WHERE T1.DisplayName = 'Tiago Pasqualini'",
    "difficulty": "simple"
  },
  {
    "question_id": 568,
    "db_id": "codebase_community",
    "question": "Provide the display name of the user who made the vote No.6347.",
    "evidence": "vote no. 6347 refers to Id = '6347'",
    "SQL": "SELECT T1.DisplayName FROM users AS T1 INNER JOIN votes AS T2 ON T1.Id = T2.UserId WHERE T2.Id = 6347",
    "difficulty": "simple"
  },
  {
    "question_id": 571,
    "db_id": "codebase_community",
    "question": "For the user No.24, how many times is the number of his/her posts compared to his/her votes?",
    "evidence": "user no. 24 refers to UserId = OwnerUserId = '24'; times of his/her post than votes = Divide (Count(post.Id), Count(votes.Id))",
    "SQL": "SELECT CAST(COUNT(DISTINCT T2.Id) AS REAL) / COUNT(DISTINCT T1.Id) FROM votes AS T1 INNER JOIN posts AS T2 ON T1.UserId = T2.OwnerUserId WHERE T1.UserId = 24",
    "difficulty": "moderate"
  },
  {
    "question_id": 572,
    "db_id": "codebase_community",
    "question": "How many views did the post titled 'Integration of Weka and/or RapidMiner into Informatica PowerCenter/Developer' get?",
    "evidence": "\"Integration of Weka and/or RapidMiner into Informatica PowerCenter/Developer\" is the Title of post; views refers to ViewCount",
    "SQL": "SELECT ViewCount FROM posts WHERE Title = 'Integration of Weka and/or RapidMiner into Informatica PowerCenter/Developer'",
    "difficulty": "moderate"
  },
  {
    "question_id": 573,
    "db_id": "codebase_community",
    "question": "Write the contents of comments with a score of 17.",
    "evidence": "score of 17 refers to Score = 17; contents of comments refers to Text",
    "SQL": "SELECT Text FROM comments WHERE Score = 17",
    "difficulty": "simple"
  },
  {
    "question_id": 576,
    "db_id": "codebase_community",
    "question": "Name the user that commented 'thank you user93!'",
    "evidence": "\"thank you user93\" is the Text of comment; user refers to DisplayName",
    "SQL": "SELECT T1.DisplayName FROM users AS T1 INNER JOIN comments AS T2 ON T1.Id = T2.UserId WHERE T2.Text = 'thank you user93!'",
    "difficulty": "simple"
  },
  {
    "question_id": 578,
    "db_id": "codebase_community",
    "question": "Which user made a post titled 'Understanding what Dassault iSight is doing?' and how much is the reputation of the user?",
    "evidence": "\"Understanding what Dassault iSight is doing?\" is the Title of post; user refers to DisplayName;",
    "SQL": "SELECT T1.DisplayName, T1.Reputation FROM users AS T1 INNER JOIN posts AS T2 ON T1.Id = T2.OwnerUserId WHERE T2.Title = 'Understanding what Dassault iSight is doing?'",
    "difficulty": "moderate"
  },
  {
    "question_id": 581,
    "db_id": "codebase_community",
    "question": "Who is the owner of the post titled 'Open source tools for visualizing multi-dimensional data?'",
    "evidence": "'Open source tools for visualizing multi-dimensional data' is the Title of Post; owner refers to DisplayName;",
    "SQL": "SELECT T2.DisplayName FROM posts AS T1 INNER JOIN users AS T2 ON T1.OwnerUserId = T2.Id WHERE T1.Title = 'Open source tools for visualizing multi-dimensional data?'",
    "difficulty": "moderate"
  },
  {
    "question_id": 584,
    "db_id": "codebase_community",
    "question": "Write all the comments left by users who edited the post titled 'Why square the difference instead of taking the absolute value in standard deviation?'",
    "evidence": "\"Why square the difference instead of taking the absolute value in standard deviation?\" is the Title of post;",
    "SQL": "SELECT T2.Comment FROM posts AS T1 INNER JOIN postHistory AS T2 ON T1.Id = T2.PostId WHERE T1.Title = 'Why square the difference instead of taking the absolute value in standard deviation?'",
    "difficulty": "moderate"
  },
  {
    "question_id": 586,
    "db_id": "codebase_community",
    "question": "Which user added a bounty amount of 50 to the post title mentioning variance?",
    "evidence": "bounty amount of 50 refers to BountyAmount = 50; user refers to DisplayName; title mentioning variance refers to Title include 'variance'",
    "SQL": "SELECT T3.DisplayName, T1.Title FROM posts AS T1 INNER JOIN votes AS T2 ON T1.Id = T2.PostId INNER JOIN users AS T3 ON T3.Id = T2.UserId WHERE T2.BountyAmount = 50 AND T1.Title LIKE '%variance%'",
    "difficulty": "challenging"
  },
  {
    "question_id": 587,
    "db_id": "codebase_community",
    "question": "Calculate the average view count of each post tagged as 'humor' and list the title and the comment of each post.",
    "evidence": "tagged as  'humor' refers to tag = '<humor>'; comment of the post refers to Text; average view count = AVG(ViewCount)",
    "SQL": "SELECT AVG(T2.ViewCount), T2.Title, T1.Text FROM comments AS T1 INNER JOIN posts AS T2 ON T2.Id = T1.PostId  WHERE T2.Tags = '<humor>' GROUP BY T2.Title, T1.Text ",
    "difficulty": "moderate"
  },
  {
    "question_id": 592,
    "db_id": "codebase_community",
    "question": "How many users are awarded with more than 5 badges?",
    "evidence": "more than 5 badges refers to Count (Name) > 5; user refers to UserId",
    "SQL": "SELECT COUNT(UserId) FROM ( SELECT UserId, COUNT(Name) AS num FROM badges GROUP BY UserId ) T WHERE T.num > 5",
    "difficulty": "simple"
  },
  {
    "question_id": 595,
    "db_id": "codebase_community",
    "question": "Which user have only one post history per post and having at least 1000 views?",
    "evidence": "having at least 1000 view refers to Views > = 1000; user refers to UserId",
    "SQL": "SELECT T2.UserId FROM users AS T1 INNER JOIN postHistory AS T2 ON T1.Id = T2.UserId INNER JOIN posts AS T3 ON T2.PostId = T3.Id WHERE T3.ViewCount >= 1000 GROUP BY T2.UserId HAVING COUNT(DISTINCT T2.PostHistoryTypeId) = 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 598,
    "db_id": "codebase_community",
    "question": "What is the percentage difference of student badges given during 2010 and 2011?",
    "evidence": "student badges refers to badge's name = 'Student'; during 2010 refers to Year(Date) = 2010; during 2011 refers to Year(Date) = 2011; percentage difference = Subtract (Divide(Count(Name where Year(Date) = 2010), Count (Name)) *100, Divide(Count(Name where Year(Date) = 2011), Count(Name)) * 100)",
    "SQL": "SELECT CAST(SUM(IIF(STRFTIME('%Y', Date) = '2010', 1, 0)) AS REAL) * 100 / COUNT(Id) - CAST(SUM(IIF(STRFTIME('%Y', Date) = '2011', 1, 0)) AS REAL) * 100 / COUNT(Id) FROM badges WHERE Name = 'Student'",
    "difficulty": "challenging"
  },
  {
    "question_id": 604,
    "db_id": "codebase_community",
    "question": "What is the average of the up votes and the average user age for users creating more than 10 posts?",
    "evidence": "creating more than 10 post refers to Count (UserId) > 10; average of the up votes = Divide (Sum(UpVotes), Count (UserId)); average age = Divide (Sum(Age), Count(UserId))",
    "SQL": "SELECT AVG(T1.UpVotes), AVG(T1.Age) FROM users AS T1 INNER JOIN ( SELECT OwnerUserId, COUNT(*) AS post_count FROM posts GROUP BY OwnerUserId HAVING post_count > 10) AS T2 ON T1.Id = T2.OwnerUserId",
    "difficulty": "moderate"
  },
  {
    "question_id": 629,
    "db_id": "codebase_community",
    "question": "Calculate the ratio of votes in 2010 and 2011.",
    "evidence": "DIVIDE(COUNT(Id where YEAR(CreationDate) = 2010), COUNT(Id where YEAR(CreationDate) = 2011)) FROM votes;",
    "SQL": "SELECT CAST(SUM(IIF(STRFTIME('%Y', CreationDate) = '2010', 1, 0)) AS REAL) / SUM(IIF(STRFTIME('%Y', CreationDate) = '2011', 1, 0)) FROM votes",
    "difficulty": "simple"
  },
  {
    "question_id": 633,
    "db_id": "codebase_community",
    "question": "Which post by slashnick has the most answers count? State the post ID.",
    "evidence": "most answers count refers to MAX(AnswerCount); post by slashnick refers to DisplayName = 'slashnick';",
    "SQL": "SELECT T2.PostId FROM users AS T1 INNER JOIN postHistory AS T2 ON T1.Id = T2.UserId INNER JOIN posts AS T3 ON T2.PostId = T3.Id WHERE T1.DisplayName = 'slashnick' ORDER BY T3.AnswerCount DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 634,
    "db_id": "codebase_community",
    "question": "Among posts by Harvey Motulsky and Noah Snyder, which one has higher popularity?",
    "evidence": "Has higher popularity means the post has higher view count ; calculation = MAX(SUM(ViewCount)) where DisplayName = 'Harvey Motulsky' OR DisplayName = 'Noah Snyder';",
    "SQL": "SELECT T1.DisplayName FROM users AS T1 INNER JOIN postHistory AS T2 ON T1.Id = T2.UserId INNER JOIN posts AS T3 ON T2.PostId = T3.Id WHERE T1.DisplayName = 'Harvey Motulsky' OR T1.DisplayName = 'Noah Snyder' GROUP BY T1.DisplayName ORDER BY SUM(T3.ViewCount) DESC LIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 637,
    "db_id": "codebase_community",
    "question": "State all the tags used by Mark Meckes in his posts that doesn't have comments.",
    "evidence": "used by Mark Meckes refers to DisplayName = 'Mark Meckes'; Doen't have comments refers to CommentCount = 0;",
    "SQL": "SELECT T3.Tags FROM users AS T1 INNER JOIN postHistory AS T2 ON T1.Id = T2.UserId INNER JOIN posts AS T3 ON T3.Id = T2.PostId WHERE T1.DisplayName = 'Mark Meckes' AND T3.CommentCount = 0",
    "difficulty": "moderate"
  },
  {
    "question_id": 639,
    "db_id": "codebase_community",
    "question": "Based on posts posted by Community, calculate the percentage of posts that use the R language.",
    "evidence": "DIVIDE(COUNT(PostId WHERE TagName = 'r')), (COUNT(PostId WHERE DisplayName = 'Community')) as percentage; R language refers to tagname = 'r'",
    "SQL": "SELECT CAST(SUM(IIF(T3.TagName = 'r', 1, 0)) AS REAL) * 100 / COUNT(T1.Id) FROM users AS T1 INNER JOIN postHistory AS T2 ON T1.Id = T2.UserId INNER JOIN tags AS T3 ON T3.ExcerptPostId = T2.PostId WHERE T1.DisplayName = 'Community'",
    "difficulty": "challenging"
  },
  {
    "question_id": 640,
    "db_id": "codebase_community",
    "question": "Calculate the difference in view count from post posted by Mornington and view count from posts posted by Amos.",
    "evidence": "calculation = SUBTRACT(SUM(ViewCount where DisplayName = 'Mornington'), SUM(ViewCount where DisplayName = 'Amos'));",
    "SQL": "SELECT SUM(IIF(T1.DisplayName = 'Mornington', T3.ViewCount, 0)) - SUM(IIF(T1.DisplayName = 'Amos', T3.ViewCount, 0)) AS diff FROM users AS T1 INNER JOIN postHistory AS T2 ON T1.Id = T2.UserId INNER JOIN posts AS T3 ON T3.Id = T2.PostId",
    "difficulty": "moderate"
  },
  {
    "question_id": 665,
    "db_id": "codebase_community",
    "question": "What is the average monthly number of links created in 2010 for posts that have no more than 2 answers?",
    "evidence": "calculation = DIVIDE(COUNT(Id where YEAR(CreationDate) = 2010 and AnswerCount < = 2), 12)",
    "SQL": "SELECT CAST(COUNT(T1.Id) AS REAL) / 12 FROM postLinks AS T1 INNER JOIN posts AS T2 ON T1.PostId = T2.Id WHERE T2.AnswerCount <= 2 AND STRFTIME('%Y', T1.CreationDate) = '2010'",
    "difficulty": "moderate"
  },
  {
    "question_id": 669,
    "db_id": "codebase_community",
    "question": "When did 'chl' cast its first vote in a post?",
    "evidence": "DisplayName = 'chl'; cast its first vote refers to MIN(CreationDate);",
    "SQL": "SELECT T2.CreationDate FROM users AS T1 INNER JOIN votes AS T2 ON T1.Id = T2.UserId WHERE T1.DisplayName = 'chl' ORDER BY T2.CreationDate LIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 671,
    "db_id": "codebase_community",
    "question": "What is the display name of the user who acquired the first Autobiographer badge?",
    "evidence": "Autobiographer is the name of the badge; acquired the first refers to MIN(Date);",
    "SQL": "SELECT T1.DisplayName FROM users AS T1 INNER JOIN badges AS T2 ON T1.Id = T2.UserId WHERE T2.`Name` = 'Autobiographer' ORDER BY T2.Date LIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 672,
    "db_id": "codebase_community",
    "question": "Among the users located in United Kingdom, how many users whose post have a total favorite amount of 4 or more?",
    "evidence": "favorite amount of 4 or more refers to FavoriteCount > = 4; Location = 'United Kingdom';",
    "SQL": "SELECT COUNT(T1.Id) FROM users AS T1 INNER JOIN posts AS T2 ON T1.Id = T2.OwnerUserId WHERE T1.Location = 'United Kingdom' AND T2.FavoriteCount >= 4",
    "difficulty": "moderate"
  },
  {
    "question_id": 678,
    "db_id": "codebase_community",
    "question": "Which post by Harvey Motulsky has the most views? Please give the id and title of this post.",
    "evidence": "DisplayName = 'Harvey Motulsky'; the most views refer to MAX(ViewCount);",
    "SQL": "SELECT T2.Id, T2.Title FROM users AS T1 INNER JOIN posts AS T2 ON T1.Id = T2.OwnerUserId WHERE T1.DisplayName = 'Harvey Motulsky' ORDER BY T2.ViewCount DESC LIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 682,
    "db_id": "codebase_community",
    "question": "Which is the most valuable post in 2010? Please give its id and the owner's display name.",
    "evidence": "the most valuable post in 2010 refers to MAX(FavoriteCount) where year(CreationDate) = 2010;",
    "SQL": "SELECT T2.OwnerUserId, T1.DisplayName FROM users AS T1 INNER JOIN posts AS T2 ON T1.Id = T2.OwnerUserId WHERE STRFTIME('%Y', T1.CreationDate) = '2010' ORDER BY T2.FavoriteCount DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 683,
    "db_id": "codebase_community",
    "question": "What is the percentage of posts whose owners had a reputation of over 1000 in 2011?",
    "evidence": "percentage = DIVIDE(COUNT(Id where YEAR(CreationDate) = 2011 and Reputation > 1000), COUNT(Id) ) * 100;",
    "SQL": "SELECT CAST(SUM(IIF(STRFTIME('%Y', T2.CreaionDate) = '2011' AND T1.Reputation > 1000, 1, 0)) AS REAL) * 100 / COUNT(T1.Id) FROM users AS T1 INNER JOIN posts AS T2 ON T1.Id = T2.OwnerUserId",
    "difficulty": "moderate"
  },
  {
    "question_id": 685,
    "db_id": "codebase_community",
    "question": "Identify the total views on the post 'Computer Game Datasets'. Name the user who posted it last time.",
    "evidence": "total views refer to ViewCount; Name the user refers to DisplayName; post 'Computer Game Datasets' refers to Text = 'Computer Game Datasets';",
    "SQL": "SELECT T2.ViewCount, T3.DisplayName FROM postHistory AS T1 INNER JOIN posts AS T2 ON T1.PostId = T2.Id INNER JOIN users AS T3 ON T2.LastEditorUserId = T3.Id WHERE T1.Text = 'Computer Game Datasets'",
    "difficulty": "moderate"
  },
  {
    "question_id": 687,
    "db_id": "codebase_community",
    "question": "How many comments were added to the post with the highest score?",
    "evidence": "the highest score refers to MAX(Score);",
    "SQL": "SELECT COUNT(T2.Id) FROM posts AS T1 INNER JOIN comments AS T2 ON T1.Id = T2.PostId GROUP BY T1.Id ORDER BY T1.Score DESC LIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 694,
    "db_id": "codebase_community",
    "question": "Provide the text of the latest 10 comments to the post with the title 'Analysing wind data with R' and the display name of the user who left it.",
    "evidence": "the latest comment refers to MAX(CreationDate);",
    "SQL": "SELECT T3.Text, T1.DisplayName FROM users AS T1 INNER JOIN posts AS T2 ON T1.Id = T2.OwnerUserId INNER JOIN comments AS T3 ON T2.Id = T3.PostId WHERE T2.Title = 'Analysing wind data with R' ORDER BY T1.CreationDate DESC LIMIT 10",
    "difficulty": "moderate"
  },
  {
    "question_id": 701,
    "db_id": "codebase_community",
    "question": "Among all the posts posted by the most influential user, identify the percentage with a score above 50.",
    "evidence": "The higher reputation the user has the more influence; percentage = DIVIDE(COUNT(stats_posts.Id where Score > 50 and MAX(Reputation))), COUNT(stats_posts.Id where MAX(Reputation));",
    "SQL": "SELECT CAST(SUM(CASE WHEN T2.Score > 50 THEN 1 ELSE 0 END) AS REAL) * 100 / COUNT(T1.Id) FROM users T1 INNER JOIN posts T2 ON T1.Id = T2.OwnerUserId INNER JOIN ( SELECT MAX(Reputation) AS max_reputation FROM users ) T3 ON T1.Reputation = T3.max_reputation",
    "difficulty": "challenging"
  },
  {
    "question_id": 704,
    "db_id": "codebase_community",
    "question": "What is the excerpt post ID and wiki post ID of the tag named sample?",
    "evidence": "tag named sample refers to TagName = 'sample';",
    "SQL": "SELECT ExcerptPostId, WikiPostId FROM tags WHERE TagName = 'sample'",
    "difficulty": "simple"
  },
  {
    "question_id": 705,
    "db_id": "codebase_community",
    "question": "Give the user's reputation and up vote number of the user that commented \"fine, you win :)\".",
    "evidence": "Text = 'fine, you win :)';",
    "SQL": "SELECT T2.Reputation, T2.UpVotes FROM comments AS T1 INNER JOIN users AS T2 ON T1.UserId = T2.Id WHERE T1.Text = 'fine, you win :)'",
    "difficulty": "simple"
  },
  {
    "question_id": 707,
    "db_id": "codebase_community",
    "question": "Among the posts with views ranging from 100 to 150, what is the comment with the highest score?",
    "evidence": "views ranging from 100 to 150 refers to ViewCount BETWEEN 100 and 150; comment with the highest score refers to Text where MAX(Score);",
    "SQL": "SELECT Text FROM comments WHERE PostId IN ( SELECT Id FROM posts WHERE ViewCount BETWEEN 100 AND 150 ) ORDER BY Score DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 710,
    "db_id": "codebase_community",
    "question": "In posts with 1 comment, how many of the comments have 0 score?",
    "evidence": "in posts with 1 comment refers to CommentCount = 1;",
    "SQL": "SELECT COUNT(T1.id) FROM comments AS T1 INNER JOIN posts AS T2 ON T1.PostId = T2.Id WHERE T2.CommentCount = 1 AND T2.Score = 0",
    "difficulty": "simple"
  },
  {
    "question_id": 716,
    "db_id": "codebase_community",
    "question": "Among the comments with scores between 5 to 10, what is the percentage of the users with 0 up votes?",
    "evidence": "percentage = DIVIDE(COUNT(UserId where UpVotes = 0 and Score BETWEEN 5 and 10))*100, (COUNT(UserId where Score BETWEEN 5 and 10));",
    "SQL": "SELECT CAST(SUM(IIF(T1.UpVotes = 0, 1, 0)) AS REAL) * 100/ COUNT(T1.Id) AS per FROM users AS T1 INNER JOIN comments AS T2 ON T1.Id = T2.UserId WHERE T2.Score BETWEEN 5 AND 10",
    "difficulty": "moderate"
  },
  {
    "question_id": 340,
    "db_id": "card_games",
    "question": "Which are the cards that have incredibly powerful foils.",
    "evidence": "incredibly poweful foils refers to cardKingdomFoilId is not null AND cardKingdomId is not null",
    "SQL": "SELECT id FROM cards WHERE cardKingdomFoilId IS NOT NULL AND cardKingdomId IS NOT NULL",
    "difficulty": "simple"
  },
  {
    "question_id": 341,
    "db_id": "card_games",
    "question": "What are the borderless cards available without powerful foils?",
    "evidence": "borderless' refers to borderColor; poweful foils refers to cardKingdomFoilId paired with cardKingdomId AND cardKingdomId is not null",
    "SQL": "SELECT id FROM cards WHERE borderColor = 'borderless' AND (cardKingdomId IS NULL OR cardKingdomId IS NULL)",
    "difficulty": "simple"
  },
  {
    "question_id": 344,
    "db_id": "card_games",
    "question": "List all the mythic rarity print cards banned in gladiator format.",
    "evidence": "mythic rarity printing refers to rarity = 'mythic'; card banned refers to status = 'Banned'; in gladiator format refers to format = 'gladiator';",
    "SQL": "SELECT DISTINCT T1.id FROM cards AS T1 INNER JOIN legalities AS T2 ON T1.uuid = T2.uuid WHERE T2.format = 'gladiator' AND T2.status = 'Banned' AND T1.rarity = 'mythic'",
    "difficulty": "moderate"
  },
  {
    "question_id": 345,
    "db_id": "card_games",
    "question": "For artifact type of cards that do not have multiple faces on the same card, state its legalities status for vintage play format.",
    "evidence": "Artifact type of cards refers to types = 'Artifact'; card does not have multiple faces on the same card refers to side is NULL'; vintage play format refers to format = 'vintage';",
    "SQL": "SELECT DISTINCT T2.status FROM cards AS T1 INNER JOIN legalities AS T2 ON T1.uuid = T2.uuid WHERE T1.type = 'Artifact' AND T2.format = 'vintage' AND T1.side IS NULL",
    "difficulty": "moderate"
  },
  {
    "question_id": 346,
    "db_id": "card_games",
    "question": "List all the card id and artist with unknown power which are legal for commander play format.",
    "evidence": "unknown power refers to power = '*' or POWER IS NULL; commander play format refers to format = 'commander'; legal for commander play format refers to format = 'commander' where status = 'Legal'",
    "SQL": "SELECT T1.id, T1.artist FROM cards AS T1 INNER JOIN legalities AS T2 ON T1.uuid = T2.uuid WHERE T2.status = 'Legal' AND T2.format = 'commander' AND (T1.power IS NULL OR T1.power = '*')",
    "difficulty": "moderate"
  },
  {
    "question_id": 347,
    "db_id": "card_games",
    "question": "Find all cards illustrated by Stephen Daniel and describe the text of the ruling of these cards. State if these cards have missing or degraded properties and values.",
    "evidence": "cards have missing or degraded properties and value refers to hasContentWarning = 1; 'Stephen Daniele' is artist; Find all cards refers to return card id",
    "SQL": "SELECT T1.id, T2.text, T1.hasContentWarning FROM cards AS T1 INNER JOIN rulings AS T2 ON T1.uuid = T2.uuid WHERE T1.artist = 'Stephen Daniele'",
    "difficulty": "moderate"
  },
  {
    "question_id": 349,
    "db_id": "card_games",
    "question": "Name the card and artist with the most ruling information. Also state if the card is a promotional printing.",
    "evidence": "with the most ruling information refers to Max(count(rulings.uuid)); the card is the promotional printing refers to isPromo = 1;",
    "SQL": "SELECT T1.name, T1.artist, T1.isPromo FROM cards AS T1 INNER JOIN rulings AS T2 ON T1.uuid = T2.uuid WHERE T1.isPromo = 1 AND T1.artist = (SELECT artist FROM cards WHERE isPromo = 1 GROUP BY artist HAVING COUNT(DISTINCT uuid) = (SELECT MAX(count_uuid) FROM ( SELECT COUNT(DISTINCT uuid) AS count_uuid FROM cards WHERE isPromo = 1 GROUP BY artist ))) LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 352,
    "db_id": "card_games",
    "question": "Calculate the percentage of the cards availabe in Chinese Simplified.",
    "evidence": "Chinese Simplified' is the language; percentage = Divide(Sum(id where language = 'Chinese Simplified'), Count(id)) *100",
    "SQL": "SELECT CAST(SUM(CASE WHEN T2.language = 'Chinese Simplified' THEN 1 ELSE 0 END) AS REAL) * 100 / COUNT(T1.id) FROM cards AS T1 INNER JOIN foreign_data AS T2 ON T1.uuid = T2.uuid",
    "difficulty": "moderate"
  },
  {
    "question_id": 356,
    "db_id": "card_games",
    "question": "How many cards have infinite power?",
    "evidence": "infinite power refers to power = '*';",
    "SQL": "SELECT COUNT(*) FROM cards WHERE power = '*'",
    "difficulty": "simple"
  },
  {
    "question_id": 358,
    "db_id": "card_games",
    "question": "What is the border color of card \"Ancestor's Chosen\"?",
    "evidence": "name of card  = 'Ancestor''s Chosen' ;",
    "SQL": "SELECT DISTINCT borderColor FROM cards WHERE name = 'Ancestor''s Chosen'",
    "difficulty": "simple"
  },
  {
    "question_id": 366,
    "db_id": "card_games",
    "question": "What is the rule of playing card \"Benalish Knight\"?",
    "evidence": "Benalish Knight' is the name of card; rule of playing card refers to format;",
    "SQL": "SELECT T2.format FROM cards AS T1 INNER JOIN legalities AS T2 ON T1.uuid = T2.uuid WHERE T1.name = 'Benalish Knight'",
    "difficulty": "simple"
  },
  {
    "question_id": 368,
    "db_id": "card_games",
    "question": "What is the percentage of borderless cards?",
    "evidence": "borderless card refers to borderColor = 'borderless'; percentage = Divide(Count (id) where borderColor = 'borderless', Count(id)) *100",
    "SQL": "SELECT CAST(SUM(CASE WHEN borderColor = 'borderless' THEN 1 ELSE 0 END) AS REAL) * 100 / COUNT(id) FROM cards",
    "difficulty": "simple"
  },
  {
    "question_id": 371,
    "db_id": "card_games",
    "question": "What is the percentage of cards whose language is French among the Story Spotlight cards?",
    "evidence": "Story Spotlight card refers to isStorySpotlight = 1; French is the language; Percentage = Divide(Count(id) where language = 'French' and isStorySpotlight = 1, Count(id) where isStorySpotlight = 1)*100",
    "SQL": "SELECT CAST(SUM(CASE WHEN T2.language = 'French' THEN 1 ELSE 0 END) AS REAL) * 100 / COUNT(T1.id) FROM cards AS T1 INNER JOIN foreign_data AS T2 ON T1.uuid = T2.uuid WHERE T1.isStorySpotlight = 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 377,
    "db_id": "card_games",
    "question": "How many cards with original type of \"Summon - Angel\" have subtype other than \"Angel\"?",
    "evidence": "subtype other than Angel refers to subtypes is not 'Angel';",
    "SQL": "SELECT COUNT(id) FROM cards WHERE originalType = 'Summon - Angel' AND subtypes != 'Angel'",
    "difficulty": "simple"
  },
  {
    "question_id": 379,
    "db_id": "card_games",
    "question": "What are the cards belong to duel deck a? List the ID.",
    "evidence": "duel deck a refers to duelDeck = a;",
    "SQL": "SELECT id FROM cards WHERE duelDeck = 'a'",
    "difficulty": "simple"
  },
  {
    "question_id": 383,
    "db_id": "card_games",
    "question": "How many of the banned cards are white border?",
    "evidence": "banned card refers to status = 'Banned'; white border refers to borderColor = 'white';",
    "SQL": "SELECT COUNT(T1.id) FROM cards AS T1 INNER JOIN legalities AS T2 ON T1.uuid = T2.uuid WHERE T2.status = 'Banned' AND T1.borderColor = 'white'",
    "difficulty": "simple"
  },
  {
    "question_id": 391,
    "db_id": "card_games",
    "question": "Among the Artifact cards, which are black color and comes with foreign languague translation?",
    "evidence": "Artifact card refers to originalType = 'Artifact'; black color refers to colors = 'B'; foreign language refers to language in foreign_data",
    "SQL": "SELECT DISTINCT T1.name FROM cards AS T1 INNER JOIN foreign_data AS T2 ON T1.uuid = T2.uuid WHERE T1.originalType = 'Artifact' AND T1.colors = 'B'",
    "difficulty": "moderate"
  },
  {
    "question_id": 397,
    "db_id": "card_games",
    "question": "What is the mana cost of cards with a normal layout, a 2003 frame version, with a black border color, and available in paper and mtgo?",
    "evidence": "available in paper and mtgo refers to availability = 'mtgo,paper'; frameVersion = 2003;borderColor = 'black'",
    "SQL": "SELECT manaCost FROM cards WHERE availability = 'mtgo,paper' AND borderColor = 'black' AND frameVersion = 2003 AND layout = 'normal'",
    "difficulty": "moderate"
  },
  {
    "question_id": 402,
    "db_id": "card_games",
    "question": "What is the percentage of Story Spotlight cards that do not have a text box? List them by their ID.",
    "evidence": "Story Spotlight cards that do not have a text box refers to isStorylight = 1 and isTextless = 0; Percentage = DIVIDE(SUM(count(id) where isStorylight = 1 AND isTextless = 0 ), SUM(count(id))) * 100",
    "SQL": "SELECT CAST(SUM(CASE WHEN isTextless = 0 AND  isStorySpotlight = 1 THEN 1 ELSE 0 END) AS REAL) * 100 / COUNT(id) FROM cards",
    "difficulty": "moderate"
  },
  {
    "question_id": 405,
    "db_id": "card_games",
    "question": "How many Brazilian Portuguese translated sets are inside the Commander block?",
    "evidence": "Commander block refer to block = 'Commander'; sets refer to code = setCode; Portuguese refer to language = 'Portuguese (Brasil)'",
    "SQL": "SELECT COUNT(T1.id) FROM sets AS T1 INNER JOIN set_translations AS T2 ON T1.code = T2.setCode WHERE T2.language = 'Portuguese (Brazil)' AND T1.block = 'Commander'",
    "difficulty": "moderate"
  },
  {
    "question_id": 407,
    "db_id": "card_games",
    "question": "Lists all types of cards in German.",
    "evidence": "German refer to language; all types refer to the subtypes, supertypes; subtypes is not null AND supertypes is not null",
    "SQL": "SELECT T1.subtypes, T1.supertypes FROM cards AS T1 INNER JOIN foreign_data AS T2 ON T1.uuid = T2.uuid WHERE T2.language = 'German' AND T1.subtypes IS NOT NULL AND T1.supertypes IS NOT NULL",
    "difficulty": "moderate"
  },
  {
    "question_id": 408,
    "db_id": "card_games",
    "question": "How many unknown power cards contain info about the triggered ability",
    "evidence": "unknown power cards refers to power is null or power = '*';contain info about the triggered ability refers to text contains 'triggered ability'",
    "SQL": "SELECT Count(DISTINCT T1.id) FROM cards AS T1 INNER JOIN rulings AS T2 ON T1.uuid = T2.uuid WHERE (T1.power IS NULL OR T1.power = '*') AND T2.text LIKE '%triggered ability%'",
    "difficulty": "moderate"
  },
  {
    "question_id": 409,
    "db_id": "card_games",
    "question": "Indicates the number of cards with pre-modern format, ruling text \"This is a triggered mana ability.\" that do not have multiple faces.",
    "evidence": "pre-modern format refers to format = 'premodern' ;do not have multiple faces refers to side IS NULL",
    "SQL": "SELECT COUNT(T1.id) FROM cards AS T1 INNER JOIN legalities AS T2 ON T1.uuid = T2.uuid INNER JOIN rulings AS T3 ON T1.uuid = T3.uuid WHERE T2.format = 'premodern' AND T3.text = 'This is a triggered mana ability.' AND T1.Side IS NULL",
    "difficulty": "moderate"
  },
  {
    "question_id": 412,
    "db_id": "card_games",
    "question": "What is the foreign name of the card in French of type Creature, normal layout and black border color, by artist Matthew D. Wilson?",
    "evidence": "in French refers to language = 'French'; black border color refers to borderColor = 'black'",
    "SQL": "SELECT name FROM foreign_data WHERE uuid IN ( SELECT uuid FROM cards WHERE types = 'Creature' AND layout = 'normal' AND borderColor = 'black' AND artist = 'Matthew D. Wilson' ) AND language = 'French'",
    "difficulty": "moderate"
  },
  {
    "question_id": 414,
    "db_id": "card_games",
    "question": "What language is the set of 180 cards that belongs to the Ravnica block translated into?",
    "evidence": "set of 180 cards refers to baseSetSize = 180",
    "SQL": "SELECT T2.language FROM sets AS T1 INNER JOIN set_translations AS T2 ON T1.code = T2.setCode WHERE T1.block = 'Ravnica' AND T1.baseSetSize = 180",
    "difficulty": "simple"
  },
  {
    "question_id": 415,
    "db_id": "card_games",
    "question": "What percentage of cards with format commander and legal status do not have a content warning?",
    "evidence": "do not have a content warning refers to hasContentWarning = 0; percentage refers to DIVIDE(COUNT(hasContentWarning = 0),COUNT(ID))*100 where format = 'commander' AND Status = 'legal';",
    "SQL": "SELECT CAST(SUM(CASE WHEN T1.hasContentWarning = 0 THEN 1 ELSE 0 END) AS REAL) * 100 / COUNT(T1.id) FROM cards AS T1 INNER JOIN legalities AS T2 ON T1.uuid = T2.uuid WHERE T2.format = 'commander' AND T2.status = 'Legal'",
    "difficulty": "challenging"
  },
  {
    "question_id": 416,
    "db_id": "card_games",
    "question": "What percentage of cards without power are in French?",
    "evidence": "in French refers to language = 'French'; cards without power refers to power IS NULL OR power = '*'; percentage = DIVIDE(COUNT(language = 'French' and power is NULL or power = '*'), COUNT( power is NULL or power = '*'))*100",
    "SQL": "SELECT CAST(SUM(CASE WHEN T2.language = 'French' THEN 1 ELSE 0 END) AS REAL) * 100 / COUNT(T1.id) FROM cards AS T1 INNER JOIN foreign_data AS T2 ON T1.uuid = T2.uuid WHERE T1.power IS NULL OR T1.power = '*'",
    "difficulty": "challenging"
  },
  {
    "question_id": 422,
    "db_id": "card_games",
    "question": "What is the language of the card with the multiverse number 149934?",
    "evidence": "multiverse number 149934 refers to multiverseid = 149934;",
    "SQL": "SELECT language FROM foreign_data WHERE multiverseid = 149934",
    "difficulty": "simple"
  },
  {
    "question_id": 424,
    "db_id": "card_games",
    "question": "What proportion of cards do not have a text box with a normal layout?",
    "evidence": "do not have a text box refers to isTextless = 1; proportion refers to DIVIDE(COUNT(Textless = 1 and layout = 'normal'),COUNT(Textless))*100",
    "SQL": "SELECT CAST(SUM(CASE WHEN isTextless = 1 AND layout = 'normal' THEN 1 ELSE 0 END) AS REAL) * 100 / COUNT(*) FROM cards",
    "difficulty": "simple"
  },
  {
    "question_id": 427,
    "db_id": "card_games",
    "question": "What languages are available in the set known as Archenemy on the magic card market and having the code ARC?",
    "evidence": "known as Archenemy refers to mcmName = 'Archenemy'; having the code ARC refers to setCode = 'ARC'",
    "SQL": "SELECT T2.language FROM sets AS T1 INNER JOIN set_translations AS T2 ON T1.code = T2.setCode WHERE T1.mcmName = 'Archenemy' AND T2.setCode = 'ARC'",
    "difficulty": "moderate"
  },
  {
    "question_id": 440,
    "db_id": "card_games",
    "question": "Which foreign language used by \"A Pedra Fellwar\"?",
    "evidence": "\"A Pedra Fellwar\" refers to name = 'A Pedra Fellwar'",
    "SQL": "SELECT DISTINCT language FROM foreign_data WHERE name = 'A Pedra Fellwar'",
    "difficulty": "simple"
  },
  {
    "question_id": 459,
    "db_id": "card_games",
    "question": "Which card costs more converted mana, \"Serra Angel\" or \"Shrine Keeper\"?",
    "evidence": "\"Serra Angel\" refers to name = 'Serra Angel'; \"Shrine Keeper\" refers to name = 'Shrine Keeper'; card costs more converted mana when the value of convertedManaCost is greater",
    "SQL": "SELECT name FROM cards WHERE name IN ('Serra Angel', 'Shrine Keeper') ORDER BY convertedManaCost DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 462,
    "db_id": "card_games",
    "question": "What's the Italian name of the set of cards with \"Ancestor's Chosen\" is in?",
    "evidence": "Italian is a language which refers to language = 'Italian'; with \"Ancestor's Chosen\" in the card set refers to name = 'Ancestor''s Chosen'",
    "SQL": "SELECT translation FROM set_translations WHERE setCode IN ( SELECT setCode FROM cards WHERE name = 'Ancestor''s Chosen' ) AND language = 'Italian'",
    "difficulty": "moderate"
  },
  {
    "question_id": 465,
    "db_id": "card_games",
    "question": "For the set of cards with \"Ancestor's Chosen\" in it, is there a Korean version of it?",
    "evidence": "set of cards with \"Ancestor''s Chosen\" in it refers to name = 'Ancestor''s Chosen'; Korean version refers to language = 'Korean'",
    "SQL": "SELECT IIF(SUM(CASE WHEN T2.language = 'Korean' AND T2.translation IS NOT NULL THEN 1 ELSE 0 END) > 0, 'YES', 'NO') FROM cards AS T1 INNER JOIN set_translations AS T2 ON T2.setCode = T1.setCode WHERE T1.name = 'Ancestor''s Chosen'",
    "difficulty": "moderate"
  },
  {
    "question_id": 466,
    "db_id": "card_games",
    "question": "Among the cards in the set \"Hauptset Zehnte Edition\", how many of them are designed by Adam Rex?",
    "evidence": "card set \"Hauptset Zehnte Edition\" refers to translation = 'Hauptset Zehnte Edition'; designed by Adam refers to artist = 'Adam Rex'",
    "SQL": "SELECT COUNT(T1.id) FROM cards AS T1 INNER JOIN set_translations AS T2 ON T2.setCode = T1.setCode WHERE T2.translation = 'Hauptset Zehnte Edition' AND T1.artist = 'Adam Rex'",
    "difficulty": "moderate"
  },
  {
    "question_id": 468,
    "db_id": "card_games",
    "question": "What is the Simplified Chinese translation of the name of the set \"Eighth Edition\"?",
    "evidence": "Eighth Edition is the name of card set which refers to name = 'Eighth Edition'; Simplified Chinese refers to language = 'Chinese Simplified'; translation of the name refers to translation",
    "SQL": "SELECT T2.translation FROM sets AS T1 INNER JOIN set_translations AS T2 ON T2.setCode = T1.code WHERE T1.name = 'Eighth Edition' AND T2.language = 'Chinese Simplified'",
    "difficulty": "moderate"
  },
  {
    "question_id": 469,
    "db_id": "card_games",
    "question": "Did the set of cards with \"Angel of Mercy\" appear on Magic: The Gathering Online?",
    "evidence": "card set \"Angel of Mercy\" refers to name = 'Angel of Mercy'; appear on Magic: The Gathering Online refers to mtgoCode is NOT NULL and vice versa",
    "SQL": "SELECT IIF(T2.mtgoCode IS NOT NULL, 'YES', 'NO') FROM cards AS T1 INNER JOIN sets AS T2 ON T2.code = T1.setCode WHERE T1.name = 'Angel of Mercy'",
    "difficulty": "moderate"
  },
  {
    "question_id": 472,
    "db_id": "card_games",
    "question": "Among the sets in the block \"Ice Age\", how many of them have an Italian translation?",
    "evidence": "sets in the block \"Ice Age\" refers to block = 'Ice Age'; Italian translation refers to language = 'Italian' and translation is not null",
    "SQL": "SELECT COUNT(DISTINCT T1.id) FROM sets AS T1 INNER JOIN set_translations AS T2 ON T2.setCode = T1.code WHERE T1.block = 'Ice Age' AND T2.language = 'Italian' AND T2.translation IS NOT NULL",
    "difficulty": "moderate"
  },
  {
    "question_id": 473,
    "db_id": "card_games",
    "question": "Is the set of cards with Adarkar Valkyrie only available outside the United States?",
    "evidence": "card set Adarkar Valkyrie refers to name = 'Adarkar Valkyrie'; isForeignOnly = 1 means only available outside the United States;",
    "SQL": "SELECT IIF(isForeignOnly = 1, 'YES', 'NO') FROM cards AS T1 INNER JOIN sets AS T2 ON T2.code = T1.setCode WHERE T1.name = 'Adarkar Valkyrie'",
    "difficulty": "moderate"
  },
  {
    "question_id": 474,
    "db_id": "card_games",
    "question": "Among the sets of cards that have an Italian translation, how many of them have a base set number of under 100?",
    "evidence": "Italian translation refers to language = 'Italian'; have a translation means translation is not null; base set number of under 100 refers to baseSetSize < 10",
    "SQL": "SELECT COUNT(T1.id) FROM sets AS T1 INNER JOIN set_translations AS T2 ON T2.setCode = T1.code WHERE T2.translation IS NOT NULL AND T1.baseSetSize < 100 AND T2.language = 'Italian'",
    "difficulty": "moderate"
  },
  {
    "question_id": 477,
    "db_id": "card_games",
    "question": "Which of these artists have designed a card in the set Coldsnap, Jeremy Jarvis, Aaron Miller or Chippy?",
    "evidence": "card set Coldsnap refers to name = 'Coldsnap'; Jeremy Jarvis, Aaron Miller or Chippy are the name of artists which refers to artist IN ('Jeremy Jarvis', 'Aaron Miller','Chippy');",
    "SQL": "SELECT T1.artist FROM cards AS T1 INNER JOIN sets AS T2 ON T2.code = T1.setCode WHERE (T2.name = 'Coldsnap' AND T1.artist = 'Chippy') OR (T2.name = 'Coldsnap' AND T1.artist = 'Aaron Miller') OR (T2.name = 'Coldsnap' AND T1.artist = 'Jeremy Jarvis') GROUP BY T1.artist",
    "difficulty": "challenging"
  },
  {
    "question_id": 479,
    "db_id": "card_games",
    "question": "Among the cards with converted mana cost higher than 5 in the set Coldsnap, how many of them have unknown power?",
    "evidence": "card set Coldsnap refers to name = 'Coldsnap'; converted mana cost higher than 5 refers to convertedManaCost > 5; unknown power refers to power = '*' or T1.power is null",
    "SQL": "SELECT SUM(CASE WHEN T1.power = '*' OR T1.power IS NULL THEN 1 ELSE 0 END) FROM cards AS T1 INNER JOIN sets AS T2 ON T2.code = T1.setCode WHERE T2.name = 'Coldsnap' AND T1.convertedManaCost > 5",
    "difficulty": "moderate"
  },
  {
    "question_id": 480,
    "db_id": "card_games",
    "question": "What is the Italian flavor text of the card \"Ancestor's Chosen\"?",
    "evidence": "Italian refers to language = 'Italian'; flavor text refers to flavorText; \"Ancestor''s Chosen\" refers to name = 'Ancestor''s Chosen'",
    "SQL": "SELECT T2.flavorText FROM cards AS T1 INNER JOIN foreign_data AS T2 ON T2.uuid = T1.uuid WHERE T1.name = 'Ancestor''s Chosen' AND T2.language = 'Italian'",
    "difficulty": "moderate"
  },
  {
    "question_id": 483,
    "db_id": "card_games",
    "question": "Please list the Italian text ruling of all the cards in the set Coldsnap.",
    "evidence": "card set Coldsnap refers to name = 'Coldsnap'; Italian refers to language = 'Italian'",
    "SQL": "SELECT DISTINCT T1.text FROM foreign_data AS T1 INNER JOIN cards AS T2 ON T2.uuid = T1.uuid INNER JOIN sets AS T3 ON T3.code = T2.setCode WHERE T3.name = 'Coldsnap' AND T1.language = 'Italian'",
    "difficulty": "moderate"
  },
  {
    "question_id": 484,
    "db_id": "card_games",
    "question": "Please list the Italian names of the cards in the set Coldsnap with the highest converted mana cost.",
    "evidence": "card set Coldsnap refers to name = 'Coldsnap'; Italian refers to language = 'Italian'; highest converted mana cost refers to MAX(convertedManaCost)",
    "SQL": "SELECT T2.name FROM foreign_data AS T1 INNER JOIN cards AS T2 ON T2.uuid = T1.uuid INNER JOIN sets AS T3 ON T3.code = T2.setCode WHERE T3.name = 'Coldsnap' AND T1.language = 'Italian' ORDER BY T2.convertedManaCost DESC",
    "difficulty": "moderate"
  },
  {
    "question_id": 486,
    "db_id": "card_games",
    "question": "What is the percentage of the cards with a converted mana cost of 7 in the set Coldsnap?",
    "evidence": "converted mana cost of 7 refers to convertedManaCost = 7; card set Coldsnap refers to name = 'Coldsnap'; percentage = DIVIDE(SUM(convertedManaCost = 7), SUM(convertedManaCost))*100",
    "SQL": "SELECT CAST(SUM(CASE WHEN T1.convertedManaCost = 7 THEN 1 ELSE 0 END) AS REAL) * 100 / COUNT(T1.id) FROM cards AS T1 INNER JOIN sets AS T2 ON T2.code = T1.setCode WHERE T2.name = 'Coldsnap'",
    "difficulty": "moderate"
  },
  {
    "question_id": 487,
    "db_id": "card_games",
    "question": "What is the percentage of incredibly powerful cards in the set Coldsnap?",
    "evidence": "card set Coldsnap refers to name = 'Coldsnap'; foil is incredibly powerful refers to cardKingdomFoilId is not null AND cardKingdomId is not null; the percentage of incredibly powerful cards in the set refers to DIVIDE(SUM(incredibly powerful), SUM(name = 'Coldsnap'))*100",
    "SQL": "SELECT CAST(SUM(CASE WHEN T1.cardKingdomFoilId IS NOT NULL AND T1.cardKingdomId IS NOT NULL THEN 1 ELSE 0 END) AS REAL) * 100 / COUNT(T1.id) FROM cards AS T1 INNER JOIN sets AS T2 ON T2.code = T1.setCode WHERE T2.name = 'Coldsnap'",
    "difficulty": "challenging"
  },
  {
    "question_id": 518,
    "db_id": "card_games",
    "question": "Which of the play format has the highest number of banned status? Indicate the play format and the names of all the card meet the condition.",
    "evidence": "play format refers to format; banned status refers to status = 'Banned'; the highest number of banned status refers to MAX(COUNT(status = 'Banned'))",
    "SQL": "WITH MaxBanned AS (SELECT format, COUNT(*) AS count_banned FROM legalities WHERE status = 'Banned' GROUP BY format ORDER BY COUNT(*) DESC LIMIT 1) SELECT T2.format, T1.name FROM cards AS T1 INNER JOIN legalities AS T2 ON T2.uuid = T1.uuid INNER JOIN MaxBanned MB ON MB.format = T2.format WHERE T2.status = 'Banned'",
    "difficulty": "moderate"
  },
  {
    "question_id": 522,
    "db_id": "card_games",
    "question": "Which cards are ranked 1st on EDHRec? List all of the cards name and its banned play format.",
    "evidence": "ranked 1st on EDHRec refers to edhrecRank = 1; banned refers to status = 'Banned'; play format refers to format; cards name refers to name",
    "SQL": "SELECT T1.name, T2.format FROM cards AS T1 INNER JOIN legalities AS T2 ON T2.uuid = T1.uuid WHERE T1.edhrecRank = 1 AND T2.status = 'Banned' GROUP BY T1.name, T2.format",
    "difficulty": "moderate"
  },
  {
    "question_id": 528,
    "db_id": "card_games",
    "question": "List the names of all the cards in the set Hour of Devastation and find the formats in which these cards are legal.",
    "evidence": "the set Hour of Devastation refers to set.name = 'Hour of Devastation'; names of all the cards in the set refers to cards.name; legal cards refers to status = 'Legal'; the formats refers to format",
    "SQL": "SELECT DISTINCT T2.name , CASE WHEN T1.status = 'Legal' THEN T1.format ELSE NULL END FROM legalities AS T1 INNER JOIN cards AS T2 ON T2.uuid = T1.uuid WHERE T2.setCode IN ( SELECT code FROM sets WHERE name = 'Hour of Devastation' )",
    "difficulty": "challenging"
  },
  {
    "question_id": 529,
    "db_id": "card_games",
    "question": "Find and list the names of sets which doesn't have Japanese translation but have Korean translation.",
    "evidence": "names of sets refers to name; doesn't have Japanese translation refers to language not like '%Japanese%'; have Korean translation refers to language = 'Korean'",
    "SQL": "SELECT name FROM sets WHERE code IN ( SELECT setCode FROM set_translations WHERE language = 'Korean' AND language NOT LIKE '%Japanese%' )",
    "difficulty": "moderate"
  },
  {
    "question_id": 530,
    "db_id": "card_games",
    "question": "List all the frame styles and cards Allen Williams worked on and find any banned cards if there are any.",
    "evidence": "frame styles refers to frameVersion; cards Allen Williams worked on refers to artist = 'Allen Williams'; banned cards refers to status = 'Banned'",
    "SQL": "SELECT DISTINCT T1.frameVersion, T1.name , IIF(T2.status = 'Banned', T1.name, 'NO') FROM cards AS T1 INNER JOIN legalities AS T2 ON T1.uuid = T2.uuid WHERE T1.artist = 'Allen Williams'",
    "difficulty": "moderate"
  },
  {
    "question_id": 195,
    "db_id": "toxicology",
    "question": "What is the most common bond type?",
    "evidence": "most common bond type refers MAX(COUNT(bond_type))",
    "SQL": "SELECT T.bond_type FROM ( SELECT bond_type, COUNT(bond_id) FROM bond GROUP BY bond_type ORDER BY COUNT(bond_id) DESC LIMIT 1 ) AS T",
    "difficulty": "simple"
  },
  {
    "question_id": 197,
    "db_id": "toxicology",
    "question": "Calculate the average number of oxygen atoms in single-bonded molecules.",
    "evidence": "single-bonded molecules refers to bond_type = '-' ; average number of oxygen atom = AVG(element = 'o')",
    "SQL": "SELECT AVG(oxygen_count) FROM (SELECT T1.molecule_id, COUNT(T1.element) AS oxygen_count FROM atom AS T1 INNER JOIN bond AS T2 ON T1.molecule_id = T2.molecule_id  WHERE T2.bond_type = '-' AND T1.element = 'o'  GROUP BY T1.molecule_id) AS oxygen_counts",
    "difficulty": "moderate"
  },
  {
    "question_id": 198,
    "db_id": "toxicology",
    "question": "On average how many carcinogenic molecules are single bonded?",
    "evidence": "carcinogenic molecules refers to label = '+'; single-bonded refers to bond_type = '-'; average = DIVIDE(SUM(bond_type = '-'), COUNT(atom_id))",
    "SQL": "SELECT AVG(single_bond_count) FROM (SELECT T3.molecule_id, COUNT(T1.bond_type) AS single_bond_count FROM bond AS T1  INNER JOIN atom AS T2 ON T1.molecule_id = T2.molecule_id INNER JOIN molecule AS T3 ON T3.molecule_id = T2.molecule_id WHERE T1.bond_type = '-' AND T3.label = '+' GROUP BY T3.molecule_id) AS subquery",
    "difficulty": "challenging"
  },
  {
    "question_id": 200,
    "db_id": "toxicology",
    "question": "Find the triple-bonded molecules which are carcinogenic.",
    "evidence": "triple-bonded molecules refers to bond_type = '#'; carcinogenic refers to label = '+'",
    "SQL": "SELECT DISTINCT T2.molecule_id FROM bond AS T1 INNER JOIN molecule AS T2 ON T1.molecule_id = T2.molecule_id WHERE T1.bond_type = '#' AND T2.label = '+'",
    "difficulty": "simple"
  },
  {
    "question_id": 201,
    "db_id": "toxicology",
    "question": "What is the percentage of carbon in double-bond molecules?",
    "evidence": "carbon refers to element = 'c'; double-bond molecules refers to bond_type = '='; percentage = DIVIDE(SUM(element = 'c'), COUNT(atom_id))",
    "SQL": "SELECT CAST(COUNT(DISTINCT CASE WHEN T1.element = 'c' THEN T1.atom_id ELSE NULL END) AS REAL) * 100 / COUNT(DISTINCT T1.atom_id) FROM atom AS T1 INNER JOIN bond AS T2 ON T1.molecule_id = T2.molecule_id WHERE T2.bond_type = '='",
    "difficulty": "moderate"
  },
  {
    "question_id": 206,
    "db_id": "toxicology",
    "question": "What elements are in the TR004_8_9 bond atoms?",
    "evidence": "TR004_8_9 bond atoms refers to bond_id = 'TR004_8_9';",
    "SQL": "SELECT DISTINCT T1.element FROM atom AS T1 INNER JOIN connected AS T2 ON T1.atom_id = T2.atom_id WHERE T2.bond_id = 'TR004_8_9'",
    "difficulty": "challenging"
  },
  {
    "question_id": 207,
    "db_id": "toxicology",
    "question": "What elements are in a double type bond?",
    "evidence": "double type bond refers to bond_type = '=';",
    "SQL": "SELECT DISTINCT T1.element FROM atom AS T1 INNER JOIN bond AS T2 ON T1.molecule_id = T2.molecule_id INNER JOIN connected AS T3 ON T1.atom_id = T3.atom_id WHERE T2.bond_type = '='",
    "difficulty": "challenging"
  },
  {
    "question_id": 208,
    "db_id": "toxicology",
    "question": "Which type of label is the most numerous in atoms with hydrogen?",
    "evidence": "with hydrogen refers to element = 'h'; label most numerous in atoms refers to MAX(COUNT(label)); ",
    "SQL": "SELECT T.label FROM ( SELECT T2.label, COUNT(T2.molecule_id) FROM atom AS T1 INNER JOIN molecule AS T2 ON T1.molecule_id = T2.molecule_id WHERE T1.element = 'h' GROUP BY T2.label ORDER BY COUNT(T2.molecule_id) DESC LIMIT 1 ) t",
    "difficulty": "moderate"
  },
  {
    "question_id": 212,
    "db_id": "toxicology",
    "question": "Which element is the least numerous in non-carcinogenic molecules?",
    "evidence": "label = '-' means molecules are non-carcinogenic; least numerous refers to MIN(COUNT(element));",
    "SQL": "SELECT T.element FROM (SELECT T1.element, COUNT(DISTINCT T1.molecule_id) FROM atom AS T1 INNER JOIN molecule AS T2 ON T1.molecule_id = T2.molecule_id WHERE T2.label = '-' GROUP BY T1.element ORDER BY COUNT(DISTINCT T1.molecule_id) ASC LIMIT 1) t",
    "difficulty": "challenging"
  },
  {
    "question_id": 213,
    "db_id": "toxicology",
    "question": "What type of bond is there between the atoms TR004_8 and TR004_20?",
    "evidence": "type of bond refers to bond_type; between the atoms TR004_8 and TR004_20 refers to atom_id = 'TR004_8' AND atom_id2 = 'TR004_20' OR another way around",
    "SQL": "SELECT T1.bond_type FROM bond AS T1 INNER JOIN connected AS T2 ON T1.bond_id = T2.bond_id WHERE T2.atom_id = 'TR004_8' AND T2.atom_id2 = 'TR004_20' OR T2.atom_id2 = 'TR004_8' AND T2.atom_id = 'TR004_20'",
    "difficulty": "moderate"
  },
  {
    "question_id": 215,
    "db_id": "toxicology",
    "question": "How many atoms with iodine and with sulfur type elements are there in single bond molecules?",
    "evidence": "with iodine element refer to element = 'i'; with sulfur element refers to element = 's'; single type bond refers to bond_type = '-'; Should consider the distinct atoms when counting;",
    "SQL": "SELECT COUNT(DISTINCT CASE WHEN T1.element = 'i' THEN T1.atom_id ELSE NULL END) AS iodine_nums , COUNT(DISTINCT CASE WHEN T1.element = 's' THEN T1.atom_id ELSE NULL END) AS sulfur_nums FROM atom AS T1 INNER JOIN connected AS T2 ON T1.atom_id = T2.atom_id INNER JOIN bond AS T3 ON T2.bond_id = T3.bond_id WHERE T3.bond_type = '-'",
    "difficulty": "challenging"
  },
  {
    "question_id": 218,
    "db_id": "toxicology",
    "question": "What percentage of carcinogenic-type molecules does not contain fluorine?",
    "evidence": "label = '+' mean molecules are carcinogenic; contain fluorine refers to element = 'f'; percentage = DIVIDE(SUM(element = 'f') * 100, COUNT(molecule_id)) where label = '+'; Should consider the distinct atoms when counting;",
    "SQL": "SELECT CAST(COUNT(DISTINCT CASE WHEN T1.element <> 'f' THEN T2.molecule_id ELSE NULL END) AS REAL) * 100 / COUNT(DISTINCT T2.molecule_id) FROM atom AS T1 INNER JOIN molecule AS T2 ON T1.molecule_id = T2.molecule_id WHERE T2.label = '+'",
    "difficulty": "challenging"
  },
  {
    "question_id": 219,
    "db_id": "toxicology",
    "question": "What is the percentage of carcinogenic molecules in triple type bonds?",
    "evidence": "label = '+' mean molecules are carcinogenic; triple bond refers to bond_type = '#'; percentage = DIVIDE(SUM(bond_type = '#') * 100, COUNT(bond_id)) as percent where label = '+'",
    "SQL": "SELECT CAST(COUNT(DISTINCT CASE WHEN T2.label = '+' THEN T2.molecule_id ELSE NULL END) AS REAL) * 100 / COUNT(DISTINCT T2.molecule_id) FROM atom AS T1 INNER JOIN molecule AS T2 ON T1.molecule_id = T2.molecule_id INNER JOIN bond AS T3 ON T2.molecule_id = T3.molecule_id WHERE T3.bond_type = '#'",
    "difficulty": "challenging"
  },
  {
    "question_id": 220,
    "db_id": "toxicology",
    "question": "Please list top three elements of the toxicology of the molecule TR000 in alphabetical order.",
    "evidence": "TR000 is the molecule id;",
    "SQL": "SELECT DISTINCT T.element FROM atom AS T WHERE T.molecule_id = 'TR000' ORDER BY T.element LIMIT 3",
    "difficulty": "challenging"
  },
  {
    "question_id": 226,
    "db_id": "toxicology",
    "question": "What is the percentage of double bonds in the molecule TR008? Please provide your answer as a percentage with five decimal places.",
    "evidence": "double bond refers to bond_type = '='; TR008 is the molecule id; percentage = DIVIDE(SUM(bond_type = '='), COUNT(bond_id)) as percent where molecule_id = 'TR008'",
    "SQL": "SELECT ROUND(CAST(COUNT(CASE WHEN T.bond_type = '=' THEN T.bond_id ELSE NULL END) AS REAL) * 100 / COUNT(T.bond_id),5) FROM bond AS T WHERE T.molecule_id = 'TR008'",
    "difficulty": "moderate"
  },
  {
    "question_id": 227,
    "db_id": "toxicology",
    "question": "What is the percentage of molecules that are carcinogenic? Please provide your answer as a percentage with three decimal places.",
    "evidence": "label = '+' mean molecules are carcinogenic; percentage = DIVIDE(SUM(label = '+'), COUNT(molecule_id)) as percent",
    "SQL": "SELECT ROUND(CAST(COUNT(CASE WHEN T.label = '+' THEN T.molecule_id ELSE NULL END) AS REAL) * 100 / COUNT(T.molecule_id),3) FROM molecule t",
    "difficulty": "simple"
  },
  {
    "question_id": 228,
    "db_id": "toxicology",
    "question": "How much of the hydrogen in molecule TR206 is accounted for? Please provide your answer as a percentage with four decimal places.",
    "evidence": "hydrogen refers to element = 'h'; TR206 is the molecule id; percentage = DIVIDE(SUM(element = 'h'), COUNT(atom_id)) as percent where molecule_id = 'TR206'",
    "SQL": "SELECT ROUND(CAST(COUNT(CASE WHEN T.element = 'h' THEN T.atom_id ELSE NULL END) AS REAL) * 100 / COUNT(T.atom_id),4) FROM atom AS T WHERE T.molecule_id = 'TR206'",
    "difficulty": "moderate"
  },
  {
    "question_id": 230,
    "db_id": "toxicology",
    "question": "What are the elements of the toxicology and label of molecule TR060?",
    "evidence": "TR060 is the molecule id; ",
    "SQL": "SELECT DISTINCT T1.element, T2.label FROM atom AS T1 INNER JOIN molecule AS T2 ON T1.molecule_id = T2.molecule_id WHERE T2.molecule_id = 'TR060'",
    "difficulty": "challenging"
  },
  {
    "question_id": 231,
    "db_id": "toxicology",
    "question": "Which bond type accounted for the majority of the bonds found in molecule TR010 and state whether or not this molecule is carcinogenic?",
    "evidence": "TR010 is the molecule id; majority of the bond found refers to MAX(COUNT(bond_type)); ",
    "SQL": "SELECT T.bond_type FROM ( SELECT T1.bond_type, COUNT(T1.molecule_id) FROM bond AS T1  WHERE T1.molecule_id = 'TR010' GROUP BY T1.bond_type ORDER BY COUNT(T1.molecule_id) DESC LIMIT 1 ) AS T",
    "difficulty": "challenging"
  },
  {
    "question_id": 232,
    "db_id": "toxicology",
    "question": "Please list top three molecules that have single bonds between two atoms and are not carcinogenic in alphabetical order.",
    "evidence": "label = '-' means molecules are not carcinogenic; single type bond refers to bond_type = '-'; list top three molecules refers to return molecule_id and order by molecule_id;",
    "SQL": "SELECT DISTINCT T2.molecule_id FROM bond AS T1 INNER JOIN molecule AS T2 ON T1.molecule_id = T2.molecule_id WHERE T1.bond_type = '-' AND T2.label = '-' ORDER BY T2.molecule_id LIMIT 3",
    "difficulty": "moderate"
  },
  {
    "question_id": 234,
    "db_id": "toxicology",
    "question": "How many bonds which involved atom 12 does molecule TR009 have?",
    "evidence": "TR009 is the molecule id;  involved atom 12 refers to atom_id = 'TR009_12' or atom_id2 = 'TR009_12'",
    "SQL": "SELECT COUNT(T2.bond_id) FROM bond AS T1 INNER JOIN connected AS T2 ON T1.bond_id = T2.bond_id WHERE T1.molecule_id = 'TR009' AND T2.atom_id = T1.molecule_id || '_1' OR T2.atom_id2 = T1.molecule_id || '_2'",
    "difficulty": "moderate"
  },
  {
    "question_id": 236,
    "db_id": "toxicology",
    "question": "What are the bond type and the atoms of the bond ID of TR001_6_9?",
    "evidence": "atoms refer to atom_id or atom_id2",
    "SQL": "SELECT T1.bond_type, T2.atom_id, T2.atom_id2 FROM bond AS T1 INNER JOIN connected AS T2 ON T1.bond_id = T2.bond_id WHERE T2.bond_id = 'TR001_6_9'",
    "difficulty": "moderate"
  },
  {
    "question_id": 239,
    "db_id": "toxicology",
    "question": "How many connections does the atom 19 have?",
    "evidence": "connections refers to bond_id; atom 19 refers to atom_id like 'TR%_19';",
    "SQL": "SELECT COUNT(T.bond_id) FROM connected AS T WHERE SUBSTR(T.atom_id, -2) = '19'",
    "difficulty": "simple"
  },
  {
    "question_id": 240,
    "db_id": "toxicology",
    "question": "List all the elements of the toxicology of the molecule \"TR004\".",
    "evidence": "TR004 is the molecule id;",
    "SQL": "SELECT DISTINCT T.element FROM atom AS T WHERE T.molecule_id = 'TR004'",
    "difficulty": "challenging"
  },
  {
    "question_id": 242,
    "db_id": "toxicology",
    "question": "Among all the atoms from 21 to 25, list all the molecules that are carcinogenic.",
    "evidence": "atoms from 21 to 25 refers to SUBSTR(atom_id, 7, 2) between '21' and '25'; label = '+' mean molecules are carcinogenic",
    "SQL": "SELECT DISTINCT T2.molecule_id FROM atom AS T1 INNER JOIN molecule AS T2 ON T1.molecule_id = T2.molecule_id WHERE SUBSTR(T1.atom_id, -2) BETWEEN '21' AND '25' AND T2.label = '+'",
    "difficulty": "moderate"
  },
  {
    "question_id": 243,
    "db_id": "toxicology",
    "question": "What are the bonds that have phosphorus and nitrogen as their atom elements?",
    "evidence": "have phosphorus as atom elements refers to element = 'p'; have nitrogen as atom elements refers to element = 'n'",
    "SQL": "SELECT T2.bond_id FROM atom AS T1 INNER JOIN connected AS T2 ON T1.atom_id = T2.atom_id WHERE T2.bond_id IN ( SELECT T3.bond_id FROM connected AS T3 INNER JOIN atom AS T4 ON T3.atom_id = T4.atom_id WHERE T4.element = 'p' ) AND T1.element = 'n'",
    "difficulty": "moderate"
  },
  {
    "question_id": 244,
    "db_id": "toxicology",
    "question": "Is the molecule with the most double bonds carcinogenic?",
    "evidence": "double bond refers to bond_type = ' = '; label = '+' mean molecules are carcinogenic",
    "SQL": "SELECT T1.label FROM molecule AS T1 INNER JOIN ( SELECT T.molecule_id, COUNT(T.bond_type) FROM bond AS T WHERE T.bond_type = '=' GROUP BY T.molecule_id ORDER BY COUNT(T.bond_type) DESC LIMIT 1 ) AS T2 ON T1.molecule_id = T2.molecule_id",
    "difficulty": "moderate"
  },
  {
    "question_id": 245,
    "db_id": "toxicology",
    "question": "What is the average number of bonds the atoms with the element iodine have?",
    "evidence": "atoms with the element iodine refers to element = 'i'; average = DIVIDE(COUND(bond_id), COUNT(atom_id)) where element = 'i'",
    "SQL": "SELECT CAST(COUNT(T2.bond_id) AS REAL) / COUNT(T1.atom_id) FROM atom AS T1 INNER JOIN connected AS T2 ON T1.atom_id = T2.atom_id WHERE T1.element = 'i'",
    "difficulty": "moderate"
  },
  {
    "question_id": 247,
    "db_id": "toxicology",
    "question": "List all the elements of atoms that can not bond with any other atoms.",
    "evidence": " atoms cannot bond with other atoms means atom_id NOT in connected table;",
    "SQL": "SELECT DISTINCT T.element FROM atom AS T WHERE T.element NOT IN ( SELECT DISTINCT T1.element FROM atom AS T1 INNER JOIN connected AS T2 ON T1.atom_id = T2.atom_id )",
    "difficulty": "challenging"
  },
  {
    "question_id": 248,
    "db_id": "toxicology",
    "question": "What are the atoms of the triple bond with the molecule \"TR041\"?",
    "evidence": "TR041 is the molecule id; triple bond refers to bond_type = '#';",
    "SQL": "SELECT T2.atom_id, T2.atom_id2 FROM atom AS T1 INNER JOIN connected AS T2 ON T1.atom_id = T2.atom_id INNER JOIN bond AS T3 ON T2.bond_id = T3.bond_id WHERE T3.bond_type = '#' AND T3.molecule_id = 'TR041'",
    "difficulty": "simple"
  },
  {
    "question_id": 249,
    "db_id": "toxicology",
    "question": "What are the elements of the atoms of TR144_8_19?",
    "evidence": "TR144_8_19 is the bond id; ",
    "SQL": "SELECT T2.element FROM connected AS T1 INNER JOIN atom AS T2 ON T1.atom_id = T2.atom_id WHERE T1.bond_id = 'TR144_8_19'",
    "difficulty": "challenging"
  },
  {
    "question_id": 253,
    "db_id": "toxicology",
    "question": "List the elements of all the triple bonds.",
    "evidence": "triple bond refers to bond_type = '#';",
    "SQL": "SELECT DISTINCT T3.element FROM bond AS T1 INNER JOIN connected AS T2 ON T1.bond_id = T2.bond_id INNER JOIN atom AS T3 ON T2.atom_id = T3.atom_id WHERE T1.bond_type = '#'",
    "difficulty": "challenging"
  },
  {
    "question_id": 255,
    "db_id": "toxicology",
    "question": "What proportion of single bonds are carcinogenic? Please provide your answer as a percentage with five decimal places.",
    "evidence": "single bond refers to bond_type = '-'; label = '+' mean molecules are carcinogenic; proportion = DIVIDE(SUM(label = '+') * 100, COUNT(bond_id)) where bond_type = '-'",
    "SQL": "SELECT ROUND(CAST(COUNT(CASE WHEN T2.label = '+' THEN T1.bond_id ELSE NULL END) AS REAL) * 100 / COUNT(T1.bond_id),5) FROM bond AS T1 INNER JOIN molecule AS T2 ON T1.molecule_id = T2.molecule_id WHERE T1.bond_type = '-'",
    "difficulty": "moderate"
  },
  {
    "question_id": 260,
    "db_id": "toxicology",
    "question": "Calculate the total atoms with triple-bond molecules containing the element phosphorus or bromine.",
    "evidence": "triple bond refers to bond_type = '#'; phosphorus refers to element = 'p'; bromine refers to element = 'br'",
    "SQL": "SELECT COUNT(T1.atom_id) FROM atom AS T1 INNER JOIN molecule AS T2 ON T1.molecule_id = T2.molecule_id INNER JOIN bond AS T3 ON T2.molecule_id = T3.molecule_id WHERE T3.bond_type = '#' AND T1.element IN ('p', 'br')",
    "difficulty": "moderate"
  },
  {
    "question_id": 263,
    "db_id": "toxicology",
    "question": "What is the composition of element chlorine in percentage among the single bond molecules?",
    "evidence": "element chlorine refers to element = 'cl'; single bond refers to bond_type = '-'; percentage = DIVIDE(SUM(element = 'cl'), COUNT(atom_id)) as percent where bond_type = '-'",
    "SQL": "SELECT CAST(COUNT(CASE WHEN T.element = 'cl' THEN T.atom_id ELSE NULL END) AS REAL) * 100 / COUNT(T.atom_id) FROM ( SELECT T1.atom_id, T1.element FROM atom AS T1 INNER JOIN molecule AS T2 ON T1.molecule_id = T2.molecule_id INNER JOIN bond AS T3 ON T2.molecule_id = T3.molecule_id WHERE T3.bond_type = '-' ) AS T",
    "difficulty": "challenging"
  },
  {
    "question_id": 268,
    "db_id": "toxicology",
    "question": "What are the elements for bond id TR001_10_11?",
    "evidence": "TR001_10_11 is the bond id;",
    "SQL": "SELECT T2.element FROM connected AS T1 INNER JOIN atom AS T2 ON T1.atom_id = T2.atom_id WHERE T1.bond_id = 'TR001_10_11'",
    "difficulty": "challenging"
  },
  {
    "question_id": 273,
    "db_id": "toxicology",
    "question": "What is the percentage of element chlorine in carcinogenic molecules?",
    "evidence": "chlorine refers to element = 'cl'; label = '+' mean molecules are carcinogenic; percentage = DIVIDE(SUM(element = 'pb'); COUNT(molecule_id)) as percentage where label = '+'",
    "SQL": "SELECT CAST(COUNT( CASE WHEN T1.element = 'cl' THEN T1.element ELSE NULL END) AS REAL) * 100 / COUNT(T1.element) FROM atom AS T1 INNER JOIN molecule AS T2 ON T1.molecule_id = T2.molecule_id WHERE T2.label = '+'",
    "difficulty": "moderate"
  },
  {
    "question_id": 281,
    "db_id": "toxicology",
    "question": "Tally the toxicology element of the 4th atom of each molecule that was carcinogenic.",
    "evidence": "label = '+' means molecules are carcinogenic; 4th atom of each molecule refers to substr(atom_id, 7, 1) = '4'; ",
    "SQL": "SELECT DISTINCT T1.element FROM atom AS T1 INNER JOIN molecule AS T2 ON T1.molecule_id = T2.molecule_id WHERE T2.label = '+' AND SUBSTR(T1.atom_id, -1) = '4' AND LENGTH(T1.atom_id) = 7",
    "difficulty": "challenging"
  },
  {
    "question_id": 282,
    "db_id": "toxicology",
    "question": "What is the ratio of Hydrogen elements in molecule ID TR006? List the ratio with its label.",
    "evidence": "hydrogen refers to element = 'h'; ratio = DIVIDE(SUM(element = 'h'), count(element)) where molecule_id = 'TR006' ; label = '+' mean molecules are carcinogenic; label = '-' means molecules are non-carcinogenic",
    "SQL": "WITH SubQuery AS (SELECT DISTINCT T1.atom_id, T1.element, T1.molecule_id, T2.label FROM atom AS T1 INNER JOIN molecule AS T2 ON T1.molecule_id = T2.molecule_id WHERE T2.molecule_id = 'TR006') SELECT CAST(COUNT(CASE WHEN element = 'h' THEN atom_id ELSE NULL END) AS REAL) / (CASE WHEN COUNT(atom_id) = 0 THEN NULL ELSE COUNT(atom_id) END) AS ratio, label FROM SubQuery GROUP BY label",
    "difficulty": "challenging"
  },
  {
    "question_id": 327,
    "db_id": "toxicology",
    "question": "Which non-carcinogenic molecules consisted more than 5 atoms?",
    "evidence": "label = '-' means molecules are non-carcinogenic; molecules consisted more than 5 atoms refers to COUNT(molecule_id) > 5",
    "SQL": "SELECT T.molecule_id FROM ( SELECT T1.molecule_id, COUNT(T2.atom_id) FROM molecule AS T1 INNER JOIN atom AS T2 ON T1.molecule_id = T2.molecule_id WHERE T1.label = '-' GROUP BY T1.molecule_id HAVING COUNT(T2.atom_id) > 5 ) t",
    "difficulty": "moderate"
  },
  {
    "question_id": 5,
    "db_id": "california_schools",
    "question": "How many schools with an average score in Math greater than 400 in the SAT test are exclusively virtual?",
    "evidence": "Exclusively virtual refers to Virtual = 'F'",
    "SQL": "SELECT COUNT(DISTINCT T2.School) FROM satscores AS T1 INNER JOIN schools AS T2 ON T1.cds = T2.CDSCode WHERE T2.Virtual = 'F' AND T1.AvgScrMath > 400",
    "difficulty": "simple"
  },
  {
    "question_id": 11,
    "db_id": "california_schools",
    "question": "Please list the codes of the schools with a total enrollment of over 500.",
    "evidence": "Total enrollment can be represented by `Enrollment (K-12)` + `Enrollment (Ages 5-17)`",
    "SQL": "SELECT T2.CDSCode FROM schools AS T1 INNER JOIN frpm AS T2 ON T1.CDSCode = T2.CDSCode WHERE T2.`Enrollment (K-12)` + T2.`Enrollment (Ages 5-17)` > 500",
    "difficulty": "simple"
  },
  {
    "question_id": 12,
    "db_id": "california_schools",
    "question": "Among the schools with an SAT excellence rate of over 0.3, what is the highest eligible free rate for students aged 5-17?",
    "evidence": "Excellence rate = NumGE1500 / NumTstTakr; Eligible free rates for students aged 5-17 = `Free Meal Count (Ages 5-17)` / `Enrollment (Ages 5-17)`",
    "SQL": "SELECT MAX(CAST(T1.`Free Meal Count (Ages 5-17)` AS REAL) / T1.`Enrollment (Ages 5-17)`) FROM frpm AS T1 INNER JOIN satscores AS T2 ON T1.CDSCode = T2.cds WHERE CAST(T2.NumGE1500 AS REAL) / T2.NumTstTakr > 0.3",
    "difficulty": "moderate"
  },
  {
    "question_id": 17,
    "db_id": "california_schools",
    "question": "Rank schools by their average score in Writing where the score is greater than 499, showing their charter numbers.",
    "evidence": "Valid charter number means the number is not null",
    "SQL": "SELECT CharterNum, AvgScrWrite, RANK() OVER (ORDER BY AvgScrWrite DESC) AS WritingScoreRank FROM schools AS T1  INNER JOIN satscores AS T2 ON T1.CDSCode = T2.cds WHERE T2.AvgScrWrite > 499 AND CharterNum is not null",
    "difficulty": "simple"
  },
  {
    "question_id": 23,
    "db_id": "california_schools",
    "question": "List the names of schools with more than 30 difference in enrollements between K-12 and ages 5-17? Please also give the full street adress of the schools.",
    "evidence": "Diffrence in enrollement = `Enrollment (K-12)` - `Enrollment (Ages 5-17)`",
    "SQL": "SELECT T1.School, T1.Street FROM schools AS T1 INNER JOIN frpm AS T2 ON T1.CDSCode = T2.CDSCode WHERE T2.`Enrollment (K-12)` - T2.`Enrollment (Ages 5-17)` > 30",
    "difficulty": "moderate"
  },
  {
    "question_id": 24,
    "db_id": "california_schools",
    "question": "Give the names of the schools with the percent eligible for free meals in K-12 is more than 0.1 and test takers whose test score is greater than or equal to 1500?",
    "evidence": "Percent eligible for free meals = Free Meal Count (K-12) / Total (Enrollment (K-12)",
    "SQL": "SELECT T2.`School Name` FROM satscores AS T1 INNER JOIN frpm AS T2 ON T1.cds = T2.CDSCode WHERE CAST(T2.`Free Meal Count (K-12)` AS REAL) / T2.`Enrollment (K-12)` > 0.1 AND T1.NumGE1500 > 0",
    "difficulty": "moderate"
  },
  {
    "question_id": 25,
    "db_id": "california_schools",
    "question": "Name schools in Riverside which the average of average math score for SAT is grater than 400, what is the funding type of these schools?",
    "evidence": "Average of average math = sum(average math scores) / count(schools).",
    "SQL": "SELECT T1.sname, T2.`Charter Funding Type` FROM satscores AS T1 INNER JOIN frpm AS T2 ON T1.cds = T2.CDSCode WHERE T2.`District Name` LIKE 'Riverside%' GROUP BY T1.sname, T2.`Charter Funding Type` HAVING CAST(SUM(T1.AvgScrMath) AS REAL) / COUNT(T1.cds) > 400",
    "difficulty": "moderate"
  },
  {
    "question_id": 26,
    "db_id": "california_schools",
    "question": "State the names and full communication address of high schools in Monterey which has more than 800 free or reduced price meals for ages 15-17?",
    "evidence": "Full communication address should include Street, City, State and zip code if any.",
    "SQL": "SELECT T1.`School Name`, T2.Street, T2.City, T2.State, T2.Zip FROM frpm AS T1 INNER JOIN schools AS T2 ON T1.CDSCode = T2.CDSCode WHERE T2.County = 'Monterey' AND T1.`Free Meal Count (Ages 5-17)` > 800 AND T1.`School Type` = 'High Schools (Public)'",
    "difficulty": "moderate"
  },
  {
    "question_id": 27,
    "db_id": "california_schools",
    "question": "What is the average score in writing for the schools that were opened after 1991 or closed before 2000? List the school names along with the score. Also, list the communication number of the schools if there is any.",
    "evidence": "Communication number refers to phone number.",
    "SQL": "SELECT T2.School, T1.AvgScrWrite, T2.Phone FROM schools AS T2 LEFT JOIN satscores AS T1 ON T2.CDSCode = T1.cds WHERE strftime('%Y', T2.OpenDate) > '1991' OR strftime('%Y', T2.ClosedDate) < '2000'",
    "difficulty": "moderate"
  },
  {
    "question_id": 28,
    "db_id": "california_schools",
    "question": "Consider the average difference between K-12 enrollment and 15-17 enrollment of schools that are locally funded, list the names and DOC type of schools which has a difference above this average.",
    "evidence": "Difference between K-12 enrollment and 15-17 enrollment can be computed by `Enrollment (K-12)` - `Enrollment (Ages 5-17)`",
    "SQL": "SELECT T2.School, T2.DOC FROM frpm AS T1 INNER JOIN schools AS T2 ON T1.CDSCode = T2.CDSCode WHERE T2.FundingType = 'Locally funded' AND (T1.`Enrollment (K-12)` - T1.`Enrollment (Ages 5-17)`) > (SELECT AVG(T3.`Enrollment (K-12)` - T3.`Enrollment (Ages 5-17)`) FROM frpm AS T3 INNER JOIN schools AS T4 ON T3.CDSCode = T4.CDSCode WHERE T4.FundingType = 'Locally funded')",
    "difficulty": "challenging"
  },
  {
    "question_id": 31,
    "db_id": "california_schools",
    "question": "What is the eligible free rate of the 10th and 11th schools with the highest enrolment for students in grades 1 through 12?",
    "evidence": "K-12 refers to students in grades 1 through 12; Eligible free rate for K-12 = `Free Meal Count (K-12)` / `Enrollment (K-12)`",
    "SQL": "SELECT CAST(`Free Meal Count (K-12)` AS REAL) / `Enrollment (K-12)` FROM frpm ORDER BY `Enrollment (K-12)` DESC LIMIT 9, 2",
    "difficulty": "moderate"
  },
  {
    "question_id": 32,
    "db_id": "california_schools",
    "question": "What is the eligible free or reduced price meal rate for the top 5 schools in grades 1-12 with the highest free or reduced price meal count of the schools with the ownership code 66?",
    "evidence": "grades 1-12 means K-12; Eligible free or reduced price meal rate for K-12 = `FRPM Count (K-12)` / `Enrollment (K-12)`",
    "SQL": "SELECT CAST(T1.`FRPM Count (K-12)` AS REAL) / T1.`Enrollment (K-12)` FROM frpm AS T1 INNER JOIN schools AS T2 ON T1.CDSCode = T2.CDSCode WHERE T2.SOC = 66 ORDER BY T1.`FRPM Count (K-12)` DESC LIMIT 5",
    "difficulty": "moderate"
  },
  {
    "question_id": 37,
    "db_id": "california_schools",
    "question": "What is the complete address of the school with the lowest excellence rate? Indicate the Street, City, Zip and State.",
    "evidence": "Execellence Rate = NumGE1500 / NumTstTakr; complete address has Street, City, State, Zip code",
    "SQL": "SELECT T2.Street, T2.City, T2.State, T2.Zip FROM satscores AS T1 INNER JOIN schools AS T2 ON T1.cds = T2.CDSCode ORDER BY CAST(T1.NumGE1500 AS REAL) / T1.NumTstTakr ASC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 36,
    "db_id": "california_schools",
    "question": "Under whose administration is the school with the highest number of students scoring 1500 or more on the SAT? Indicate their full names.",
    "evidence": "full name means first name, last name; There are at most 3 administrators for each school; SAT Scores are greater or equal to 1500 refers to NumGE1500",
    "SQL": "SELECT T2.AdmFName1, T2.AdmLName1, T2.AdmFName2, T2.AdmLName2, T2.AdmFName3, T2.AdmLName3 FROM satscores AS T1 INNER JOIN schools AS T2 ON T1.cds = T2.CDSCode ORDER BY T1.NumGE1500 DESC LIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 39,
    "db_id": "california_schools",
    "question": "What is the average number of test takers from Fresno schools that opened between 1/1/1980 and 12/31/1980?",
    "evidence": "between 1/1/1980 and 12/31/1980 means the year = 1980",
    "SQL": "SELECT AVG(T1.NumTstTakr) FROM satscores AS T1 INNER JOIN schools AS T2 ON T1.cds = T2.CDSCode WHERE strftime('%Y', T2.OpenDate) = '1980' AND T2.County = 'Fresno'",
    "difficulty": "simple"
  },
  {
    "question_id": 40,
    "db_id": "california_schools",
    "question": "What is the telephone number for the school with the lowest average score in reading in Fresno Unified?",
    "evidence": "Fresno Unified is a name of district;",
    "SQL": "SELECT T2.Phone FROM satscores AS T1 INNER JOIN schools AS T2 ON T1.cds = T2.CDSCode WHERE T2.District = 'Fresno Unified' AND T1.AvgScrRead IS NOT NULL ORDER BY T1.AvgScrRead ASC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 41,
    "db_id": "california_schools",
    "question": "List the names of virtual schools that are among the top 5 in their respective counties based on average reading scores.",
    "evidence": "Exclusively virtual refers to Virtual = 'F'; respective counties means PARTITION BY County",
    "SQL": "SELECT School FROM (SELECT T2.School,T1.AvgScrRead, RANK() OVER (PARTITION BY T2.County ORDER BY T1.AvgScrRead DESC) AS rnk FROM satscores AS T1 INNER JOIN schools AS T2 ON T1.cds = T2.CDSCode WHERE T2.Virtual = 'F' ) ranked_schools WHERE rnk <= 5",
    "difficulty": "simple"
  },
  {
    "question_id": 45,
    "db_id": "california_schools",
    "question": "What is the average writing score of each of the schools managed by Ricci Ulrich? List the schools and the corresponding average writing scores.",
    "evidence": "Usually, administrators manage the school stuff.",
    "SQL": "SELECT T2.School, T1.AvgScrWrite FROM satscores AS T1 INNER JOIN schools AS T2 ON T1.cds = T2.CDSCode WHERE T2.AdmFName1 = 'Ricci' AND T2.AdmLName1 = 'Ulrich'",
    "difficulty": "moderate"
  },
  {
    "question_id": 46,
    "db_id": "california_schools",
    "question": "Which state special schools have the highest number of enrollees from grades 1 through 12?",
    "evidence": "State Special Schools refers to DOC = 31; Grades 1 through 12 means K-12",
    "SQL": "SELECT T2.School FROM frpm AS T1 INNER JOIN schools AS T2 ON T1.CDSCode = T2.CDSCode WHERE T2.DOC = 31 ORDER BY T1.`Enrollment (K-12)` DESC LIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 47,
    "db_id": "california_schools",
    "question": "What is the monthly average number of schools that opened in Alameda County under the jurisdiction of the Elementary School District in 1980?",
    "evidence": "Elementary School District refers to DOC = 52; Monthly average number of schools that opened in 1980 = count(schools that opened in 1980) / 12",
    "SQL": "SELECT CAST(COUNT(School) AS REAL) / 12 FROM schools WHERE DOC = 52 AND County = 'Alameda' AND strftime('%Y', OpenDate) = '1980'",
    "difficulty": "moderate"
  },
  {
    "question_id": 48,
    "db_id": "california_schools",
    "question": "What is the ratio of merged Unified School District schools in Orange County to merged Elementary School District schools?",
    "evidence": "Elementary School District refers to DOC = 52; Unified School District refers to DOC = 54.",
    "SQL": "SELECT CAST(SUM(CASE WHEN DOC = 54 THEN 1 ELSE 0 END) AS REAL) / SUM(CASE WHEN DOC = 52 THEN 1 ELSE 0 END) FROM schools WHERE StatusType = 'Merged' AND County = 'Orange'",
    "difficulty": "moderate"
  },
  {
    "question_id": 50,
    "db_id": "california_schools",
    "question": "What is the postal street address for the school with the 7th highest Math average? Indicate the school's name.",
    "evidence": "Postal street and mailing street are synonyms.",
    "SQL": "SELECT T2.MailStreet, T2.School FROM satscores AS T1 INNER JOIN schools AS T2 ON T1.cds = T2.CDSCode ORDER BY T1.AvgScrMath DESC LIMIT 6, 1",
    "difficulty": "simple"
  },
  {
    "question_id": 62,
    "db_id": "california_schools",
    "question": "What is the total number of non-chartered schools in the county of Los Angeles with a percent (%) of eligible free meals for grades 1 through 12 that is less than 0.18%?",
    "evidence": "non-chartered schools refer to schools whose Charter = 0; K-12 means grades 1 through 12; percent of eligible free rate for K-12 = `Free Meal Count (K-12)` * 100 / `Enrollment (K-12)`",
    "SQL": "SELECT COUNT(T2.School) FROM frpm AS T1 INNER JOIN schools AS T2 ON T1.CDSCode = T2.CDSCode WHERE T2.County = 'Los Angeles' AND T2.Charter = 0 AND CAST(T1.`Free Meal Count (K-12)` AS REAL) * 100 / T1.`Enrollment (K-12)` < 0.18",
    "difficulty": "challenging"
  },
  {
    "question_id": 72,
    "db_id": "california_schools",
    "question": "How many students from the ages of 5 to 17 are enrolled at the State Special School school in Fremont for the 2014-2015 academic year?",
    "evidence": "State Special School means EdOpsCode = 'SSS'",
    "SQL": "SELECT T1.`Enrollment (Ages 5-17)` FROM frpm AS T1 INNER JOIN schools AS T2 ON T1.CDSCode = T2.CDSCode WHERE T2.EdOpsCode = 'SSS' AND T2.City = 'Fremont' AND T1.`Academic Year` BETWEEN 2014 AND 2015",
    "difficulty": "moderate"
  },
  {
    "question_id": 77,
    "db_id": "california_schools",
    "question": "Which schools served a grade span of Kindergarten to 9th grade in the county of Los Angeles and what is its Percent (%) Eligible FRPM (Ages 5-17)?",
    "evidence": "Percent (%) Eligible FRPM (Ages 5-17) can be acquired by `FRPM Count (Ages 5-17)` / `Enrollment (Ages 5-17)` * 100",
    "SQL": "SELECT T2.School, T1.`FRPM Count (Ages 5-17)` * 100 / T1.`Enrollment (Ages 5-17)` FROM frpm AS T1 INNER JOIN schools AS T2 ON T1.CDSCode = T2.CDSCode WHERE T2.County = 'Los Angeles' AND T2.GSserved = 'K-9'",
    "difficulty": "moderate"
  },
  {
    "question_id": 79,
    "db_id": "california_schools",
    "question": "Between San Diego and Santa Barbara, which county offers the most number of schools that does not offer physical building? Indicate the amount.",
    "evidence": "'Does not offer physical building' means Virtual = F in the database.",
    "SQL": "SELECT County, COUNT(Virtual) FROM schools WHERE (County = 'San Diego' OR County = 'Santa Barbara') AND Virtual = 'F' GROUP BY County ORDER BY COUNT(Virtual) DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 82,
    "db_id": "california_schools",
    "question": "What is the grade span offered in the school with the highest longitude?",
    "evidence": "the highest longitude refers to the school with the maximum absolute longitude value.",
    "SQL": "SELECT GSoffered FROM schools ORDER BY ABS(longitude) DESC LIMIT 1",
    "difficulty": "simple"
  },
  {
    "question_id": 83,
    "db_id": "california_schools",
    "question": "Of the schools that offers a magnet program serving a grade span of Kindergarten to 8th grade, how many offers Multiple Provision Types? List the number of cities that offers a Kindergarten to 8th grade span and indicate how many schools are there serving such grade span for each city.",
    "evidence": "Kindergarten to 8th grade refers to K-8; 'Offers a magnet program' means Magnet = 1; Multiple Provision Types refers to `NSLP Provision Status` = 'Multiple Provision Types'",
    "SQL": "SELECT T2.City, COUNT(T2.CDSCode) FROM frpm AS T1 INNER JOIN schools AS T2 ON T1.CDSCode = T2.CDSCode WHERE T2.Magnet = 1 AND T2.GSoffered = 'K-8' AND T1.`NSLP Provision Status` = 'Multiple Provision Types' GROUP BY T2.City",
    "difficulty": "challenging"
  },
  {
    "question_id": 85,
    "db_id": "california_schools",
    "question": "What is the Percent (%) Eligible Free (K-12) in the school administered by an administrator whose first name is Alusine. List the district code of the school.",
    "evidence": "Percent (%) Eligible Free (K-12) = `Free Meal Count (K-12)` / `Enrollment (K-12)` * 100%",
    "SQL": "SELECT T1.`Free Meal Count (K-12)` * 100 / T1.`Enrollment (K-12)`, T1.`District Code` FROM frpm AS T1 INNER JOIN schools AS T2 ON T1.CDSCode = T2.CDSCode WHERE T2.AdmFName1 = 'Alusine'",
    "difficulty": "moderate"
  },
  {
    "question_id": 87,
    "db_id": "california_schools",
    "question": "What are the valid e-mail addresses of the administrator of the school located in the San Bernardino county, City of San Bernardino City Unified that opened between 1/1/2009 to 12/31/2010 whose school types are public Intermediate/Middle Schools and Unified Schools?",
    "evidence": "Intermediate/Middle Schools refers to SOC = 62; Unified School refers to DOC = 54; years between 2009 and 2010 can refer to 'between 1/1/2009 to 12/31/2010'",
    "SQL": "SELECT T2.AdmEmail1, T2.AdmEmail2 FROM frpm AS T1 INNER JOIN schools AS T2 ON T1.CDSCode = T2.CDSCode WHERE T2.County = 'San Bernardino' AND T2.City = 'San Bernardino' AND T2.DOC = 54 AND strftime('%Y', T2.OpenDate) BETWEEN '2009' AND '2010' AND T2.SOC = 62",
    "difficulty": "challenging"
  },
  {
    "question_id": 89,
    "db_id": "financial",
    "question": "How many accounts who choose issuance after transaction are staying in East Bohemia region?",
    "evidence": "A3 contains the data of region; 'POPLATEK PO OBRATU' represents for 'issuance after transaction'.",
    "SQL": "SELECT COUNT(T2.account_id) FROM district AS T1 INNER JOIN account AS T2 ON T1.district_id = T2.district_id WHERE T1.A3 = 'east Bohemia' AND T2.frequency = 'POPLATEK PO OBRATU'",
    "difficulty": "moderate"
  },
  {
    "question_id": 92,
    "db_id": "financial",
    "question": "List out the no. of districts that have female average salary is more than 6000 but less than 10000?",
    "evidence": "A11 refers to average salary; Female mapps to gender = 'F'",
    "SQL": "SELECT COUNT(DISTINCT T2.district_id)  FROM client AS T1 INNER JOIN district AS T2 ON T1.district_id = T2.district_id WHERE T1.gender = 'F' AND T2.A11 BETWEEN 6000 AND 10000",
    "difficulty": "simple"
  },
  {
    "question_id": 93,
    "db_id": "financial",
    "question": "How many male customers who are living in North Bohemia have average salary greater than 8000?",
    "evidence": "Male means that gender = 'M'; A3 refers to region; A11 pertains to average salary.",
    "SQL": "SELECT COUNT(T1.client_id) FROM client AS T1 INNER JOIN district AS T2 ON T1.district_id = T2.district_id WHERE T1.gender = 'M' AND T2.A3 = 'north Bohemia' AND T2.A11 > 8000",
    "difficulty": "moderate"
  },
  {
    "question_id": 94,
    "db_id": "financial",
    "question": "List out the account numbers of female clients who are oldest and has lowest average salary, calculate the gap between this lowest average salary with the highest average salary?",
    "evidence": "Female means gender = 'F'; A11 refers to average salary; Gap = highest average salary - lowest average salary; If the person A's birthdate > B's birthdate, it means that person B is order than person A.",
    "SQL": "SELECT T1.account_id , ( SELECT MAX(A11) - MIN(A11) FROM district ) FROM account AS T1 INNER JOIN district AS T2 ON T1.district_id = T2.district_id INNER JOIN disp AS T3 ON T1.account_id = T3.account_id INNER JOIN client AS T4 ON T3.client_id = T4.client_id WHERE T2.district_id = ( SELECT district_id FROM client WHERE gender = 'F' ORDER BY birth_date ASC LIMIT 1 ) ORDER BY T2.A11 DESC LIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 95,
    "db_id": "financial",
    "question": "List out the account numbers of clients who are youngest and have highest average salary?",
    "evidence": "If the person A's birthdate < B's birthdate, it means that person B is younger than person A; A11 refers to average salary",
    "SQL": "SELECT T1.account_id  FROM account AS T1 INNER JOIN disp AS T2 ON T1.account_id = T2.account_id INNER JOIN client AS T3 ON T2.client_id = T3.client_id INNER JOIN district AS T4 on T4.district_id = T1.district_id WHERE T2.client_id = ( SELECT client_id FROM client ORDER BY birth_date DESC LIMIT 1) GROUP BY T4.A11, T1.account_id",
    "difficulty": "moderate"
  },
  {
    "question_id": 98,
    "db_id": "financial",
    "question": "Among the accounts who have approved loan date in 1997, list out the accounts that have the lowest approved amount and choose weekly issuance statement.",
    "evidence": "'POPLATEK TYDNE' stands for weekly issuance",
    "SQL": "SELECT T2.account_id FROM loan AS T1 INNER JOIN account AS T2 ON T1.account_id = T2.account_id WHERE STRFTIME('%Y', T1.date) = '1997' AND T2.frequency = 'POPLATEK TYDNE' ORDER BY T1.amount LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 99,
    "db_id": "financial",
    "question": "Among the accounts who have loan validity more than 12 months, list out the accounts that have the highest approved amount and have account opening date in 1993.",
    "evidence": "Loan validity more than 12 months refers to duration > 12",
    "SQL": "SELECT T1.account_id FROM loan AS T1 INNER JOIN account AS T2 ON T1.account_id = T2.account_id WHERE STRFTIME('%Y', T2.date) = '1993' AND T1.duration > 12 ORDER BY T1.amount DESC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 100,
    "db_id": "financial",
    "question": "Among the account opened, how many female customers who were born before 1950 and stayed in Sokolov?",
    "evidence": "Customers refer to clients; Female refers to gender = 'F'; Names of districts appear in column A2",
    "SQL": "SELECT COUNT(T2.client_id) FROM district AS T1 INNER JOIN client AS T2 ON T1.district_id = T2.district_id WHERE T2.gender = 'F' AND STRFTIME('%Y', T2.birth_date) < '1950' AND T1.A2 = 'Sokolov'",
    "difficulty": "moderate"
  },
  {
    "question_id": 112,
    "db_id": "financial",
    "question": "For the female client who was born in 1976/1/29, which district did she opened her account?",
    "evidence": "Female refers to gender = 'F'; A2 refers to district names",
    "SQL": "SELECT T1.A2 FROM district AS T1 INNER JOIN client AS T2 ON T1.district_id = T2.district_id WHERE T2.birth_date = '1976-01-29' AND T2.gender = 'F'",
    "difficulty": "simple"
  },
  {
    "question_id": 115,
    "db_id": "financial",
    "question": "For the branch which located in the south Bohemia with biggest number of inhabitants, what is the percentage of the male clients?",
    "evidence": "Percentage of the male clients = DIVIDE(COUNT(male clients), COUNT(clients)) * 100; Male refers to gender = 'M', A3 is the region name. A4 contains the information about inhabitants.",
    "SQL": "SELECT CAST(SUM(T1.gender = 'M') AS REAL) * 100 / COUNT(T1.client_id) FROM client AS T1 INNER JOIN district AS T2 ON T1.district_id = T2.district_id WHERE T2.A3 = 'south Bohemia' GROUP BY T2.A4 ORDER BY T2.A4 DESC LIMIT 1",
    "difficulty": "challenging"
  },
  {
    "question_id": 116,
    "db_id": "financial",
    "question": "For the client whose loan was approved first in 1993/7/5, what is the increase rate of his/her account balance from 1993/3/22 to 1998/12/27?",
    "evidence": "Increase rate of his/her account balance = [(balance of date A - balance of date B) / balance of Date B] * 100%",
    "SQL": "SELECT CAST((SUM(IIF(T3.date = '1998-12-27', T3.balance, 0)) - SUM(IIF(T3.date = '1993-03-22', T3.balance, 0))) AS REAL) * 100 / SUM(IIF(T3.date = '1993-03-22', T3.balance, 0)) FROM loan AS T1 INNER JOIN account AS T2 ON T1.account_id = T2.account_id INNER JOIN trans AS T3 ON T3.account_id = T2.account_id WHERE T1.date = '1993-07-05'",
    "difficulty": "challenging"
  },
  {
    "question_id": 117,
    "db_id": "financial",
    "question": "What is the percentage of loan amount that has been fully paid with no issue.",
    "evidence": "Loan paid with no issue means contract finished, no problems; status = 'A' means contract finished, no problems; Percentage of accounts by condition = [(total(amount) & condition) / (total amount)] * 100%",
    "SQL": "SELECT (CAST(SUM(CASE WHEN status = 'A' THEN amount ELSE 0 END) AS REAL) * 100) / SUM(amount) FROM loan",
    "difficulty": "moderate"
  },
  {
    "question_id": 118,
    "db_id": "financial",
    "question": "For loan amount less than USD100,000, what is the percentage of accounts that is still running with no issue.",
    "evidence": "Status = 'C' stands for running contract, ok so far; Percentage of accounts by condition = [(total(amount) & condition) / (total amount)] * 100.",
    "SQL": "SELECT CAST(SUM(status = 'C') AS REAL) * 100 / COUNT(account_id) FROM loan WHERE amount < 100000",
    "difficulty": "moderate"
  },
  {
    "question_id": 125,
    "db_id": "financial",
    "question": "For loans contracts which are still running where client are in debt, list the district of the and the state the percentage unemployment rate increment from year 1995 to 1996.",
    "evidence": "Unemployment increment rate in percentage = [(unemployment rate 2016 - unemployment rate 2015) / unemployment rate 2015] * 100; unemployment rate 2015 appears in the A12; unemployment rate 2016 appears in the A13; Loan contracts which are still running where client are in debt can be presented as status = 'D'",
    "SQL": "SELECT CAST((T3.A13 - T3.A12) AS REAL) * 100 / T3.A12 FROM loan AS T1 INNER JOIN account AS T2 ON T1.account_id = T2.account_id INNER JOIN district AS T3 ON T2.district_id = T3.district_id WHERE T1.status = 'D'",
    "difficulty": "challenging"
  },
  {
    "question_id": 128,
    "db_id": "financial",
    "question": "List the top nine districts, by descending order, from the highest to the lowest, the number of female account holders.",
    "evidence": "A2 refers to districts; Female refers to gender = 'F'",
    "SQL": "SELECT T2.A2, COUNT(T1.client_id) FROM client AS T1 INNER JOIN district AS T2 ON T1.district_id = T2.district_id WHERE T1.gender = 'F' GROUP BY T2.district_id, T2.A2 ORDER BY COUNT(T1.client_id) DESC LIMIT 9",
    "difficulty": "moderate"
  },
  {
    "question_id": 136,
    "db_id": "financial",
    "question": "Between 1/1/1995 and 12/31/1997, how many loans in the amount of at least 250,000 per account that chose monthly statement issuance were approved?",
    "evidence": "Frequency = 'POPLATEK MESICNE' stands for monthly issurance",
    "SQL": "SELECT COUNT(T1.account_id) FROM account AS T1 INNER JOIN loan AS T2 ON T1.account_id = T2.account_id WHERE T2.date BETWEEN '1995-01-01' AND '1997-12-31' AND T1.frequency = 'POPLATEK MESICNE' AND T2.amount >= 250000",
    "difficulty": "moderate"
  },
  {
    "question_id": 137,
    "db_id": "financial",
    "question": "How many accounts have running contracts in Branch location 1?",
    "evidence": "Status = 'C' stands for running contract, OK so far; Status = 'D' stands for running contract, client in debt",
    "SQL": "SELECT COUNT(T1.account_id) FROM account AS T1 INNER JOIN district AS T2 ON T1.district_id = T2.district_id INNER JOIN loan AS T3 ON T1.account_id = T3.account_id WHERE T1.district_id = 1 AND (T3.status = 'C' OR T3.status = 'D')",
    "difficulty": "moderate"
  },
  {
    "question_id": 138,
    "db_id": "financial",
    "question": "In the branch where the second-highest number of crimes were committed in 1995 occurred, how many male clients are there?",
    "evidence": "Male refers to gender = 'M'; A15 stands for no. of commited crimes 1995",
    "SQL": "SELECT COUNT(T1.client_id) FROM client AS T1 INNER JOIN district AS T2 ON T1.district_id = T2.district_id WHERE T1.gender = 'M' AND T2.A15 = (SELECT T3.A15 FROM district AS T3 ORDER BY T3.A15 DESC LIMIT 1, 1)",
    "difficulty": "moderate"
  },
  {
    "question_id": 129,
    "db_id": "financial",
    "question": "Which are the top ten withdrawals (non-credit card) by district names for the month of January 1996?",
    "evidence": "Non-credit card withdraws refers to type = 'VYDAJ'; January 1996 can be found by date LIKE '1996-01%' in the database; A2 means district names",
    "SQL": "SELECT DISTINCT T1.A2 FROM district AS T1 INNER JOIN account AS T2 ON T1.district_id = T2.district_id INNER JOIN trans AS T3 ON T2.account_id = T3.account_id WHERE T3.type = 'VYDAJ' AND T3.date LIKE '1996-01%' ORDER BY A2 ASC LIMIT 10",
    "difficulty": "moderate"
  },
  {
    "question_id": 137,
    "db_id": "financial",
    "question": "How many accounts have running contracts in Branch location 1?",
    "evidence": "Status = 'C' stands for running contract, OK so far; Status = 'D' stands for running contract, client in debt",
    "SQL": "SELECT COUNT(T1.account_id) FROM account AS T1 INNER JOIN district AS T2 ON T1.district_id = T2.district_id INNER JOIN loan AS T3 ON T1.account_id = T3.account_id WHERE T1.district_id = 1 AND (T3.status = 'C' OR T3.status = 'D')",
    "difficulty": "moderate"
  },
  {
    "question_id": 138,
    "db_id": "financial",
    "question": "In the branch where the second-highest number of crimes were committed in 1995 occurred, how many male clients are there?",
    "evidence": "Male refers to gender = 'M'; A15 stands for no. of commited crimes 1995",
    "SQL": "SELECT COUNT(T1.client_id) FROM client AS T1 INNER JOIN district AS T2 ON T1.district_id = T2.district_id WHERE T1.gender = 'M' AND T2.A15 = (SELECT T3.A15 FROM district AS T3 ORDER BY T3.A15 DESC LIMIT 1, 1)",
    "difficulty": "moderate"
  },
  {
    "question_id": 145,
    "db_id": "financial",
    "question": "Who are the account holder identification numbers whose who have transactions on the credit card with the amount is less than the average, in 1998?",
    "evidence": "Operation = 'VYBER KARTOU' refers to credit card withdrawal",
    "SQL": "SELECT T1.account_id FROM trans AS T1 INNER JOIN account AS T2 ON T1.account_id = T2.account_id WHERE STRFTIME('%Y', T1.date) = '1998' AND T1.operation = 'VYBER KARTOU' AND T1.amount < (SELECT AVG(amount) FROM trans WHERE STRFTIME('%Y', date) = '1998')",
    "difficulty": "moderate"
  },
  {
    "question_id": 149,
    "db_id": "financial",
    "question": "Please list the account types that are not eligible for loans, and the average income of residents in the district where the account is located exceeds $8000 but is no more than $9000.",
    "evidence": "A11 represents the average salary; Salary and income share the similar meanings; when the account type = 'OWNER', it's eligible for loans",
    "SQL": "SELECT T3.type FROM district AS T1 INNER JOIN account AS T2 ON T1.district_id = T2.district_id INNER JOIN disp AS T3 ON T2.account_id = T3.account_id WHERE T3.type != 'OWNER' AND T1.A11 BETWEEN 8000 AND 9000",
    "difficulty": "challenging"
  },
  {
    "question_id": 152,
    "db_id": "financial",
    "question": "What is the average number of crimes committed in 1995 in regions where the number exceeds 4000 and the region has accounts that are opened starting from the year 1997?",
    "evidence": "A3 refers to region names; A15 stands for the average number of crimes commited in 1995.",
    "SQL": "SELECT AVG(T1.A15) FROM district AS T1 INNER JOIN account AS T2 ON T1.district_id = T2.district_id WHERE STRFTIME('%Y', T2.date) >= '1997' AND T1.A15 > 4000",
    "difficulty": "moderate"
  },
  {
    "question_id": 159,
    "db_id": "financial",
    "question": "List all the withdrawals in cash transactions that the client with the id 3356 makes.",
    "evidence": "operation = 'VYBER' refers to withdrawal in cash",
    "SQL": "SELECT T4.trans_id FROM client AS T1 INNER JOIN disp AS T2 ON T1.client_id = T2.client_id INNER JOIN account AS T3 ON T2.account_id = T3.account_id INNER JOIN trans AS T4 ON T3.account_id = T4.account_id WHERE T1.client_id = 3356 AND T4.operation = 'VYBER'",
    "difficulty": "simple"
  },
  {
    "question_id": 168,
    "db_id": "financial",
    "question": "What percentage of clients who opened their accounts in the district with an average salary of over 10000 are women?",
    "evidence": "Female refers to gender = 'F'; Woman and female are closed; Average salary can be found in A11",
    "SQL": "SELECT CAST(SUM(T2.gender = 'F') AS REAL) * 100 / COUNT(T2.client_id) FROM district AS T1 INNER JOIN client AS T2 ON T1.district_id = T2.district_id WHERE T1.A11 > 10000",
    "difficulty": "moderate"
  },
  {
    "question_id": 169,
    "db_id": "financial",
    "question": "What was the growth rate of the total amount of loans across all accounts for a male client between 1996 and 1997?",
    "evidence": "Growth rate = (sum of amount_1997 - sum of amount_1996) / (sum of amount_1996) * 100%; Male refers to gender = 'M'",
    "SQL": "SELECT CAST((SUM(CASE WHEN STRFTIME('%Y', T1.date) = '1997' THEN T1.amount ELSE 0 END) - SUM(CASE WHEN STRFTIME('%Y', T1.date) = '1996' THEN T1.amount ELSE 0 END)) AS REAL) * 100 / SUM(CASE WHEN STRFTIME('%Y', T1.date) = '1996' THEN T1.amount ELSE 0 END) FROM loan AS T1 INNER JOIN account AS T2 ON T1.account_id = T2.account_id INNER JOIN disp AS T3 ON T3.account_id = T2.account_id INNER JOIN client AS T4 ON T4.client_id = T3.client_id WHERE T4.gender = 'M' AND T3.type = 'OWNER'",
    "difficulty": "challenging"
  },
  {
    "question_id": 173,
    "db_id": "financial",
    "question": "How often does account number 3 request an account statement to be released? What was the aim of debiting 3539 in total?",
    "evidence": "k_symbol refers to the purpose of payments",
    "SQL": "SELECT T1.frequency, T2.k_symbol FROM account AS T1 INNER JOIN (SELECT account_id, k_symbol, SUM(amount) AS total_amount FROM `order` GROUP BY account_id, k_symbol) AS T2 ON T1.account_id = T2.account_id WHERE T1.account_id = 3 AND T2.total_amount = 3539",
    "difficulty": "challenging"
  },
  {
    "question_id": 186,
    "db_id": "financial",
    "question": "What percentage of male clients request for weekly statements to be issued?",
    "evidence": "Percentage of male clients = [count(male clients who requested weekly statements / count(clients who requested weekly statements)] * 100%; Male means gender = 'M'; 'POPLATEK TYDNE' stands for weekly issuance",
    "SQL": "SELECT CAST(SUM(T1.gender = 'M') AS REAL) * 100 / COUNT(T1.client_id) FROM client AS T1 INNER JOIN district AS T3 ON T1.district_id = T3.district_id INNER JOIN account AS T2 ON T2.district_id = T3.district_id INNER JOIN disp as T4 on T1.client_id = T4.client_id AND T2.account_id = T4.account_id WHERE T2.frequency = 'POPLATEK TYDNE'",
    "difficulty": "moderate"
  },
  {
    "question_id": 189,
    "db_id": "financial",
    "question": "Name the account numbers of female clients who are oldest and have lowest average salary?",
    "evidence": "Female refers to 'F' in the gender; A11 contains information about average salary",
    "SQL": "SELECT T3.account_id FROM client AS T1 INNER JOIN district AS T2 ON T1.district_id = T2.district_id INNER JOIN account AS T3 ON T2.district_id = T3.district_id INNER JOIN disp AS T4 ON T1.client_id = T4.client_id AND T4.account_id = T3.account_id  WHERE T1.gender = 'F' ORDER BY T1.birth_date ASC, T2.A11 ASC LIMIT 1",
    "difficulty": "moderate"
  },
  {
    "question_id": 192,
    "db_id": "financial",
    "question": "What is the average amount of loan which are still on running contract with statement issuance after each transaction?",
    "evidence": "status = 'C' stands for running contract, OK so far; status = 'D' stands for running contract, client in debt. 'POPLATEK PO OBRATU' stands for issuance after transaction",
    "SQL": "SELECT AVG(T2.amount) FROM account AS T1 INNER JOIN loan AS T2 ON T1.account_id = T2.account_id WHERE T2.status IN ('C', 'D') AND T1.frequency = 'POPLATEK PO OBRATU'",
    "difficulty": "moderate"
  },
  {
    "question_id": 194,
    "db_id": "financial",
    "question": "Provide the IDs and age of the client with high level credit card, which is eligible for loans.",
    "evidence": "the credit card is high-level refers to card.type = 'gold'; eligible for loans refers to disp.type = 'OWNER'",
    "SQL": "SELECT T1.client_id, STRFTIME('%Y', CURRENT_TIMESTAMP) - STRFTIME('%Y', T3.birth_date) FROM disp AS T1 INNER JOIN card AS T2 ON T2.disp_id = T1.disp_id INNER JOIN client AS T3 ON T1.client_id = T3.client_id WHERE T2.type = 'gold' AND T1.type = 'OWNER'",
    "difficulty": "moderate"
  }
]
