[
    {
        "task_id": 10000,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "For products whose {{filter_key}} is {{condition}}, count the number of records for tops, bottoms, and the other attributes. There is no need for a filtering except for attribute set and {{filter_key}}. Note that the filtering of the {{filter_key}} might not be visible by default. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: 'tops: ${number_of_tops}, bottoms: ${number_of_bottoms}, others: ${number_of_others}'.",
        "intent": "For products whose color is black, count the number of records for tops, bottoms, and the other attributes. There is no need for a filtering except for attribute set and color. Note that the filtering of the color might not be visible by default. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: 'tops: ${number_of_tops}, bottoms: ${number_of_bottoms}, others: ${number_of_others}'.",
        "required_obs": "any",
        "type_main": "others",
        "type_sub": "calc",
        "description": "Go to 'catalog' -> 'products', filter by attribute set and {{filter_key}}, check 'X records found' at the top. Note that color should be added via 'Columns' tab",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "difficulty": "hard",
            "filter_key": "color",
            "condition": "black",
            "answer": "tops: 170, bottoms: 94, others: 1"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "tops: 170, bottoms: 94, others: 1"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "tops: 170, bottoms: 94, others: 1"
        }
    },
    {
        "task_id": 10001,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "For products whose {{filter_key}} is {{condition}}, count the number of records for tops, bottoms, and the other attributes. There is no need for a filtering except for attribute set and {{filter_key}}. Note that the filtering of the {{filter_key}} might not be visible by default. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: 'tops: ${number_of_tops}, bottoms: ${number_of_bottoms}, others: ${number_of_others}'.",
        "intent": "For products whose color is blue, count the number of records for tops, bottoms, and the other attributes. There is no need for a filtering except for attribute set and color. Note that the filtering of the color might not be visible by default. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: 'tops: ${number_of_tops}, bottoms: ${number_of_bottoms}, others: ${number_of_others}'.",
        "required_obs": "any",
        "type_main": "others",
        "type_sub": "calc",
        "description": "Go to 'catalog' -> 'products', filter by attribute set and {{filter_key}}, check 'X records found' at the top. Note that color should be added via 'Columns' tab",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "difficulty": "hard",
            "filter_key": "color",
            "condition": "blue",
            "answer": "tops: 233, bottoms: 106, others: 3"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "tops: 233, bottoms: 106, others: 3"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "tops: 233, bottoms: 106, others: 3"
        }
    },
    {
        "task_id": 10002,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "For products whose {{filter_key}} is {{condition}}, count the number of records for tops, bottoms, and the other attributes. There is no need for a filtering except for attribute set and {{filter_key}}. Note that the filtering of the {{filter_key}} might not be visible by default. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: 'tops: ${number_of_tops}, bottoms: ${number_of_bottoms}, others: ${number_of_others}'.",
        "intent": "For products whose color is red, count the number of records for tops, bottoms, and the other attributes. There is no need for a filtering except for attribute set and color. Note that the filtering of the color might not be visible by default. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: 'tops: ${number_of_tops}, bottoms: ${number_of_bottoms}, others: ${number_of_others}'.",
        "required_obs": "any",
        "type_main": "others",
        "type_sub": "calc",
        "description": "Go to 'catalog' -> 'products', filter by attribute set and {{filter_key}}, check 'X records found' at the top. Note that color should be added via 'Columns' tab",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "difficulty": "hard",
            "filter_key": "color",
            "condition": "red",
            "answer": "tops: 193, bottoms: 56, others: 3"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "tops: 193, bottoms: 56, others: 3"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "tops: 193, bottoms: 56, others: 3"
        }
    },
    {
        "task_id": 10003,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "For products whose {{filter_key}} is {{condition}}, count the number of records for tops, bottoms, and the other attributes. There is no need for a filtering except for attribute set and {{filter_key}}. Note that the filtering of the {{filter_key}} might not be visible by default. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: 'tops: ${number_of_tops}, bottoms: ${number_of_bottoms}, others: ${number_of_others}'.",
        "intent": "For products whose price is below 30, count the number of records for tops, bottoms, and the other attributes. There is no need for a filtering except for attribute set and price. Note that the filtering of the price might not be visible by default. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: 'tops: ${number_of_tops}, bottoms: ${number_of_bottoms}, others: ${number_of_others}'.",
        "required_obs": "any",
        "type_main": "others",
        "type_sub": "calc",
        "description": "Go to 'catalog' -> 'products', filter by attribute set and {{filter_key}}, check 'X records found' at the top. Note that color should be added via 'Columns' tab",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "difficulty": "hard",
            "filter_key": "price",
            "condition": "below 30",
            "answer": "tops: 432, bottoms: 120, others: 18"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "tops: 432, bottoms: 120, others: 18"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "tops: 432, bottoms: 120, others: 18"
        }
    },
    {
        "task_id": 10004,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "For products whose {{filter_key}} is {{condition}}, count the number of records for tops, bottoms, and the other attributes. There is no need for a filtering except for attribute set and {{filter_key}}. Note that the filtering of the {{filter_key}} might not be visible by default. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: 'tops: ${number_of_tops}, bottoms: ${number_of_bottoms}, others: ${number_of_others}'.",
        "intent": "For products whose price is between 30 and 100, count the number of records for tops, bottoms, and the other attributes. There is no need for a filtering except for attribute set and price. Note that the filtering of the price might not be visible by default. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: 'tops: ${number_of_tops}, bottoms: ${number_of_bottoms}, others: ${number_of_others}'.",
        "required_obs": "any",
        "type_main": "others",
        "type_sub": "calc",
        "description": "Go to 'catalog' -> 'products', filter by attribute set and {{filter_key}}, check 'X records found' at the top. Note that color should be added via 'Columns' tab",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "difficulty": "hard",
            "filter_key": "price",
            "condition": "between 30 and 100",
            "answer": "tops: 1029, bottoms: 412, others: 26"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "tops: 1029, bottoms: 412, others: 26"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "tops: 1029, bottoms: 412, others: 26"
        }
    },
    {
        "task_id": 10010,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Considering only transactions with a status of 'Complete', which month has the highest average purchase amount between {{period1}} and {{period2}} in 2022? Final answer should be 'month, amount' (separated by a comma and a space, without '$') without reasoning. Round the purchase amount to the nearest integer.",
        "intent": "Considering only transactions with a status of 'Complete', which month has the highest average purchase amount between January and April in 2022? Final answer should be 'month, amount' (separated by a comma and a space, without '$') without reasoning. Round the purchase amount to the nearest integer.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'sales' -> 'orders', filter by status and month, and calculate the average of 'Grand Total'. You will get the following numbers... Jan:144.7(=1591.9/11), Feb:132.9(=2125.7/16), Mar:131.6(=1841.8/14), Apr:155.7(=1090.0/7), May:149.2(=1193.6/8), Jun:107.0(=1390.7/13), Jul:142.0(=1278.1/9), Aug:119.3(=954.5/8), Sep:136.2(=1361.7/10), Oct:111.2(=444.8/4), Nov:110.1(=550.7/5), Dec:161.2(=1612.4/10)",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "medium",
            "period1": "January",
            "period2": "April",
            "checkpoint": "",
            "answer_month": "April",
            "answer_amount": "156"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "April, 156"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "April, 156"
        }
    },
    {
        "task_id": 10011,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Considering only transactions with a status of 'Complete', which month has the highest average purchase amount between {{period1}} and {{period2}} in 2022? Final answer should be 'month, amount' (separated by a comma and a space, without '$') without reasoning. Round the purchase amount to the nearest integer.",
        "intent": "Considering only transactions with a status of 'Complete', which month has the highest average purchase amount between May and August in 2022? Final answer should be 'month, amount' (separated by a comma and a space, without '$') without reasoning. Round the purchase amount to the nearest integer.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'sales' -> 'orders', filter by status and month, and calculate the average of 'Grand Total'. You will get the following numbers... Jan:144.7(=1591.9/11), Feb:132.9(=2125.7/16), Mar:131.6(=1841.8/14), Apr:155.7(=1090.0/7), May:149.2(=1193.6/8), Jun:107.0(=1390.7/13), Jul:142.0(=1278.1/9), Aug:119.3(=954.5/8), Sep:136.2(=1361.7/10), Oct:111.2(=444.8/4), Nov:110.1(=550.7/5), Dec:161.2(=1612.4/10)",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "medium",
            "period1": "May",
            "period2": "August",
            "checkpoint": "",
            "answer_month": "May",
            "answer_amount": "149"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "May, 149"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "May, 149"
        }
    },
    {
        "task_id": 10012,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Considering only transactions with a status of 'Complete', which month has the highest average purchase amount between {{period1}} and {{period2}} in 2022? Final answer should be 'month, amount' (separated by a comma and a space, without '$') without reasoning. Round the purchase amount to the nearest integer.",
        "intent": "Considering only transactions with a status of 'Complete', which month has the highest average purchase amount between September and December in 2022? Final answer should be 'month, amount' (separated by a comma and a space, without '$') without reasoning. Round the purchase amount to the nearest integer.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'sales' -> 'orders', filter by status and month, and calculate the average of 'Grand Total'. You will get the following numbers... Jan:144.7(=1591.9/11), Feb:132.9(=2125.7/16), Mar:131.6(=1841.8/14), Apr:155.7(=1090.0/7), May:149.2(=1193.6/8), Jun:107.0(=1390.7/13), Jul:142.0(=1278.1/9), Aug:119.3(=954.5/8), Sep:136.2(=1361.7/10), Oct:111.2(=444.8/4), Nov:110.1(=550.7/5), Dec:161.2(=1612.4/10)",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "medium",
            "period1": "September",
            "period2": "December",
            "checkpoint": "",
            "answer_month": "December",
            "answer_amount": "161"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "December, 161"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "December, 161"
        }
    },
    {
        "task_id": 10013,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Considering only transactions with a status of 'Complete', which month has the highest average purchase amount between {{period1}} and {{period2}} in 2022? Final answer should be 'month, amount' (separated by a comma and a space, without '$') without reasoning. Round the purchase amount to the nearest integer.",
        "intent": "Considering only transactions with a status of 'Complete', which month has the highest average purchase amount between January and June in 2022? Final answer should be 'month, amount' (separated by a comma and a space, without '$') without reasoning. Round the purchase amount to the nearest integer.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'sales' -> 'orders', filter by status and month, and calculate the average of 'Grand Total'. You will get the following numbers... Jan:144.7(=1591.9/11), Feb:132.9(=2125.7/16), Mar:131.6(=1841.8/14), Apr:155.7(=1090.0/7), May:149.2(=1193.6/8), Jun:107.0(=1390.7/13), Jul:142.0(=1278.1/9), Aug:119.3(=954.5/8), Sep:136.2(=1361.7/10), Oct:111.2(=444.8/4), Nov:110.1(=550.7/5), Dec:161.2(=1612.4/10)",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "medium",
            "period1": "January",
            "period2": "June",
            "checkpoint": "",
            "answer_month": "April",
            "answer_amount": "156"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "April, 156"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "April, 156"
        }
    },
    {
        "task_id": 10014,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Considering only transactions with a status of 'Complete', which month has the highest average purchase amount between {{period1}} and {{period2}} in 2022? Final answer should be 'month, amount' (separated by a comma and a space, without '$') without reasoning. Round the purchase amount to the nearest integer.",
        "intent": "Considering only transactions with a status of 'Complete', which month has the highest average purchase amount between January and December in 2022? Final answer should be 'month, amount' (separated by a comma and a space, without '$') without reasoning. Round the purchase amount to the nearest integer.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'sales' -> 'orders', filter by status and month, and calculate the average of 'Grand Total'. You will get the following numbers... Jan:144.7(=1591.9/11), Feb:132.9(=2125.7/16), Mar:131.6(=1841.8/14), Apr:155.7(=1090.0/7), May:149.2(=1193.6/8), Jun:107.0(=1390.7/13), Jul:142.0(=1278.1/9), Aug:119.3(=954.5/8), Sep:136.2(=1361.7/10), Oct:111.2(=444.8/4), Nov:110.1(=550.7/5), Dec:161.2(=1612.4/10)",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "medium",
            "period1": "January",
            "period2": "December",
            "checkpoint": "",
            "answer_month": "December",
            "answer_amount": "161"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "December, 161"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "December, 161"
        }
    },
    {
        "task_id": 10020,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Who is the first, tenth, and last person in alphabetical order among those living in {{state1}}, {{state2}}, {{state3}} or {{state4}}? There are sometimes multiple people with the same name in different states. In that case, you should consider them as different people. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${first person} (2) ${tenth person} (3) ${last person}'",
        "intent": "Who is the first, tenth, and last person in alphabetical order among those living in California, Florida, Illinois or New York? There are sometimes multiple people with the same name in different states. In that case, you should consider them as different people. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${first person} (2) ${tenth person} (3) ${last person}'",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "",
        "description": "Go to 'Customers' -> 'All Customers', filter by the given states, and memorize the name for each state. Then sort the names in alphabetical order.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "difficulty": "medium",
            "state1": "California",
            "state2": "Florida",
            "state3": "Illinois",
            "state4": "New York",
            "checkpoint": "Alex Johnson, Alex Martin, Alexander Thomas, Ava Brown, Emily Wilson, Emma Lopez, Isabella Santos, James Kim, Jane Doe, Jane Doe, Jane Smith, Jennifer White, Jessica Wong, John Doe, John Lee, Julia Williams, Julie Nguyen, Kate Jones, Lily Potter, Mary Martin, Maxwell Baker, Michael Nguyen, Roberto Lopez, Sam Wilson, Samantha Jones, Samantha Wu, Sarah Miller, Sophie Taylor",
            "answer_first": "Alex Johnson",
            "answer_tenth": "Jane Doe",
            "answer_last": "Sophie Taylor"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "(1) Alex Johnson (2) Jane Doe (3) Sophie Taylor"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "(1) Alex Johnson (2) Jane Doe (3) Sophie Taylor"
        }
    },
    {
        "task_id": 10021,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Who is the first, tenth, and last person in alphabetical order among those living in {{state1}}, {{state2}}, {{state3}} or {{state4}}? There are sometimes multiple people with the same name in different states. In that case, you should consider them as different people. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${first person} (2) ${tenth person} (3) ${last person}'",
        "intent": "Who is the first, tenth, and last person in alphabetical order among those living in California, Florida, New York or Texas? There are sometimes multiple people with the same name in different states. In that case, you should consider them as different people. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${first person} (2) ${tenth person} (3) ${last person}'",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "",
        "description": "Go to 'Customers' -> 'All Customers', filter by the given states, and memorize the name for each state. Then sort the names in alphabetical order.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "difficulty": "medium",
            "state1": "California",
            "state2": "Florida",
            "state3": "New York",
            "state4": "Texas",
            "checkpoint": "Alex Johnson, Alex Martin, Alexander Thomas, Anna Nguyen, Ava Brown, Bob Johnson, Bob Jones, David Smith, Emma Davis, Emma Lopez, Ethan Garcia, Isabella Santos, James Baker, Jane Doe, Jane Doe, Jane Smith, Jennifer White, John Doe, John Smith, Julia Williams, Julie Nguyen, Kate Jones, Lisa Green, Lisa Kim, Mary Martin, Olivia Lee, Roberto Lopez, Sam Wilson, Samantha Jones, Samantha Nguyen, Samantha Wu, Sarah Miller, Sophie Taylor",
            "answer_first": "Alex Johnson",
            "answer_tenth": "Emma Lopez",
            "answer_last": "Sophie Taylor"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "(1) Alex Johnson (2) Emma Lopez (3) Sophie Taylor"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "(1) Alex Johnson (2) Emma Lopez (3) Sophie Taylor"
        }
    },
    {
        "task_id": 10022,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Who is the first, tenth, and last person in alphabetical order among those living in {{state1}}, {{state2}}, {{state3}} or {{state4}}? There are sometimes multiple people with the same name in different states. In that case, you should consider them as different people. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${first person} (2) ${tenth person} (3) ${last person}'",
        "intent": "Who is the first, tenth, and last person in alphabetical order among those living in Florida, Illinois, New York or Texas? There are sometimes multiple people with the same name in different states. In that case, you should consider them as different people. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${first person} (2) ${tenth person} (3) ${last person}'",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "",
        "description": "Go to 'Customers' -> 'All Customers', filter by the given states, and memorize the name for each state. Then sort the names in alphabetical order.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "difficulty": "medium",
            "state1": "Florida",
            "state2": "Illinois",
            "state3": "New York",
            "state4": "Texas",
            "checkpoint": "Alex Martin, Anna Nguyen, Bob Johnson, Bob Jones, David Smith, Emily Wilson, Emma Davis, Ethan Garcia, Isabella Santos, James Baker, James Kim, Jane Doe, Jane Doe, Jessica Wong, John Doe, John Lee, John Smith, Julia Williams, Kate Jones, Lily Potter, Lisa Green, Lisa Kim, Mary Martin, Maxwell Baker, Michael Nguyen, Olivia Lee, Roberto Lopez, Samantha Jones, Samantha Nguyen, Samantha Wu, Sophie Taylor",
            "answer_first": "Alex Martin",
            "answer_tenth": "James Baker",
            "answer_last": "Sophie Taylor"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "(1) Alex Martin (2) James Baker (3) Sophie Taylor"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "(1) Alex Martin (2) James Baker (3) Sophie Taylor"
        }
    },
    {
        "task_id": 10023,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Who is the first, tenth, and last person in alphabetical order among those living in {{state1}}, {{state2}}, {{state3}} or {{state4}}? There are sometimes multiple people with the same name in different states. In that case, you should consider them as different people. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${first person} (2) ${tenth person} (3) ${last person}'",
        "intent": "Who is the first, tenth, and last person in alphabetical order among those living in Illinois, New York, Texas or Washington? There are sometimes multiple people with the same name in different states. In that case, you should consider them as different people. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${first person} (2) ${tenth person} (3) ${last person}'",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "",
        "description": "Go to 'Customers' -> 'All Customers', filter by the given states, and memorize the name for each state. Then sort the names in alphabetical order.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "difficulty": "medium",
            "state1": "Illinois",
            "state2": "New York",
            "state3": "Texas",
            "state4": "Washington",
            "checkpoint": "Adam Garcia, Alex Martin, Anna Nguyen, Bob Johnson, Bob Jones, David Smith, Emily Wilson, Emma Davis, Ethan Garcia, James Baker, James Kim, Jane Doe, Jane Smith, Jessica Chang, Jessica Nguyen, Jessica Wong, John Doe, John Lee, John Smith, Julia Williams, Kate Jones, Katie Wong, Lily Potter, Lisa Green, Lisa Kim, Maxwell Baker, Michael Nguyen, Olivia Lee, Roberto Lopez, Samantha Nguyen, William Chang",
            "answer_first": "Adam Garcia",
            "answer_tenth": "James Baker",
            "answer_last": "William Chang"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "(1) Adam Garcia (2) James Baker (3) William Chang"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "(1) Adam Garcia (2) James Baker (3) William Chang"
        }
    },
    {
        "task_id": 10024,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Who is the first, tenth, and last person in alphabetical order among those living in {{state1}}, {{state2}}, {{state3}} or {{state4}}? There are sometimes multiple people with the same name in different states. In that case, you should consider them as different people. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${first person} (2) ${tenth person} (3) ${last person}'",
        "intent": "Who is the first, tenth, and last person in alphabetical order among those living in Arizona, Colorado, Massachusetts or New Jersey? There are sometimes multiple people with the same name in different states. In that case, you should consider them as different people. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${first person} (2) ${tenth person} (3) ${last person}'",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "",
        "description": "Go to 'Customers' -> 'All Customers', filter by the given states, and memorize the name for each state. Then sort the names in alphabetical order.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "difficulty": "medium",
            "state1": "Arizona",
            "state2": "Colorado",
            "state3": "Massachusetts",
            "state4": "New Jersey",
            "checkpoint": "Adam Garcia, Amanda Kim, David Lee, Emily Chen, Grace Nguyen, Isaac Rodriguez, Jacob Rivera, Jason Miller, Lucy Garcia, Matthew Kim, Natalie Kim, Nathan Chen, Olivia Jackson, Robert Johnson, Sophia Young",
            "answer_first": "Adam Garcia",
            "answer_tenth": "Matthew Kim",
            "answer_last": "Sophia Young"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "(1) Adam Garcia (2) Matthew Kim (3) Sophia Young"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "(1) Adam Garcia (2) Matthew Kim (3) Sophia Young"
        }
    },
    {
        "task_id": 10030,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Calculate the total amount of canceled records (in monetary terms) for each month from {{period1}} to {{period2}} in 2022. Which month has the most cancellations? Final answer should be 'month, amount_of_cancellations' (separated by a comma and a space, without '$', and formatted with a comma as a thousands separator (e.g., April, 12,345)) without reasoning. Round the purchase amount to the nearest integer.",
        "intent": "Calculate the total amount of canceled records (in monetary terms) for each month from January to April in 2022. Which month has the most cancellations? Final answer should be 'month, amount_of_cancellations' (separated by a comma and a space, without '$', and formatted with a comma as a thousands separator (e.g., April, 12,345)) without reasoning. Round the purchase amount to the nearest integer.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'sales' -> 'orders', filter by status and month, and calculate the sum of 'Grand Total'. You will get the following numbers... Jan($637.20,cnt=6), Feb($737.49,cnt=6), Mar($937.91,cnt=9), Apr($1353.30,cnt=12), May($649.70,cnt=7), Jun($1957.50,cnt=15), Jul($720.85,cnt=7), Aug($1568.79,cnt=11), Sep($810.50,cnt=6), Oct($584.75,cnt=5), Nov($1234.70,cnt=9), Dec($519.76,cnt=6)",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "medium",
            "period1": "January",
            "period2": "April",
            "answer": "April, 1,353"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "April, 1,353"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "April, 1,353"
        }
    },
    {
        "task_id": 10031,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Calculate the total amount of canceled records (in monetary terms) for each month from {{period1}} to {{period2}} in 2022. Which month has the most cancellations? Final answer should be 'month, amount_of_cancellations' (separated by a comma and a space, without '$', and formatted with a comma as a thousands separator (e.g., April, 12,345)) without reasoning. Round the purchase amount to the nearest integer.",
        "intent": "Calculate the total amount of canceled records (in monetary terms) for each month from May to August in 2022. Which month has the most cancellations? Final answer should be 'month, amount_of_cancellations' (separated by a comma and a space, without '$', and formatted with a comma as a thousands separator (e.g., April, 12,345)) without reasoning. Round the purchase amount to the nearest integer.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'sales' -> 'orders', filter by status and month, and calculate the sum of 'Grand Total'. You will get the following numbers... Jan($637.20,cnt=6), Feb($737.49,cnt=6), Mar($937.91,cnt=9), Apr($1353.30,cnt=12), May($649.70,cnt=7), Jun($1957.50,cnt=15), Jul($720.85,cnt=7), Aug($1568.79,cnt=11), Sep($810.50,cnt=6), Oct($584.75,cnt=5), Nov($1234.70,cnt=9), Dec($519.76,cnt=6)",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "medium",
            "period1": "May",
            "period2": "August",
            "answer": "June, 1,957 |OR| June, 1,958"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "June, 1,957 |OR| June, 1,958"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "June, 1,957 |OR| June, 1,958"
        }
    },
    {
        "task_id": 10032,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Calculate the total amount of canceled records (in monetary terms) for each month from {{period1}} to {{period2}} in 2022. Which month has the most cancellations? Final answer should be 'month, amount_of_cancellations' (separated by a comma and a space, without '$', and formatted with a comma as a thousands separator (e.g., April, 12,345)) without reasoning. Round the purchase amount to the nearest integer.",
        "intent": "Calculate the total amount of canceled records (in monetary terms) for each month from September to December in 2022. Which month has the most cancellations? Final answer should be 'month, amount_of_cancellations' (separated by a comma and a space, without '$', and formatted with a comma as a thousands separator (e.g., April, 12,345)) without reasoning. Round the purchase amount to the nearest integer.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'sales' -> 'orders', filter by status and month, and calculate the sum of 'Grand Total'. You will get the following numbers... Jan($637.20,cnt=6), Feb($737.49,cnt=6), Mar($937.91,cnt=9), Apr($1353.30,cnt=12), May($649.70,cnt=7), Jun($1957.50,cnt=15), Jul($720.85,cnt=7), Aug($1568.79,cnt=11), Sep($810.50,cnt=6), Oct($584.75,cnt=5), Nov($1234.70,cnt=9), Dec($519.76,cnt=6)",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "medium",
            "period1": "September",
            "period2": "December",
            "answer": "November, 1,235"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "November, 1,235"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "November, 1,235"
        }
    },
    {
        "task_id": 10033,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Calculate the total amount of canceled records (in monetary terms) for each month from {{period1}} to {{period2}} in 2022. Which month has the most cancellations? Final answer should be 'month, amount_of_cancellations' (separated by a comma and a space, without '$', and formatted with a comma as a thousands separator (e.g., April, 12,345)) without reasoning. Round the purchase amount to the nearest integer.",
        "intent": "Calculate the total amount of canceled records (in monetary terms) for each month from January to June in 2022. Which month has the most cancellations? Final answer should be 'month, amount_of_cancellations' (separated by a comma and a space, without '$', and formatted with a comma as a thousands separator (e.g., April, 12,345)) without reasoning. Round the purchase amount to the nearest integer.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'sales' -> 'orders', filter by status and month, and calculate the sum of 'Grand Total'. You will get the following numbers... Jan($637.20,cnt=6), Feb($737.49,cnt=6), Mar($937.91,cnt=9), Apr($1353.30,cnt=12), May($649.70,cnt=7), Jun($1957.50,cnt=15), Jul($720.85,cnt=7), Aug($1568.79,cnt=11), Sep($810.50,cnt=6), Oct($584.75,cnt=5), Nov($1234.70,cnt=9), Dec($519.76,cnt=6)",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "medium",
            "period1": "January",
            "period2": "June",
            "answer": "June, 1,957 |OR| June, 1,958"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "June, 1,957 |OR| June, 1,958"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "June, 1,957 |OR| June, 1,958"
        }
    },
    {
        "task_id": 10034,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Calculate the total amount of canceled records (in monetary terms) for each month from {{period1}} to {{period2}} in 2022. Which month has the most cancellations? Final answer should be 'month, amount_of_cancellations' (separated by a comma and a space, without '$', and formatted with a comma as a thousands separator (e.g., April, 12,345)) without reasoning. Round the purchase amount to the nearest integer.",
        "intent": "Calculate the total amount of canceled records (in monetary terms) for each month from January to December in 2022. Which month has the most cancellations? Final answer should be 'month, amount_of_cancellations' (separated by a comma and a space, without '$', and formatted with a comma as a thousands separator (e.g., April, 12,345)) without reasoning. Round the purchase amount to the nearest integer.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'sales' -> 'orders', filter by status and month, and calculate the sum of 'Grand Total'. You will get the following numbers... Jan($637.20,cnt=6), Feb($737.49,cnt=6), Mar($937.91,cnt=9), Apr($1353.30,cnt=12), May($649.70,cnt=7), Jun($1957.50,cnt=15), Jul($720.85,cnt=7), Aug($1568.79,cnt=11), Sep($810.50,cnt=6), Oct($584.75,cnt=5), Nov($1234.70,cnt=9), Dec($519.76,cnt=6)",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "medium",
            "period1": "January",
            "period2": "December",
            "answer": "June, 1,957 |OR| June, 1,958"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "June, 1,957 |OR| June, 1,958"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "June, 1,957 |OR| June, 1,958"
        }
    },
    {
        "task_id": 10040,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Find the cheapest product for each of {{attribute1}} and {{attribute2}}. If there are multiple products with the same price for the same attribute, choose the first one in the alphabetical order. If we sell all (every colors and sizes) of the salable quantity by the specified price for both of the products, how much money do we make? Final answer should be '{amount}' without reasoning, without '$', and formatted with a comma as a thousands separator (e.g., 12,345). Round the purchase amount to the nearest integer.",
        "intent": "Find the cheapest product for each of tops and bottoms. If there are multiple products with the same price for the same attribute, choose the first one in the alphabetical order. If we sell all (every colors and sizes) of the salable quantity by the specified price for both of the products, how much money do we make? Final answer should be '{amount}' without reasoning, without '$', and formatted with a comma as a thousands separator (e.g., 12,345). Round the purchase amount to the nearest integer.",
        "required_obs": "any",
        "type_main": "calc",
        "description": "Go to 'catalog' -> 'products', filter by attribute, and find the cheapest product. Filter by the name of the found product and calculate the sum of Price * 'Salable Quantity'.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "difficulty": "medium",
            "attribute1": "tops",
            "attribute2": "bottoms",
            "hint": "Since the prices cannot be sorted, filtering is needed to narrow down the results to a manageable number.",
            "checkpoint": "tops: 18 (Atlas Fitness Tank, salable=492), bottoms: 20 (Arcadio Gym Short, salable=1189) -> 18*492+20*1189=32636",
            "answer": "32636 |OR| 32,636"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "32636 |OR| 32,636"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "32636 |OR| 32,636"
        }
    },
    {
        "task_id": 10041,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Find the cheapest product for each of {{attribute1}} and {{attribute2}}. If there are multiple products with the same price for the same attribute, choose the first one in the alphabetical order. If we sell all (every colors and sizes) of the salable quantity by the specified price for both of the products, how much money do we make? Final answer should be '{amount}' without reasoning, without '$', and formatted with a comma as a thousands separator (e.g., 12,345). Round the purchase amount to the nearest integer.",
        "intent": "Find the cheapest product for each of bag and bottoms. If there are multiple products with the same price for the same attribute, choose the first one in the alphabetical order. If we sell all (every colors and sizes) of the salable quantity by the specified price for both of the products, how much money do we make? Final answer should be '{amount}' without reasoning, without '$', and formatted with a comma as a thousands separator (e.g., 12,345). Round the purchase amount to the nearest integer.",
        "required_obs": "any",
        "type_main": "calc",
        "description": "Go to 'catalog' -> 'products', filter by attribute, and find the cheapest product. Filter by the name of the found product and calculate the sum of Price * 'Salable Quantity'.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "difficulty": "medium",
            "attribute1": "bag",
            "attribute2": "bottoms",
            "hint": "Since the prices cannot be sorted, filtering is needed to narrow down the results to a manageable number.",
            "checkpoint": "bag: 32 (Compete Track Tote, salable=100), bottoms: 20 (Arcadio Gym Short, salable=1189) -> 32*100+20*1189=26980",
            "answer": "26980 |OR| 26,980"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "26980 |OR| 26,980"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "26980 |OR| 26,980"
        }
    },
    {
        "task_id": 10042,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Find the cheapest product for each of {{attribute1}} and {{attribute2}}. If there are multiple products with the same price for the same attribute, choose the first one in the alphabetical order. If we sell all (every colors and sizes) of the salable quantity by the specified price for both of the products, how much money do we make? Final answer should be '{amount}' without reasoning, without '$', and formatted with a comma as a thousands separator (e.g., 12,345). Round the purchase amount to the nearest integer.",
        "intent": "Find the cheapest product for each of Gear and bottoms. If there are multiple products with the same price for the same attribute, choose the first one in the alphabetical order. If we sell all (every colors and sizes) of the salable quantity by the specified price for both of the products, how much money do we make? Final answer should be '{amount}' without reasoning, without '$', and formatted with a comma as a thousands separator (e.g., 12,345). Round the purchase amount to the nearest integer.",
        "required_obs": "any",
        "type_main": "calc",
        "description": "Go to 'catalog' -> 'products', filter by attribute, and find the cheapest product. Filter by the name of the found product and calculate the sum of Price * 'Salable Quantity'.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "difficulty": "medium",
            "attribute1": "Gear",
            "attribute2": "bottoms",
            "hint": "Since the prices cannot be sorted, filtering is needed to narrow down the results to a manageable number.",
            "checkpoint": "Gear: 5 (Sprite Foam Yoga Brick, salable=94), bottoms: 20 (Arcadio Gym Short, salable=1189) -> 5*94+20*1189=24250",
            "answer": "24250 |OR| 24,250"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "24250 |OR| 24,250"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "24250 |OR| 24,250"
        }
    },
    {
        "task_id": 10043,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Find the cheapest product for each of {{attribute1}} and {{attribute2}}. If there are multiple products with the same price for the same attribute, choose the first one in the alphabetical order. If we sell all (every colors and sizes) of the salable quantity by the specified price for both of the products, how much money do we make? Final answer should be '{amount}' without reasoning, without '$', and formatted with a comma as a thousands separator (e.g., 12,345). Round the purchase amount to the nearest integer.",
        "intent": "Find the cheapest product for each of tops and Gear. If there are multiple products with the same price for the same attribute, choose the first one in the alphabetical order. If we sell all (every colors and sizes) of the salable quantity by the specified price for both of the products, how much money do we make? Final answer should be '{amount}' without reasoning, without '$', and formatted with a comma as a thousands separator (e.g., 12,345). Round the purchase amount to the nearest integer.",
        "required_obs": "any",
        "type_main": "calc",
        "description": "Go to 'catalog' -> 'products', filter by attribute, and find the cheapest product. Filter by the name of the found product and calculate the sum of Price * 'Salable Quantity'.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "difficulty": "medium",
            "attribute1": "tops",
            "attribute2": "Gear",
            "hint": "Since the prices cannot be sorted, filtering is needed to narrow down the results to a manageable number.",
            "checkpoint": "tops: 18 (Atlas Fitness Tank, salable=492), Gear: 5 (Sprite Foam Yoga Brick, salable=94) -> 18*492+5*94=9326",
            "answer": "9326 |OR| 9,326"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "9326 |OR| 9,326"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "9326 |OR| 9,326"
        }
    },
    {
        "task_id": 10044,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Find the cheapest product for each of {{attribute1}} and {{attribute2}}. If there are multiple products with the same price for the same attribute, choose the first one in the alphabetical order. If we sell all (every colors and sizes) of the salable quantity by the specified price for both of the products, how much money do we make? Final answer should be '{amount}' without reasoning, without '$', and formatted with a comma as a thousands separator (e.g., 12,345). Round the purchase amount to the nearest integer.",
        "intent": "Find the cheapest product for each of bag and Gear. If there are multiple products with the same price for the same attribute, choose the first one in the alphabetical order. If we sell all (every colors and sizes) of the salable quantity by the specified price for both of the products, how much money do we make? Final answer should be '{amount}' without reasoning, without '$', and formatted with a comma as a thousands separator (e.g., 12,345). Round the purchase amount to the nearest integer.",
        "required_obs": "any",
        "type_main": "calc",
        "description": "Go to 'catalog' -> 'products', filter by attribute, and find the cheapest product. Filter by the name of the found product and calculate the sum of Price * 'Salable Quantity'.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "difficulty": "medium",
            "attribute1": "bag",
            "attribute2": "Gear",
            "hint": "Since the prices cannot be sorted, some heuristic filtering is needed to narrow down the results to a manageable number.",
            "checkpoint": "bag: 32 (Compete Track Tote, salable=100), Gear: 5 (Sprite Foam Yoga Brick, salable=94) -> 32*100+5*94=3670",
            "answer": "3670 |OR| 3,670"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "3670 |OR| 3,670"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "3670 |OR| 3,670"
        }
    },
    {
        "task_id": 10050,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Identify the person who placed the largest total purchase amount (in monetary terms) 'for each month' between {{period1}} and {{period2}} in 2022. Sort them in descending order by 'total purchase amount across all recorded time periods' (not just between {{period1}} and {{period2}}). Consider only transactions with a status of 'Complete' throughout the question. Final answer should be in the following format: '${name1}(${total amount across all period}), ${name2}(${total amount across all period}), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345). If the same person has recorded maximums in more than one month, omit duplicates in the output.",
        "intent": "Identify the person who placed the largest total purchase amount (in monetary terms) 'for each month' between January and March in 2022. Sort them in descending order by 'total purchase amount across all recorded time periods' (not just between January and March). Consider only transactions with a status of 'Complete' throughout the question. Final answer should be in the following format: '${name1}(${total amount across all period}), ${name2}(${total amount across all period}), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345). If the same person has recorded maximums in more than one month, omit duplicates in the output.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'Sales' -> 'Orders', filter by status and month for each month, and calculate the sum of 'Grand Total' for each person. For each person obtained, calculate the total amount without filtering by month. You will get the following result: Jan(Samantha Jones, sum=964.0), Feb(Michael Nguyen, sum=893.2), Mar(Grace Nguyen, sum=1069.9), Apr(Lucy Garcia, sum=899.1), May(Ava Brown, sum=1068.8), Jun(Jennifer White, sum=987.8), Jul(Jane Smith, sum=998.0), Aug(John Smith, sum=957.8), Sep(Lily Potter, sum=682.2), Oct(Jane Doe, sum=889.9), Nov(Grace Nguyen, sum=1069.9), Dec(Jason Miller, sum=842.4).",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "medium",
            "period1": "January",
            "period2": "March",
            "checkpoint": "Jan(Samantha Jones, sum=964.0), Feb(Michael Nguyen, sum=893.2), Mar(Grace Nguyen, sum=1069.9)",
            "answer": "Grace Nguyen(1,070), Samantha Jones(964), Michael Nguyen(893)"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Grace Nguyen(1,070), Samantha Jones(964), Michael Nguyen(893)"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Grace Nguyen(1,070), Samantha Jones(964), Michael Nguyen(893)"
        }
    },
    {
        "task_id": 10051,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Identify the person who placed the largest total purchase amount (in monetary terms) 'for each month' between {{period1}} and {{period2}} in 2022. Sort them in descending order by 'total purchase amount across all recorded time periods' (not just between {{period1}} and {{period2}}). Consider only transactions with a status of 'Complete' throughout the question. Final answer should be in the following format: '${name1}(${total amount across all period}), ${name2}(${total amount across all period}), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345). If the same person has recorded maximums in more than one month, omit duplicates in the output.",
        "intent": "Identify the person who placed the largest total purchase amount (in monetary terms) 'for each month' between January and April in 2022. Sort them in descending order by 'total purchase amount across all recorded time periods' (not just between January and April). Consider only transactions with a status of 'Complete' throughout the question. Final answer should be in the following format: '${name1}(${total amount across all period}), ${name2}(${total amount across all period}), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345). If the same person has recorded maximums in more than one month, omit duplicates in the output.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'Sales' -> 'Orders', filter by status and month for each month, and calculate the sum of 'Grand Total' for each person. For each person obtained, calculate the total amount without filtering by month. You will get the following result: Jan(Samantha Jones, sum=964.0), Feb(Michael Nguyen, sum=893.2), Mar(Grace Nguyen, sum=1069.9), Apr(Lucy Garcia, sum=899.1), May(Ava Brown, sum=1068.8), Jun(Jennifer White, sum=987.8), Jul(Jane Smith, sum=998.0), Aug(John Smith, sum=957.8), Sep(Lily Potter, sum=682.2), Oct(Jane Doe, sum=889.9), Nov(Grace Nguyen, sum=1069.9), Dec(Jason Miller, sum=842.4).",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "medium",
            "period1": "January",
            "period2": "April",
            "checkpoint": "Jan(Samantha Jones, sum=964.0), Feb(Michael Nguyen, sum=893.2), Mar(Grace Nguyen, sum=1069.9), Apr(Lucy Garcia, sum=899.1)",
            "answer": "Grace Nguyen(1,070), Samantha Jones(964), Lucy Garcia(899), Michael Nguyen(893)"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Grace Nguyen(1,070), Samantha Jones(964), Lucy Garcia(899), Michael Nguyen(893)"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Grace Nguyen(1,070), Samantha Jones(964), Lucy Garcia(899), Michael Nguyen(893)"
        }
    },
    {
        "task_id": 10052,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Identify the person who placed the largest total purchase amount (in monetary terms) 'for each month' between {{period1}} and {{period2}} in 2022. Sort them in descending order by 'total purchase amount across all recorded time periods' (not just between {{period1}} and {{period2}}). Consider only transactions with a status of 'Complete' throughout the question. Final answer should be in the following format: '${name1}(${total amount across all period}), ${name2}(${total amount across all period}), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345). If the same person has recorded maximums in more than one month, omit duplicates in the output.",
        "intent": "Identify the person who placed the largest total purchase amount (in monetary terms) 'for each month' between January and June in 2022. Sort them in descending order by 'total purchase amount across all recorded time periods' (not just between January and June). Consider only transactions with a status of 'Complete' throughout the question. Final answer should be in the following format: '${name1}(${total amount across all period}), ${name2}(${total amount across all period}), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345). If the same person has recorded maximums in more than one month, omit duplicates in the output.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'Sales' -> 'Orders', filter by status and month for each month, and calculate the sum of 'Grand Total' for each person. For each person obtained, calculate the total amount without filtering by month. You will get the following result: Jan(Samantha Jones, sum=964.0), Feb(Michael Nguyen, sum=893.2), Mar(Grace Nguyen, sum=1069.9), Apr(Lucy Garcia, sum=899.1), May(Ava Brown, sum=1068.8), Jun(Jennifer White, sum=987.8), Jul(Jane Smith, sum=998.0), Aug(John Smith, sum=957.8), Sep(Lily Potter, sum=682.2), Oct(Jane Doe, sum=889.9), Nov(Grace Nguyen, sum=1069.9), Dec(Jason Miller, sum=842.4).",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "medium",
            "period1": "January",
            "period2": "June",
            "checkpoint": "Jan(Samantha Jones, sum=964.0), Feb(Michael Nguyen, sum=893.2), Mar(Grace Nguyen, sum=1069.9), Apr(Lucy Garcia, sum=899.1), May(Ava Brown, sum=1068.8), Jun(Jennifer White, sum=987.8)",
            "answer": "Grace Nguyen(1,070), Ava Brown(1,069), Jennifer White(988), Samantha Jones(964), Lucy Garcia(899), Michael Nguyen(893)"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Grace Nguyen(1,070), Ava Brown(1,069), Jennifer White(988), Samantha Jones(964), Lucy Garcia(899), Michael Nguyen(893)"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Grace Nguyen(1,070), Ava Brown(1,069), Jennifer White(988), Samantha Jones(964), Lucy Garcia(899), Michael Nguyen(893)"
        }
    },
    {
        "task_id": 10053,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Identify the person who placed the largest total purchase amount (in monetary terms) 'for each month' between {{period1}} and {{period2}} in 2022. Sort them in descending order by 'total purchase amount across all recorded time periods' (not just between {{period1}} and {{period2}}). Consider only transactions with a status of 'Complete' throughout the question. Final answer should be in the following format: '${name1}(${total amount across all period}), ${name2}(${total amount across all period}), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345). If the same person has recorded maximums in more than one month, omit duplicates in the output.",
        "intent": "Identify the person who placed the largest total purchase amount (in monetary terms) 'for each month' between January and September in 2022. Sort them in descending order by 'total purchase amount across all recorded time periods' (not just between January and September). Consider only transactions with a status of 'Complete' throughout the question. Final answer should be in the following format: '${name1}(${total amount across all period}), ${name2}(${total amount across all period}), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345). If the same person has recorded maximums in more than one month, omit duplicates in the output.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'Sales' -> 'Orders', filter by status and month for each month, and calculate the sum of 'Grand Total' for each person. For each person obtained, calculate the total amount without filtering by month. You will get the following result: Jan(Samantha Jones, sum=964.0), Feb(Michael Nguyen, sum=893.2), Mar(Grace Nguyen, sum=1069.9), Apr(Lucy Garcia, sum=899.1), May(Ava Brown, sum=1068.8), Jun(Jennifer White, sum=987.8), Jul(Jane Smith, sum=998.0), Aug(John Smith, sum=957.8), Sep(Lily Potter, sum=682.2), Oct(Jane Doe, sum=889.9), Nov(Grace Nguyen, sum=1069.9), Dec(Jason Miller, sum=842.4).",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "medium",
            "period1": "January",
            "period2": "September",
            "checkpoint": "Jan(Samantha Jones, sum=964.0), Feb(Michael Nguyen, sum=893.2), Mar(Grace Nguyen, sum=1069.9), Apr(Lucy Garcia, sum=899.1), May(Ava Brown, sum=1068.8), Jun(Jennifer White, sum=987.8), Jul(Jane Smith, sum=998.0), Aug(John Smith, sum=957.8), Sep(Lily Potter, sum=682.2)",
            "answer": "Grace Nguyen(1,070), Ava Brown(1,069), Jane Smith(998), Jennifer White(988), Samantha Jones(964), John Smith(958), Lucy Garcia(899), Michael Nguyen(893), Lily Potter(682)"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Grace Nguyen(1,070), Ava Brown(1,069), Jane Smith(998), Jennifer White(988), Samantha Jones(964), John Smith(958), Lucy Garcia(899), Michael Nguyen(893), Lily Potter(682)"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Grace Nguyen(1,070), Ava Brown(1,069), Jane Smith(998), Jennifer White(988), Samantha Jones(964), John Smith(958), Lucy Garcia(899), Michael Nguyen(893), Lily Potter(682)"
        }
    },
    {
        "task_id": 10054,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Identify the person who placed the largest total purchase amount (in monetary terms) 'for each month' between {{period1}} and {{period2}} in 2022. Sort them in descending order by 'total purchase amount across all recorded time periods' (not just between {{period1}} and {{period2}}). Consider only transactions with a status of 'Complete' throughout the question. Final answer should be in the following format: '${name1}(${total amount across all period}), ${name2}(${total amount across all period}), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345). If the same person has recorded maximums in more than one month, omit duplicates in the output.",
        "intent": "Identify the person who placed the largest total purchase amount (in monetary terms) 'for each month' between January and December in 2022. Sort them in descending order by 'total purchase amount across all recorded time periods' (not just between January and December). Consider only transactions with a status of 'Complete' throughout the question. Final answer should be in the following format: '${name1}(${total amount across all period}), ${name2}(${total amount across all period}), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345). If the same person has recorded maximums in more than one month, omit duplicates in the output.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'Sales' -> 'Orders', filter by status and month for each month, and calculate the sum of 'Grand Total' for each person. For each person obtained, calculate the total amount without filtering by month. You will get the following result: Jan(Samantha Jones, sum=964.0), Feb(Michael Nguyen, sum=893.2), Mar(Grace Nguyen, sum=1069.9), Apr(Lucy Garcia, sum=899.1), May(Ava Brown, sum=1068.8), Jun(Jennifer White, sum=987.8), Jul(Jane Smith, sum=998.0), Aug(John Smith, sum=957.8), Sep(Lily Potter, sum=682.2), Oct(Jane Doe, sum=889.9), Nov(Grace Nguyen, sum=1069.9), Dec(Jason Miller, sum=842.4).",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "medium",
            "period1": "January",
            "period2": "December",
            "checkpoint": "Jan(Samantha Jones, sum=964.0), Feb(Michael Nguyen, sum=893.2), Mar(Grace Nguyen, sum=1069.9), Apr(Lucy Garcia, sum=899.1), May(Ava Brown, sum=1068.8), Jun(Jennifer White, sum=987.8), Jul(Jane Smith, sum=998.0), Aug(John Smith, sum=957.8), Sep(Lily Potter, sum=682.2), Oct(Jane Doe, sum=889.9), Nov(Grace Nguyen, sum=1069.9), Dec(Jason Miller, sum=842.4)",
            "answer": "Grace Nguyen(1,070), Ava Brown(1,069), Jane Smith(998), Jennifer White(988), Samantha Jones(964), John Smith(958), Lucy Garcia(899), Michael Nguyen(893), Jane Doe(890), Jason Miller(842), Lily Potter(682)"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Grace Nguyen(1,070), Ava Brown(1,069), Jane Smith(998), Jennifer White(988), Samantha Jones(964), John Smith(958), Lucy Garcia(899), Michael Nguyen(893), Jane Doe(890), Jason Miller(842), Lily Potter(682)"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Grace Nguyen(1,070), Ava Brown(1,069), Jane Smith(998), Jennifer White(988), Samantha Jones(964), John Smith(958), Lucy Garcia(899), Michael Nguyen(893), Jane Doe(890), Jason Miller(842), Lily Potter(682)"
        }
    },
    {
        "task_id": 10060,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "For each zip code that belongs to {{state}}, calculate the total completed purchase amount (in monetary terms) by the people living in the zip code. Which zip has the highest purchase amount and how high is it? Final answer should be in the following format: '${zip}: ${purchase_amount}'. Round the purchase amount to the nearest integer just before answering each question. Amounts should be without '$',formatted with a comma as a thousands separator (e.g., 12,345).",
        "intent": "For each zip code that belongs to California, calculate the total completed purchase amount (in monetary terms) by the people living in the zip code. Which zip has the highest purchase amount and how high is it? Final answer should be in the following format: '${zip}: ${purchase_amount}'. Round the purchase amount to the nearest integer just before answering each question. Amounts should be without '$',formatted with a comma as a thousands separator (e.g., 12,345).",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "massive_memory",
        "description": "Go to 'Customers' -> 'All Customers', filter by state, and memorize the zip codes that appear in the filtered table. Then go to 'Sales' -> 'Orders', filter by status and zip code (you can add the Shipping Address column from 'Columns' with the gear icon. There you can type the zip code for filtering), and calculate the sum of 'Grand Total' in the filtered table.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "difficulty": "hard",
            "state": "California",
            "hint": "Zip can be found in 'Customers', but 'edit' -> 'orders' here includes canceled orders, so you have to use 'orders' page. Identify users with the same name by email.",
            "checkpoint": "94602: Sarah Miller(helloworld@yahoo.com, cnt=8, total=$1212.6), total: 1212.6",
            "answer": "94602: 1,213"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "94602: 1,213"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "94602: 1,213"
        }
    },
    {
        "task_id": 10061,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "For each zip code that belongs to {{state}}, calculate the total completed purchase amount (in monetary terms) by the people living in the zip code. Which zip has the highest purchase amount and how high is it? Final answer should be in the following format: '${zip}: ${purchase_amount}'. Round the purchase amount to the nearest integer just before answering each question. Amounts should be without '$',formatted with a comma as a thousands separator (e.g., 12,345).",
        "intent": "For each zip code that belongs to Illinois, calculate the total completed purchase amount (in monetary terms) by the people living in the zip code. Which zip has the highest purchase amount and how high is it? Final answer should be in the following format: '${zip}: ${purchase_amount}'. Round the purchase amount to the nearest integer just before answering each question. Amounts should be without '$',formatted with a comma as a thousands separator (e.g., 12,345).",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "massive_memory",
        "description": "Go to 'Customers' -> 'All Customers', filter by state, and memorize the zip codes that appear in the filtered table. Then go to 'Sales' -> 'Orders', filter by status and zip code (you can add the Shipping Address column from 'Columns' with the gear icon. There you can type the zip code for filtering), and calculate the sum of 'Grand Total' in the filtered table.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "difficulty": "hard",
            "state": "Illinois",
            "hint": "Zip can be found in 'Customers', but 'edit' -> 'orders' here includes canceled orders, so you have to use 'orders' page. Identify users with the same name by email.",
            "checkpoint": "60606: Michael Nguyen(michael.nguyen@yahoo.com, cnt=8, total=$893.2), total: 893.2",
            "answer": "60606: 893"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "60606: 893"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "60606: 893"
        }
    },
    {
        "task_id": 10062,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "For each zip code that belongs to {{state}}, calculate the total completed purchase amount (in monetary terms) by the people living in the zip code. Which zip has the highest purchase amount and how high is it? Final answer should be in the following format: '${zip}: ${purchase_amount}'. Round the purchase amount to the nearest integer just before answering each question. Amounts should be without '$',formatted with a comma as a thousands separator (e.g., 12,345).",
        "intent": "For each zip code that belongs to Colorado, calculate the total completed purchase amount (in monetary terms) by the people living in the zip code. Which zip has the highest purchase amount and how high is it? Final answer should be in the following format: '${zip}: ${purchase_amount}'. Round the purchase amount to the nearest integer just before answering each question. Amounts should be without '$',formatted with a comma as a thousands separator (e.g., 12,345).",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "massive_memory",
        "description": "Go to 'Customers' -> 'All Customers', filter by state, and memorize the zip codes that appear in the filtered table. Then go to 'Sales' -> 'Orders', filter by status and zip code (you can add the Shipping Address column from 'Columns' with the gear icon. There you can type the zip code for filtering), and calculate the sum of 'Grand Total' in the filtered table.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "difficulty": "hard",
            "state": "Colorado",
            "hint": "Zip can be found in 'Customers', but 'edit' -> 'orders' here includes canceled orders, so you have to use 'orders' page. Identify users with the same name by email.",
            "checkpoint": "80202: Lucy Garcia(artsygal123@hotmail.com, cnt=6, total=$899.1), Jason Miller(jason.miller@yahoo.com, cnt=5, total=$842.4), Olivia Jackson(olivia.jackson@gmail.com, cnt=0, total=$0.0), Nathan Chen(nathan.chen@gmail.com, cnt=0, total=$0.0), total: 1741.6",
            "answer": "80202: 1,742"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "80202: 1,742"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "80202: 1,742"
        }
    },
    {
        "task_id": 10063,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "For each zip code that belongs to {{state}}, calculate the total completed purchase amount (in monetary terms) by the people living in the zip code. Which zip has the highest purchase amount and how high is it? Final answer should be in the following format: '${zip}: ${purchase_amount}'. Round the purchase amount to the nearest integer just before answering each question. Amounts should be without '$',formatted with a comma as a thousands separator (e.g., 12,345).",
        "intent": "For each zip code that belongs to Texas, calculate the total completed purchase amount (in monetary terms) by the people living in the zip code. Which zip has the highest purchase amount and how high is it? Final answer should be in the following format: '${zip}: ${purchase_amount}'. Round the purchase amount to the nearest integer just before answering each question. Amounts should be without '$',formatted with a comma as a thousands separator (e.g., 12,345).",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "massive_memory",
        "description": "Go to 'Customers' -> 'All Customers', filter by state, and memorize the zip codes that appear in the filtered table. Then go to 'Sales' -> 'Orders', filter by status and zip code (you can add the Shipping Address column from 'Columns' with the gear icon. There you can type the zip code for filtering), and calculate the sum of 'Grand Total' in the filtered table.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "difficulty": "hard",
            "state": "Texas",
            "hint": "Zip can be found in 'Customers', but 'edit' -> 'orders' here includes canceled orders, so you have to use 'orders' page. Identify users with the same name by email.",
            "checkpoint": "75202: Bob Jones(bbjones@gmail.com, cnt=6, total=$530.0), total: 530.0",
            "answer": "75202: 530"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "75202: 530"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "75202: 530"
        }
    },
    {
        "task_id": 10064,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "For each zip code that belongs to {{state}}, calculate the total completed purchase amount (in monetary terms) by the people living in the zip code. Which zip has the highest purchase amount and how high is it? Final answer should be in the following format: '${zip}: ${purchase_amount}'. Round the purchase amount to the nearest integer just before answering each question. Amounts should be without '$',formatted with a comma as a thousands separator (e.g., 12,345).",
        "intent": "For each zip code that belongs to New York, calculate the total completed purchase amount (in monetary terms) by the people living in the zip code. Which zip has the highest purchase amount and how high is it? Final answer should be in the following format: '${zip}: ${purchase_amount}'. Round the purchase amount to the nearest integer just before answering each question. Amounts should be without '$',formatted with a comma as a thousands separator (e.g., 12,345).",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "massive_memory",
        "description": "Go to 'Customers' -> 'All Customers', filter by state, and memorize the zip codes that appear in the filtered table. Then go to 'Sales' -> 'Orders', filter by status and zip code (you can add the Shipping Address column from 'Columns' with the gear icon. There you can type the zip code for filtering), and calculate the sum of 'Grand Total' in the filtered table.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "difficulty": "hard",
            "state": "New York",
            "hint": "Zip can be found in 'Customers', but 'edit' -> 'orders' here includes canceled orders, so you have to use 'orders' page. Identify users with the same name by email.",
            "checkpoint": "10065: Julia Williams(jla_7781@gmail.com, cnt=2, total=$265.6), total: 265.6, 10001: John Doe(johndoe123@gmail.com, cnt=3, total=$371.0), Alex Martin(alex.martin@gmail.com, cnt=6, total=$733.0), Kate Jones(kate.jones@gmail.com, cnt=0, total=$0.0), Roberto Lopez(roberto.lopez@hotmail.com, cnt=0, total=$0.0), Jane Doe(jane.doe@gmail.com, cnt=0, total=$0.0), total: 1104.0",
            "answer": "10001: 1,104"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "10001: 1,104"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "10001: 1,104"
        }
    },
    {
        "task_id": 10070,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "I want to analyze the total purchase amount (in monetary terms) for each month between {{period1}} and {{period2}} in 2022. Sort the months in the descending order of absolute difference between complete and canceled purchase amount. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '${month1}(${absolute gap}), ${month2}(${absolute gap}), ...'. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering the question. Amounts should be without '$' and months should be without year.",
        "intent": "I want to analyze the total purchase amount (in monetary terms) for each month between January and April in 2022. Sort the months in the descending order of absolute difference between complete and canceled purchase amount. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '${month1}(${absolute gap}), ${month2}(${absolute gap}), ...'. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering the question. Amounts should be without '$' and months should be without year.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'sales' -> 'orders', filter by status and month, and calculate the sum of 'Grand Total'. You will get the following numbers... COMPLETE: Jan($1592), Feb($2126), Mar($1842), Apr($1090), May($1194), Jun($1391), Jul($1278), Aug($954), Sep($1362), Oct($445), Nov($551), Dec($1612), CANCELED: Jan($637), Feb($737), Mar($938), Apr($1353), May($650), Jun($1958), Jul($721), Aug($1569), Sep($810), Oct($585), Nov($1235), Dec($520) GAP: Jan($955), Feb($1388), Mar($904), Apr($263), May($544), Jun($567), Jul($557), Aug($614), Sep($551), Oct($140), Nov($684), Dec($1093)",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "hard",
            "period1": "January",
            "period2": "April",
            "checkpoint": "",
            "answer": "February(1388), January(955), March(904), April(263) |OR| February(1,388), January(955), March(904), April(263)"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "February(1388), January(955), March(904), April(263) |OR| February(1,388), January(955), March(904), April(263)"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "February(1388), January(955), March(904), April(263) |OR| February(1,388), January(955), March(904), April(263)"
        }
    },
    {
        "task_id": 10071,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "I want to analyze the total purchase amount (in monetary terms) for each month between {{period1}} and {{period2}} in 2022. Sort the months in the descending order of absolute difference between complete and canceled purchase amount. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '${month1}(${absolute gap}), ${month2}(${absolute gap}), ...'. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering the question. Amounts should be without '$' and months should be without year.",
        "intent": "I want to analyze the total purchase amount (in monetary terms) for each month between May and August in 2022. Sort the months in the descending order of absolute difference between complete and canceled purchase amount. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '${month1}(${absolute gap}), ${month2}(${absolute gap}), ...'. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering the question. Amounts should be without '$' and months should be without year.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'sales' -> 'orders', filter by status and month, and calculate the sum of 'Grand Total'. You will get the following numbers... COMPLETE: Jan($1592), Feb($2126), Mar($1842), Apr($1090), May($1194), Jun($1391), Jul($1278), Aug($954), Sep($1362), Oct($445), Nov($551), Dec($1612), CANCELED: Jan($637), Feb($737), Mar($938), Apr($1353), May($650), Jun($1958), Jul($721), Aug($1569), Sep($810), Oct($585), Nov($1235), Dec($520) GAP: Jan($955), Feb($1388), Mar($904), Apr($263), May($544), Jun($567), Jul($557), Aug($614), Sep($551), Oct($140), Nov($684), Dec($1093)",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "hard",
            "period1": "May",
            "period2": "August",
            "checkpoint": "",
            "answer": "August(614), June(567), July(557), May(544)"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "August(614), June(567), July(557), May(544)"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "August(614), June(567), July(557), May(544)"
        }
    },
    {
        "task_id": 10072,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "I want to analyze the total purchase amount (in monetary terms) for each month between {{period1}} and {{period2}} in 2022. Sort the months in the descending order of absolute difference between complete and canceled purchase amount. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '${month1}(${absolute gap}), ${month2}(${absolute gap}), ...'. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering the question. Amounts should be without '$' and months should be without year.",
        "intent": "I want to analyze the total purchase amount (in monetary terms) for each month between September and December in 2022. Sort the months in the descending order of absolute difference between complete and canceled purchase amount. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '${month1}(${absolute gap}), ${month2}(${absolute gap}), ...'. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering the question. Amounts should be without '$' and months should be without year.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'sales' -> 'orders', filter by status and month, and calculate the sum of 'Grand Total'. You will get the following numbers... COMPLETE: Jan($1592), Feb($2126), Mar($1842), Apr($1090), May($1194), Jun($1391), Jul($1278), Aug($954), Sep($1362), Oct($445), Nov($551), Dec($1612), CANCELED: Jan($637), Feb($737), Mar($938), Apr($1353), May($650), Jun($1958), Jul($721), Aug($1569), Sep($810), Oct($585), Nov($1235), Dec($520) GAP: Jan($955), Feb($1388), Mar($904), Apr($263), May($544), Jun($567), Jul($557), Aug($614), Sep($551), Oct($140), Nov($684), Dec($1093)",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "hard",
            "period1": "September",
            "period2": "December",
            "checkpoint": "",
            "answer": "December(1093), November(684), September(551), October(140) |OR| December(1,093), November(684), September(551), October(140)"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "December(1093), November(684), September(551), October(140) |OR| December(1,093), November(684), September(551), October(140)"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "December(1093), November(684), September(551), October(140) |OR| December(1,093), November(684), September(551), October(140)"
        }
    },
    {
        "task_id": 10073,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "I want to analyze the total purchase amount (in monetary terms) for each month between {{period1}} and {{period2}} in 2022. Sort the months in the descending order of absolute difference between complete and canceled purchase amount. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '${month1}(${absolute gap}), ${month2}(${absolute gap}), ...'. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering the question. Amounts should be without '$' and months should be without year.",
        "intent": "I want to analyze the total purchase amount (in monetary terms) for each month between January and June in 2022. Sort the months in the descending order of absolute difference between complete and canceled purchase amount. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '${month1}(${absolute gap}), ${month2}(${absolute gap}), ...'. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering the question. Amounts should be without '$' and months should be without year.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'sales' -> 'orders', filter by status and month, and calculate the sum of 'Grand Total'. You will get the following numbers... COMPLETE: Jan($1592), Feb($2126), Mar($1842), Apr($1090), May($1194), Jun($1391), Jul($1278), Aug($954), Sep($1362), Oct($445), Nov($551), Dec($1612), CANCELED: Jan($637), Feb($737), Mar($938), Apr($1353), May($650), Jun($1958), Jul($721), Aug($1569), Sep($810), Oct($585), Nov($1235), Dec($520) GAP: Jan($955), Feb($1388), Mar($904), Apr($263), May($544), Jun($567), Jul($557), Aug($614), Sep($551), Oct($140), Nov($684), Dec($1093)",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "hard",
            "period1": "January",
            "period2": "June",
            "checkpoint": "",
            "answer": "February(1388), January(955), March(904), June(567), May(544), April(263) |OR| February(1,388), January(955), March(904), June(567), May(544), April(263)"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "February(1388), January(955), March(904), June(567), May(544), April(263) |OR| February(1,388), January(955), March(904), June(567), May(544), April(263)"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "February(1388), January(955), March(904), June(567), May(544), April(263) |OR| February(1,388), January(955), March(904), June(567), May(544), April(263)"
        }
    },
    {
        "task_id": 10074,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "I want to analyze the total purchase amount (in monetary terms) for each month between {{period1}} and {{period2}} in 2022. Sort the months in the descending order of absolute difference between complete and canceled purchase amount. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '${month1}(${absolute gap}), ${month2}(${absolute gap}), ...'. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering the question. Amounts should be without '$' and months should be without year.",
        "intent": "I want to analyze the total purchase amount (in monetary terms) for each month between July and December in 2022. Sort the months in the descending order of absolute difference between complete and canceled purchase amount. Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '${month1}(${absolute gap}), ${month2}(${absolute gap}), ...'. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering the question. Amounts should be without '$' and months should be without year.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'sales' -> 'orders', filter by status and month, and calculate the sum of 'Grand Total'. You will get the following numbers... COMPLETE: Jan($1592), Feb($2126), Mar($1842), Apr($1090), May($1194), Jun($1391), Jul($1278), Aug($954), Sep($1362), Oct($445), Nov($551), Dec($1612), CANCELED: Jan($637), Feb($737), Mar($938), Apr($1353), May($650), Jun($1958), Jul($721), Aug($1569), Sep($810), Oct($585), Nov($1235), Dec($520) GAP: Jan($955), Feb($1388), Mar($904), Apr($263), May($544), Jun($567), Jul($557), Aug($614), Sep($551), Oct($140), Nov($684), Dec($1093)",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "difficulty": "hard",
            "period1": "July",
            "period2": "December",
            "checkpoint": "",
            "answer": "December(1093), November(684), August(614), July(557), September(551), October(140) |OR| December(1,093), November(684), August(614), July(557), September(551), October(140)"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "December(1093), November(684), August(614), July(557), September(551), October(140) |OR| December(1,093), November(684), August(614), July(557), September(551), October(140)"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "December(1093), November(684), August(614), July(557), September(551), October(140) |OR| December(1,093), November(684), August(614), July(557), September(551), October(140)"
        }
    },
    {
        "task_id": 10080,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Count the number of configurations registered for {{product_name1}} and {{product_name2}}. (1) Which product has more variety? (2) How many configurations are registered for the product in (1)? (3) How many reviews are assigned for the product in (1)? (4) How many 'Related Products, Up-Sells, and Cross-Sells' are registered in total for the product in (1)? Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${product name}, (2) ${number of configs}, (3) ${number of reviews}, (4) ${total number of related, up-sell, cross-sell products}'.",
        "intent": "Count the number of configurations registered for Breathe-Easy Tank and Cronus Yoga Pant. (1) Which product has more variety? (2) How many configurations are registered for the product in (1)? (3) How many reviews are assigned for the product in (1)? (4) How many 'Related Products, Up-Sells, and Cross-Sells' are registered in total for the product in (1)? Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${product name}, (2) ${number of configs}, (3) ${number of reviews}, (4) ${total number of related, up-sell, cross-sell products}'.",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go to 'catalog' -> 'products', filter by product name, and press the 'Configurable Product' row. There you can find/count each number. Note that the color column should be added via 'Columns' tab",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "difficulty": "hard",
            "product_name1": "Breathe-Easy Tank",
            "product_name2": "Cronus Yoga Pant",
            "checkpoint": "Breathe-Easy Tank:15, Cronus Yoga Pant:12",
            "answer_product_name": "Breathe-Easy Tank",
            "answer_num_configs": "15",
            "answer_num_reviews": "2",
            "answer_num_related": "8"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "(1) Breathe-Easy Tank, (2) 15, (3) 2, (4) 8"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "(1) Breathe-Easy Tank, (2) 15, (3) 2, (4) 8"
        }
    },
    {
        "task_id": 10081,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Count the number of configurations registered for {{product_name1}} and {{product_name2}}. (1) Which product has more variety? (2) How many configurations are registered for the product in (1)? (3) How many reviews are assigned for the product in (1)? (4) How many 'Related Products, Up-Sells, and Cross-Sells' are registered in total for the product in (1)? Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${product name}, (2) ${number of configs}, (3) ${number of reviews}, (4) ${total number of related, up-sell, cross-sell products}'.",
        "intent": "Count the number of configurations registered for Electra Bra Top and Stellar Solar Jacket. (1) Which product has more variety? (2) How many configurations are registered for the product in (1)? (3) How many reviews are assigned for the product in (1)? (4) How many 'Related Products, Up-Sells, and Cross-Sells' are registered in total for the product in (1)? Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${product name}, (2) ${number of configs}, (3) ${number of reviews}, (4) ${total number of related, up-sell, cross-sell products}'.",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go to 'catalog' -> 'products', filter by product name, and press the 'Configurable Product' row. There you can find/count each number. Note that the color column should be added via 'Columns' tab",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "difficulty": "hard",
            "product_name1": "Electra Bra Top",
            "product_name2": "Stellar Solar Jacket",
            "checkpoint": "Electra Bra Top:15, Stellar Solar Jacket:9",
            "answer_product_name": "Electra Bra Top",
            "answer_num_configs": "15",
            "answer_num_reviews": "4",
            "answer_num_related": "8"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "(1) Electra Bra Top, (2) 15, (3) 4, (4) 8"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "(1) Electra Bra Top, (2) 15, (3) 4, (4) 8"
        }
    },
    {
        "task_id": 10082,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Count the number of configurations registered for {{product_name1}} and {{product_name2}}. (1) Which product has more variety? (2) How many configurations are registered for the product in (1)? (3) How many reviews are assigned for the product in (1)? (4) How many 'Related Products, Up-Sells, and Cross-Sells' are registered in total for the product in (1)? Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${product name}, (2) ${number of configs}, (3) ${number of reviews}, (4) ${total number of related, up-sell, cross-sell products}'.",
        "intent": "Count the number of configurations registered for Bardot Capri and Hera Pullover Hoodie. (1) Which product has more variety? (2) How many configurations are registered for the product in (1)? (3) How many reviews are assigned for the product in (1)? (4) How many 'Related Products, Up-Sells, and Cross-Sells' are registered in total for the product in (1)? Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${product name}, (2) ${number of configs}, (3) ${number of reviews}, (4) ${total number of related, up-sell, cross-sell products}'.",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go to 'catalog' -> 'products', filter by product name, and press the 'Configurable Product' row. There you can find/count each number. Note that the color column should be added via 'Columns' tab",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "difficulty": "hard",
            "product_name1": "Bardot Capri",
            "product_name2": "Hera Pullover Hoodie",
            "checkpoint": "Bardot Capri:6, Hera Pullover Hoodie:15",
            "answer_product_name": "Hera Pullover Hoodie",
            "answer_num_configs": "15",
            "answer_num_reviews": "3",
            "answer_num_related": "8"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "(1) Hera Pullover Hoodie, (2) 15, (3) 3, (4) 8"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "(1) Hera Pullover Hoodie, (2) 15, (3) 3, (4) 8"
        }
    },
    {
        "task_id": 10083,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Count the number of configurations registered for {{product_name1}} and {{product_name2}}. (1) Which product has more variety? (2) How many configurations are registered for the product in (1)? (3) How many reviews are assigned for the product in (1)? (4) How many 'Related Products, Up-Sells, and Cross-Sells' are registered in total for the product in (1)? Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${product name}, (2) ${number of configs}, (3) ${number of reviews}, (4) ${total number of related, up-sell, cross-sell products}'.",
        "intent": "Count the number of configurations registered for Orestes Fitness Short and Portia Capri. (1) Which product has more variety? (2) How many configurations are registered for the product in (1)? (3) How many reviews are assigned for the product in (1)? (4) How many 'Related Products, Up-Sells, and Cross-Sells' are registered in total for the product in (1)? Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${product name}, (2) ${number of configs}, (3) ${number of reviews}, (4) ${total number of related, up-sell, cross-sell products}'.",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go to 'catalog' -> 'products', filter by product name, and press the 'Configurable Product' row. There you can find/count each number. Note that the color column should be added via 'Columns' tab",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "difficulty": "hard",
            "product_name1": "Orestes Fitness Short",
            "product_name2": "Portia Capri",
            "checkpoint": "Orestes Fitness Short:12, Portia Capri:6",
            "answer_product_name": "Orestes Fitness Short",
            "answer_num_configs": "12",
            "answer_num_reviews": "2",
            "answer_num_related": "8"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "(1) Orestes Fitness Short, (2) 12, (3) 2, (4) 8"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "(1) Orestes Fitness Short, (2) 12, (3) 2, (4) 8"
        }
    },
    {
        "task_id": 10084,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Count the number of configurations registered for {{product_name1}} and {{product_name2}}. (1) Which product has more variety? (2) How many configurations are registered for the product in (1)? (3) How many reviews are assigned for the product in (1)? (4) How many 'Related Products, Up-Sells, and Cross-Sells' are registered in total for the product in (1)? Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${product name}, (2) ${number of configs}, (3) ${number of reviews}, (4) ${total number of related, up-sell, cross-sell products}'.",
        "intent": "Count the number of configurations registered for WJ05 and MP01. (1) Which product has more variety? (2) How many configurations are registered for the product in (1)? (3) How many reviews are assigned for the product in (1)? (4) How many 'Related Products, Up-Sells, and Cross-Sells' are registered in total for the product in (1)? Final answer should be in the following format by carefully following the instructions on the use of commas and spaces: '(1) ${product name}, (2) ${number of configs}, (3) ${number of reviews}, (4) ${total number of related, up-sell, cross-sell products}'.",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go to 'catalog' -> 'products', filter by product name, and press the 'Configurable Product' row. There you can find/count each number. Note that the color column should be added via 'Columns' tab",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "difficulty": "hard",
            "product_name1": "WJ05",
            "product_name2": "MP01",
            "checkpoint": "WJ05(Riona Full Zip Jacket):15, MP01(Caesar Warm-Up Pant):12",
            "answer_product_name": "Riona Full Zip Jacket",
            "answer_num_configs": "15",
            "answer_num_reviews": "3",
            "answer_num_related": "8"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "(1) Riona Full Zip Jacket, (2) 15, (3) 3, (4) 8"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "(1) Riona Full Zip Jacket, (2) 15, (3) 3, (4) 8"
        }
    },
    {
        "task_id": 10090,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Among people in {{place}}, list the name of the people whose total amount is larger than the average of the people in the area. Consider only transactions with a status of 'Complete' and when calculating the average, exclude people who do not have complete purchase. Final answer should be in descending order of purchase amount and in the following format by carefully following the instructions on the use of commas and spaces: '${person1}(${purchase amount}), ${person2}(purchase amount), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345).",
        "intent": "Among people in California, list the name of the people whose total amount is larger than the average of the people in the area. Consider only transactions with a status of 'Complete' and when calculating the average, exclude people who do not have complete purchase. Final answer should be in descending order of purchase amount and in the following format by carefully following the instructions on the use of commas and spaces: '${person1}(${purchase amount}), ${person2}(purchase amount), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345).",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'Customers' -> 'All Customers', filter by the given place, and memorize the name and email adress of the people. Here, email adress is important to identify the same person as there are sometimes people with the same name. Then, go to 'Sales' -> 'Orders', filter by each person, calculate the average of 'Grand Total' for the people in the area.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "difficulty": "medium",
            "place": "California",
            "checkpoint": "Sarah Miller(cnt=8, total=$1212.6), Jane Smith(cnt=9, total=$998.0), Ava Brown(cnt=7, total=$1068.8), Alex Johnson(cnt=6, total=$845.4), Jennifer White(cnt=5, total=$987.8), Alexander Thomas(cnt=2, total=$189.0), avg=$883.6",
            "answer": "Sarah Miller(1,213), Ava Brown(1,069), Jane Smith(998), Jennifer White(988)"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Sarah Miller(1,213), Ava Brown(1,069), Jane Smith(998), Jennifer White(988)"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Sarah Miller(1,213), Ava Brown(1,069), Jane Smith(998), Jennifer White(988)"
        }
    },
    {
        "task_id": 10091,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Among people in {{place}}, list the name of the people whose total amount is larger than the average of the people in the area. Consider only transactions with a status of 'Complete' and when calculating the average, exclude people who do not have complete purchase. Final answer should be in descending order of purchase amount and in the following format by carefully following the instructions on the use of commas and spaces: '${person1}(${purchase amount}), ${person2}(purchase amount), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345).",
        "intent": "Among people in Texas, list the name of the people whose total amount is larger than the average of the people in the area. Consider only transactions with a status of 'Complete' and when calculating the average, exclude people who do not have complete purchase. Final answer should be in descending order of purchase amount and in the following format by carefully following the instructions on the use of commas and spaces: '${person1}(${purchase amount}), ${person2}(purchase amount), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345).",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'Customers' -> 'All Customers', filter by the given place, and memorize the name and email adress of the people. Here, email adress is important to identify the same person as there are sometimes people with the same name. Then, go to 'Sales' -> 'Orders', filter by each person, calculate the average of 'Grand Total' for the people in the area.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "difficulty": "medium",
            "place": "Texas",
            "checkpoint": "Bob Jones(cnt=6, total=$530.0), Bob Johnson(cnt=6, total=$528.0), Lisa Kim(cnt=2, total=$263.1), Olivia Lee(cnt=3, total=$374.2), Emma Davis(cnt=1, total=$109.0), Lisa Green(cnt=2, total=$269.4), Samantha Nguyen(cnt=1, total=$230.1), avg=$329.1",
            "answer": "Bob Jones(530), Bob Johnson(528), Olivia Lee(374)"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Bob Jones(530), Bob Johnson(528), Olivia Lee(374)"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Bob Jones(530), Bob Johnson(528), Olivia Lee(374)"
        }
    },
    {
        "task_id": 10092,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Among people in {{place}}, list the name of the people whose total amount is larger than the average of the people in the area. Consider only transactions with a status of 'Complete' and when calculating the average, exclude people who do not have complete purchase. Final answer should be in descending order of purchase amount and in the following format by carefully following the instructions on the use of commas and spaces: '${person1}(${purchase amount}), ${person2}(purchase amount), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345).",
        "intent": "Among people in Colorado, list the name of the people whose total amount is larger than the average of the people in the area. Consider only transactions with a status of 'Complete' and when calculating the average, exclude people who do not have complete purchase. Final answer should be in descending order of purchase amount and in the following format by carefully following the instructions on the use of commas and spaces: '${person1}(${purchase amount}), ${person2}(purchase amount), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345).",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'Customers' -> 'All Customers', filter by the given place, and memorize the name and email adress of the people. Here, email adress is important to identify the same person as there are sometimes people with the same name. Then, go to 'Sales' -> 'Orders', filter by each person, calculate the average of 'Grand Total' for the people in the area.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "difficulty": "medium",
            "place": "Colorado",
            "checkpoint": "Lucy Garcia(cnt=6, total=$899.1), Jason Miller(cnt=5, total=$842.4), avg=$870.8",
            "answer": "Lucy Garcia(899)"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Lucy Garcia(899)"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Lucy Garcia(899)"
        }
    },
    {
        "task_id": 10093,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Among people in {{place}}, list the name of the people whose total amount is larger than the average of the people in the area. Consider only transactions with a status of 'Complete' and when calculating the average, exclude people who do not have complete purchase. Final answer should be in descending order of purchase amount and in the following format by carefully following the instructions on the use of commas and spaces: '${person1}(${purchase amount}), ${person2}(purchase amount), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345).",
        "intent": "Among people in Illinois, list the name of the people whose total amount is larger than the average of the people in the area. Consider only transactions with a status of 'Complete' and when calculating the average, exclude people who do not have complete purchase. Final answer should be in descending order of purchase amount and in the following format by carefully following the instructions on the use of commas and spaces: '${person1}(${purchase amount}), ${person2}(purchase amount), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345).",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'Customers' -> 'All Customers', filter by the given place, and memorize the name and email adress of the people. Here, email adress is important to identify the same person as there are sometimes people with the same name. Then, go to 'Sales' -> 'Orders', filter by each person, calculate the average of 'Grand Total' for the people in the area.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "difficulty": "medium",
            "place": "Illinois",
            "checkpoint": "John Lee(cnt=4, total=$578.0), Lily Potter(cnt=4, total=$682.2), Michael Nguyen(cnt=8, total=$893.2), avg=$717.8",
            "answer": "Michael Nguyen(893)"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Michael Nguyen(893)"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Michael Nguyen(893)"
        }
    },
    {
        "task_id": 10094,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Among people in {{place}}, list the name of the people whose total amount is larger than the average of the people in the area. Consider only transactions with a status of 'Complete' and when calculating the average, exclude people who do not have complete purchase. Final answer should be in descending order of purchase amount and in the following format by carefully following the instructions on the use of commas and spaces: '${person1}(${purchase amount}), ${person2}(purchase amount), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345).",
        "intent": "Among people in New York, list the name of the people whose total amount is larger than the average of the people in the area. Consider only transactions with a status of 'Complete' and when calculating the average, exclude people who do not have complete purchase. Final answer should be in descending order of purchase amount and in the following format by carefully following the instructions on the use of commas and spaces: '${person1}(${purchase amount}), ${person2}(purchase amount), ...'. Round the purchase amount to the nearest integer. Amounts should be without '$', formatted with a comma as a thousands separator (e.g., 12,345).",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to 'Customers' -> 'All Customers', filter by the given place, and memorize the name and email adress of the people. Here, email adress is important to identify the same person as there are sometimes people with the same name. Then, go to 'Sales' -> 'Orders', filter by each person, calculate the average of 'Grand Total' for the people in the area.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "difficulty": "medium",
            "place": "New York",
            "checkpoint": "Julia Williams(cnt=2, total=$265.6), John Doe(cnt=3, total=$371.0), Alex Martin(cnt=6, total=$733.0), avg=$456.5",
            "answer": "Alex Martin(733)"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Alex Martin(733)"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Alex Martin(733)"
        }
    },
    {
        "task_id": 10100,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/review/product/index/filter//internal_reviews//form_key/RXMoSJ9fUA70DBmM/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "How much time has passed between the newest review and the oldest review in Approved status? Provide the answer in seconds. Answer only with the result and the unit, like 3 s.",
        "intent": "How much time has passed between the newest review and the oldest review in Approved status? Provide the answer in seconds. Answer only with the result and the unit, like 3 s.",
        "required_obs": "any",
        "type_main": "others",
        "description": "First, navigate to the Review page. Then, filter to show only the Approved reviews. Identify both the most recent and the oldest review. Finally, calculate the difference between them.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/review/product/index/filter//internal_reviews//form_key/RXMoSJ9fUA70DBmM/",
            "contents": "10 s",
            "checkpoint1": "12:15:10",
            "checkpoint2": "12:15:20",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "10 s"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "10 s"
        }
    },
    {
        "task_id": 10110,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/review/product/index/filter//internal_reviews//form_key/RXMoSJ9fUA70DBmM/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Calculate the average of the top {{number}} {{order}} submitted reviews with a status of {{status}}. Round to the nearest tenth. Output the final answer concisely.",
        "intent": "Calculate the average of the top 5 newest submitted reviews with a status of Approved. Round to the nearest tenth. Output the final answer concisely.",
        "required_obs": "image",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "First, navigate to the Review page. Then, filter to show only the Approved reviews. Sort the reviews in either newest or oldest order. Next, click the top 5 reviews in that order and retrieve the number of stars for each.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/review/product/index/filter//internal_reviews//form_key/RXMoSJ9fUA70DBmM/",
            "number": "5",
            "order": "newest",
            "status": "Approved",
            "contents": "3.8",
            "checkpoint1": "4, 5, 4, 2, 4",
            "checkpoint2": "19",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "3.8"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "3.8"
        }
    },
    {
        "task_id": 10111,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/review/product/index/filter//internal_reviews//form_key/RXMoSJ9fUA70DBmM/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Calculate the average of the top {{number}} {{order}} submitted reviews with a status of {{status}}. Round to the nearest tenth. Output the final answer concisely.",
        "intent": "Calculate the average of the top 5 oldest submitted reviews with a status of Approved. Round to the nearest tenth. Output the final answer concisely.",
        "required_obs": "image",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "First, navigate to the Review page. Then, filter to show only the Approved reviews. Sort the reviews in either newest or oldest order. Next, click the top 5 reviews in that order and retrieve the number of stars for each.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/review/product/index/filter//internal_reviews//form_key/RXMoSJ9fUA70DBmM/",
            "number": "5",
            "order": "oldest",
            "status": "Approved",
            "contents": "3.0",
            "checkpoint1": "2, 2, 2, 4, 5",
            "checkpoint2": "15",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "3.0"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "3.0"
        }
    },
    {
        "task_id": 10120,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the difference in average price between the top {{number}} {{state}} Simple Products with the Attribute {{attribute1}} and those with the Attribute {{attribute2}}? Here, if the same {{condition}} appears beyond the top {{number}}, include those as well. If a product with the same ID appears more than once, it is a bug, so please ignore it. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering.",
        "intent": "What is the difference in average price between the top 10 most recently updated Simple Products with the Attribute Bag and those with the Attribute Gear? Here, if the same updated time appears beyond the top 10, include those as well. If a product with the same ID appears more than once, it is a bug, so please ignore it. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering.",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "calc",
        "description": "First, go to the Product page. Then, apply filters using attribute1 and Simple Products. Sort the products to matches the specified status. Retrieve the prices of the top number products and calculate their average. Repeat the same process for attribute2, and compute the difference between the two averages.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product",
            "contents": "6",
            "number": "10",
            "state": "most recently updated",
            "condition": "updated time",
            "attribute1": "Bag",
            "attribute2": "Gear",
            "checkpoint1": "42.45454545",
            "checkpoint2": "48.25",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "6"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "6"
        }
    },
    {
        "task_id": 10121,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the difference in average price between the top {{number}} {{state}} Simple Products with the Attribute {{attribute1}} and those with the Attribute {{attribute2}}? Here, if the same {{condition}} appears beyond the top {{number}}, include those as well. If a product with the same ID appears more than once, it is a bug, so please ignore it. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering.",
        "intent": "What is the difference in average price between the top 50 oldest updated Simple Products with the Attribute Bottom and those with the Attribute Top? Here, if the same updated time appears beyond the top 50, include those as well. If a product with the same ID appears more than once, it is a bug, so please ignore it. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering.",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "calc",
        "description": "First, go to the Product page. Then, apply filters using attribute1 and Simple Products. Sort the products to matches the specified status. Retrieve the prices of the top number products and calculate their average. Repeat the same process for attribute2, and compute the difference between the two averages.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product",
            "contents": "13",
            "number": "50",
            "state": "oldest updated",
            "condition": "updated time",
            "attribute1": "Bottom",
            "attribute2": "Top",
            "checkpoint1": "45.45283019",
            "checkpoint2": "58.3125",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "13"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "13"
        }
    },
    {
        "task_id": 10122,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the difference in average price between the top {{number}} {{state}} Simple Products with the Attribute {{attribute1}} and those with the Attribute {{attribute2}}? Here, if the same {{condition}} appears beyond the top {{number}}, include those as well. If a product with the same ID appears more than once, it is a bug, so please ignore it. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering.",
        "intent": "What is the difference in average price between the top 10 lowest price Simple Products with the Attribute Bag and those with the Attribute Gear? Here, if the same price appears beyond the top 10, include those as well. If a product with the same ID appears more than once, it is a bug, so please ignore it. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering.",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "calc",
        "description": "First, go to the Product page. Then, apply filters using attribute1 and Simple Products. Sort the products to matches the specified status. Retrieve the prices of the top number products and calculate their average. Repeat the same process for attribute2, and compute the difference between the two averages.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product",
            "contents": "20",
            "number": "10",
            "state": "lowest price",
            "condition": "price",
            "attribute1": "Bag",
            "attribute2": "Gear",
            "checkpoint1": "37.41666667",
            "checkpoint2": "17.4",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "20"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "20"
        }
    },
    {
        "task_id": 10123,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the difference in average price between the top {{number}} {{state}} Simple Products with the Attribute {{attribute1}} and those with the Attribute {{attribute2}}? Here, if the same {{condition}} appears beyond the top {{number}}, include those as well. If a product with the same ID appears more than once, it is a bug, so please ignore it. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering.",
        "intent": "What is the difference in average price between the top 50 most recently updated Simple Products with the Attribute Bottom and those with the Attribute Top? Here, if the same updated time appears beyond the top 50, include those as well. If a product with the same ID appears more than once, it is a bug, so please ignore it. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering.",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "calc",
        "description": "First, go to the Product page. Then, apply filters using attribute1 and Simple Products. Sort the products to matches the specified status. Retrieve the prices of the top number products and calculate their average. Repeat the same process for attribute2, and compute the difference between the two averages.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product",
            "contents": "6",
            "number": "50",
            "state": "most recently updated",
            "condition": "updated time",
            "attribute1": "Bottom",
            "attribute2": "Top",
            "checkpoint1": "40.40909091",
            "checkpoint2": "34.28125",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "6"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "6"
        }
    },
    {
        "task_id": 10124,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the difference in average price between the top {{number}} {{state}} Simple Products with the Attribute {{attribute1}} and those with the Attribute {{attribute2}}? Here, if the same {{condition}} appears beyond the top {{number}}, include those as well. If a product with the same ID appears more than once, it is a bug, so please ignore it. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering.",
        "intent": "What is the difference in average price between the top 50 higest price Simple Products with the Attribute Bottom and those with the Attribute Top? Here, if the same price appears beyond the top 50, include those as well. If a product with the same ID appears more than once, it is a bug, so please ignore it. Do all calculations in decimals and round the purchase amount to the nearest integer just before answering.",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "calc",
        "description": "First, go to the Product page. Then, apply filters using attribute1 and Simple Products. Sort the products to matches the specified status. Retrieve the prices of the top number products and calculate their average. Repeat the same process for attribute2, and compute the difference between the two averages.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product",
            "contents": "9",
            "number": "50",
            "state": "higest price",
            "condition": "price",
            "attribute1": "Bottom",
            "attribute2": "Top",
            "checkpoint1": "74.8",
            "checkpoint2": "84.25",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "9"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "9"
        }
    },
    {
        "task_id": 10130,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "How many users are registered with a domain other than {{domain_name}}?",
        "intent": "How many users are registered with a domain other than gmail.com?",
        "required_obs": "any",
        "type_main": "others",
        "description": "First, navigate to the customer page. Then, count the number of users with the specified domain_name. Finally, subtract this number from the total number of users.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "contents": "33",
            "domain_name": "gmail.com",
            "checkpoint1": "70",
            "checkpoint2": "37",
            "checkpoint_info": "checkpoint1: total user number, checkpoint2: user number with domain_name",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "33"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "33"
        }
    },
    {
        "task_id": 10131,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "How many users are registered with a domain other than {{domain_name}}?",
        "intent": "How many users are registered with a domain other than yahoo.com?",
        "required_obs": "any",
        "type_main": "others",
        "description": "First, navigate to the customer page. Then, count the number of users with the specified domain_name. Finally, subtract this number from the total number of users.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "contents": "52",
            "domain_name": "yahoo.com",
            "checkpoint1": "70",
            "checkpoint2": "18",
            "checkpoint_info": "checkpoint1: total user number, checkpoint2: user number with domain_name",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "52"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "52"
        }
    },
    {
        "task_id": 10132,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "How many users are registered with a domain other than {{domain_name}}?",
        "intent": "How many users are registered with a domain other than hotmail.com?",
        "required_obs": "any",
        "type_main": "others",
        "description": "First, navigate to the customer page. Then, count the number of users with the specified domain_name. Finally, subtract this number from the total number of users.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "contents": "56",
            "domain_name": "hotmail.com",
            "checkpoint1": "70",
            "checkpoint2": "14",
            "checkpoint_info": "checkpoint1: total user number, checkpoint2: user number with domain_name",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "56"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "56"
        }
    },
    {
        "task_id": 10140,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "How many users are there outside {{state1}} and {{state2}} states?",
        "intent": "How many users are there outside New York and New Jersey states?",
        "required_obs": "any",
        "type_main": "others",
        "description": "First, navigate to the customer page. Then, count the number of users in state1 and state2. Finally, subtract this number from the total number of users.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "contents": "61",
            "state1": "New York",
            "state2": "New Jersey",
            "checkpoint1": "70",
            "checkpoint2": "6",
            "checkpoint3": "3",
            "checkpoint_info": "checkpoint1: total user number, checkpoint2: user number in state1, checkpoint3: user number in state2",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "61"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "61"
        }
    },
    {
        "task_id": 10141,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "How many users are there outside {{state1}} and {{state2}} states?",
        "intent": "How many users are there outside New Jersey and Florida states?",
        "required_obs": "any",
        "type_main": "others",
        "description": "First, navigate to the customer page. Then, count the number of users in state1 and state2. Finally, subtract this number from the total number of users.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "contents": "61",
            "state1": "New Jersey",
            "state2": "Florida",
            "checkpoint1": "70",
            "checkpoint2": "3",
            "checkpoint3": "6",
            "checkpoint_info": "checkpoint1: total user number, checkpoint2: user number in state1, checkpoint3: user number in state2",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "61"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "61"
        }
    },
    {
        "task_id": 10142,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "How many users are there outside {{state1}} and {{state2}} states?",
        "intent": "How many users are there outside Alabama and Illinois states?",
        "required_obs": "any",
        "type_main": "others",
        "description": "First, navigate to the customer page. Then, count the number of users in state1 and state2. Finally, subtract this number from the total number of users.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "contents": "62",
            "state1": "Alabama",
            "state2": "Illinois",
            "checkpoint1": "70",
            "checkpoint2": "1",
            "checkpoint3": "7",
            "checkpoint_info": "checkpoint1: total user number, checkpoint2: user number in state1, checkpoint3: user number in state2",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "62"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "62"
        }
    },
    {
        "task_id": 10143,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "How many users are there outside {{state1}} and {{state2}} states?",
        "intent": "How many users are there outside Colorado and Washington states?",
        "required_obs": "any",
        "type_main": "others",
        "description": "First, navigate to the customer page. Then, count the number of users in state1 and state2. Finally, subtract this number from the total number of users.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "contents": "60",
            "state1": "Colorado",
            "state2": "Washington",
            "checkpoint1": "70",
            "checkpoint2": "4",
            "checkpoint3": "6",
            "checkpoint_info": "checkpoint1: total user number, checkpoint2: user number in state1, checkpoint3: user number in state2",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "60"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "60"
        }
    },
    {
        "task_id": 10144,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "How many users are there outside {{state1}} and {{state2}} states?",
        "intent": "How many users are there outside Massachusetts and New York states?",
        "required_obs": "any",
        "type_main": "others",
        "description": "First, navigate to the customer page. Then, count the number of users in state1 and state2. Finally, subtract this number from the total number of users.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/customer/index/",
            "contents": "59",
            "state1": "Massachusetts",
            "state2": "New York",
            "checkpoint1": "70",
            "checkpoint2": "5",
            "checkpoint3": "6",
            "checkpoint_info": "checkpoint1: total user number, checkpoint2: user number in state1, checkpoint3: user number in state2",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "59"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "59"
        }
    },
    {
        "task_id": 10150,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_customer/orders/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Please provide the top 10 days with the highest sales from {{month1}}/{{year1}} to {{month2}}/{{year2}}. Present the dates in the M/D/YYYY format, without leading zeros (e.g., January 1st in 2022 should be written as 1/1/2022). List them in descending order of sales. Example: [2/5/2022, 1/1/2022, 3/15/2022, ...]",
        "intent": "Please provide the top 10 days with the highest sales from 1/2022 to 6/2022. Present the dates in the M/D/YYYY format, without leading zeros (e.g., January 1st in 2022 should be written as 1/1/2022). List them in descending order of sales. Example: [2/5/2022, 1/1/2022, 3/15/2022, ...]",
        "required_obs": "any",
        "type_main": "massive_memory",
        "description": "First, navigate to the Order Count Report in the reports section. Then, retrieve the sales data for the specified period. Finally, identify the top 10 days with the highest sales.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_customer/orders/",
            "contents": "2/8/2022, 5/2/2022, 4/21/2022, 2/3/2022, 3/17/2022, 1/20/2022, 2/6/2022, 1/28/2022, 4/6/2022, 5/29/2022",
            "year1": "2022",
            "year2": "2022",
            "month1": "1",
            "month2": "6",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[2/8/2022, 5/2/2022, 4/21/2022, 2/3/2022, 3/17/2022, 1/20/2022, 2/6/2022, 1/28/2022, 4/6/2022, 5/29/2022]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[2/8/2022, 5/2/2022, 4/21/2022, 2/3/2022, 3/17/2022, 1/20/2022, 2/6/2022, 1/28/2022, 4/6/2022, 5/29/2022]"
        }
    },
    {
        "task_id": 10151,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_customer/orders/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Please provide the top 10 days with the highest sales from {{month1}}/{{year1}} to {{month2}}/{{year2}}. Present the dates in the M/D/YYYY format, without leading zeros (e.g., January 1st in 2022 should be written as 1/1/2022). List them in descending order of sales. Example: [2/5/2022, 1/1/2022, 3/15/2022, ...]",
        "intent": "Please provide the top 10 days with the highest sales from 7/2022 to 12/2022. Present the dates in the M/D/YYYY format, without leading zeros (e.g., January 1st in 2022 should be written as 1/1/2022). List them in descending order of sales. Example: [2/5/2022, 1/1/2022, 3/15/2022, ...]",
        "required_obs": "any",
        "type_main": "massive_memory",
        "description": "First, navigate to the Order Count Report in the reports section. Then, retrieve the sales data for the specified period. Finally, identify the top 10 days with the highest sales.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_customer/orders/",
            "contents": "12/11/2022, 8/28/2022, 12/6/2022, 9/2/2022, 7/6/2022, 12/24/2022, 9/23/2022, 7/12/2022, 9/30/2022, 11/3/2022",
            "year1": "2022",
            "year2": "2022",
            "month1": "7",
            "month2": "12",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[12/11/2022, 8/28/2022, 12/6/2022, 9/2/2022, 7/6/2022, 12/24/2022, 9/23/2022, 7/12/2022, 9/30/2022, 11/3/2022]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[12/11/2022, 8/28/2022, 12/6/2022, 9/2/2022, 7/6/2022, 12/24/2022, 9/23/2022, 7/12/2022, 9/30/2022, 11/3/2022]"
        }
    },
    {
        "task_id": 10152,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_customer/orders/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Please provide the top 10 days with the highest sales from {{month1}}/{{year1}} to {{month2}}/{{year2}}. Present the dates in the M/D/YYYY format, without leading zeros (e.g., January 1st in 2022 should be written as 1/1/2022). List them in descending order of sales. Example: [2/5/2022, 1/1/2022, 3/15/2022, ...]",
        "intent": "Please provide the top 10 days with the highest sales from 1/2023 to 5/2023. Present the dates in the M/D/YYYY format, without leading zeros (e.g., January 1st in 2022 should be written as 1/1/2022). List them in descending order of sales. Example: [2/5/2022, 1/1/2022, 3/15/2022, ...]",
        "required_obs": "any",
        "type_main": "massive_memory",
        "description": "First, navigate to the Order Count Report in the reports section. Then, retrieve the sales data for the specified period. Finally, identify the top 10 days with the highest sales.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_customer/orders/",
            "contents": "4/5/2023, 3/31/2023, 1/9/2023, 1/12/2023, 5/31/2023, 1/28/2023, 1/6/2023, 5/28/2023, 1/16/2023, 1/13/2023",
            "year1": "2023",
            "year2": "2023",
            "month1": "1",
            "month2": "5",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[4/5/2023, 3/31/2023, 1/9/2023, 1/12/2023, 5/31/2023, 1/28/2023, 1/6/2023, 5/28/2023, 1/16/2023, 1/13/2023]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[4/5/2023, 3/31/2023, 1/9/2023, 1/12/2023, 5/31/2023, 1/28/2023, 1/6/2023, 5/28/2023, 1/16/2023, 1/13/2023]"
        }
    },
    {
        "task_id": 10153,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_customer/orders/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Please provide the top 10 days with the highest sales from {{month1}}/{{year1}} to {{month2}}/{{year2}}. Present the dates in the M/D/YYYY format, without leading zeros (e.g., January 1st in 2022 should be written as 1/1/2022). List them in descending order of sales. Example: [2/5/2022, 1/1/2022, 3/15/2022, ...]",
        "intent": "Please provide the top 10 days with the highest sales from 1/2022 to 5/2023. Present the dates in the M/D/YYYY format, without leading zeros (e.g., January 1st in 2022 should be written as 1/1/2022). List them in descending order of sales. Example: [2/5/2022, 1/1/2022, 3/15/2022, ...]",
        "required_obs": "any",
        "type_main": "massive_memory",
        "description": "First, navigate to the Order Count Report in the reports section. Then, retrieve the sales data for the specified period. Finally, identify the top 10 days with the highest sales.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_customer/orders/",
            "contents": "4/5/2023, 12/11/2022, 2/8/2022, 5/2/2022, 8/28/2022, 4/21/2022, 2/3/2022, 12/6/2022, 3/17/2022, 1/20/2022",
            "year1": "2022",
            "year2": "2023",
            "month1": "1",
            "month2": "5",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[4/5/2023, 12/11/2022, 2/8/2022, 5/2/2022, 8/28/2022, 4/21/2022, 2/3/2022, 12/6/2022, 3/17/2022, 1/20/2022]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[4/5/2023, 12/11/2022, 2/8/2022, 5/2/2022, 8/28/2022, 4/21/2022, 2/3/2022, 12/6/2022, 3/17/2022, 1/20/2022]"
        }
    },
    {
        "task_id": 10160,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the total cost for completed orders that {{user1}} and {{user2}} purchased?",
        "intent": "What is the total cost for completed orders that Alex Martin and Grace Nguyen purchased?",
        "required_obs": "any",
        "type_main": "calc",
        "description": "First, navigate to the sales order page. Then, calculate the total cost of completed orders for user1 and user2. Finally, sum the total costs for user1 and user2.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order",
            "contents": "1802.88 |OR| 1,802.88",
            "user1": "Alex Martin",
            "user2": "Grace Nguyen",
            "checkpoint1": "733.00",
            "checkpoint2": "1069.88",
            "checkpoint_info": "checkpoint1: total cost for user1, checkpoint2: total cost for user2",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "1802.88 |OR| 1,802.88"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "1802.88 |OR| 1,802.88"
        }
    },
    {
        "task_id": 10161,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the total cost for completed orders that {{user1}} and {{user2}} purchased?",
        "intent": "What is the total cost for completed orders that Jane Doe and Julia Williams purchased?",
        "required_obs": "any",
        "type_main": "calc",
        "description": "First, navigate to the sales order page. Then, calculate the total cost of completed orders for user1 and user2. Finally, sum the total costs for user1 and user2.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order",
            "contents": "1155.52 |OR| 1,155.52",
            "user1": "Jane Doe",
            "user2": "Julia Williams",
            "checkpoint1": "889.92",
            "checkpoint2": "265.60",
            "checkpoint_info": "checkpoint1: total cost for user1, checkpoint2: total cost for user2",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "1155.52 |OR| 1,155.52"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "1155.52 |OR| 1,155.52"
        }
    },
    {
        "task_id": 10162,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the total cost for completed orders that {{user1}} and {{user2}} purchased?",
        "intent": "What is the total cost for completed orders that Lily Potter and Daniel Jackson purchased?",
        "required_obs": "any",
        "type_main": "calc",
        "description": "First, navigate to the sales order page. Then, calculate the total cost of completed orders for user1 and user2. Finally, sum the total costs for user1 and user2.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order",
            "contents": "843.6",
            "user1": "Lily Potter",
            "user2": "Daniel Jackson",
            "checkpoint1": "682.20",
            "checkpoint2": "161.40",
            "checkpoint_info": "checkpoint1: total cost for user1, checkpoint2: total cost for user2",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "843.6"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "843.6"
        }
    },
    {
        "task_id": 10163,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the total cost for completed orders that {{user1}} and {{user2}} purchased?",
        "intent": "What is the total cost for completed orders that Sarah Miller and Michael Nguyen purchased?",
        "required_obs": "any",
        "type_main": "calc",
        "description": "First, navigate to the sales order page. Then, calculate the total cost of completed orders for user1 and user2. Finally, sum the total costs for user1 and user2.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order",
            "contents": "2105.84 |OR| 2,105.84",
            "user1": "Sarah Miller",
            "user2": "Michael Nguyen",
            "checkpoint1": "1212.60",
            "checkpoint2": "893.24",
            "checkpoint_info": "checkpoint1: total cost for user1, checkpoint2: total cost for user2",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "2105.84 |OR| 2,105.84"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "2105.84 |OR| 2,105.84"
        }
    },
    {
        "task_id": 10164,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the total cost for completed orders that {{user1}} and {{user2}} purchased?",
        "intent": "What is the total cost for completed orders that Samantha Jones and Adam Garcia purchased?",
        "required_obs": "any",
        "type_main": "calc",
        "description": "First, navigate to the sales order page. Then, calculate the total cost of completed orders for user1 and user2. Finally, sum the total costs for user1 and user2.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order",
            "contents": "1834.56 |OR| 1,834.56",
            "user1": "Samantha Jones",
            "user2": "Adam Garcia",
            "checkpoint1": "964.00",
            "checkpoint2": "870.56",
            "checkpoint_info": "checkpoint1: total cost for user1, checkpoint2: total cost for user2",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "1834.56 |OR| 1,834.56"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "1834.56 |OR| 1,834.56"
        }
    },
    {
        "task_id": 10170,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/review/product",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List the IDs of approved reviews that contain the word {{word}} from the top 50 most recently created approved reviews. Sort them by creation date, from newest to oldest. When creation dates are the same, treat those appearing earlier in the system display-when sorted from newest to oldest-as being earlier. Provide the answer in the following format: [ID1, ID2, ID3, ...]",
        "intent": "List the IDs of approved reviews that contain the word good from the top 50 most recently created approved reviews. Sort them by creation date, from newest to oldest. When creation dates are the same, treat those appearing earlier in the system display-when sorted from newest to oldest-as being earlier. Provide the answer in the following format: [ID1, ID2, ID3, ...]",
        "required_obs": "any",
        "type_main": "massive_memory",
        "description": "First, navigate to the Review page. Then, filter to show only the Approved reviews. Retrieve the top 50 most recently created approved reviews. Finally, identify the IDs of those reviews that contain the word {{word}}.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/review/product",
            "contents": "338, 343, 308, 330, 329, 326, 282",
            "word": "good",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "[338, 343, 308, 330, 329, 326, 282]"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "338, 343, 308, 330, 329, 326, 282"
        }
    },
    {
        "task_id": 10171,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/review/product",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List the IDs of approved reviews that contain the word {{word}} from the top 50 most recently created approved reviews. Sort them by creation date, from newest to oldest. When creation dates are the same, treat those appearing earlier in the system display-when sorted from newest to oldest-as being earlier. Provide the answer in the following format: [ID1, ID2, ID3, ...]",
        "intent": "List the IDs of approved reviews that contain the word never from the top 50 most recently created approved reviews. Sort them by creation date, from newest to oldest. When creation dates are the same, treat those appearing earlier in the system display-when sorted from newest to oldest-as being earlier. Provide the answer in the following format: [ID1, ID2, ID3, ...]",
        "required_obs": "any",
        "type_main": "massive_memory",
        "description": "First, navigate to the Review page. Then, filter to show only the Approved reviews. Retrieve the top 50 most recently created approved reviews. Finally, identify the IDs of those reviews that contain the word {{word}}.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/review/product",
            "contents": "337, 307, 326",
            "word": "never",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "[337, 307, 326]"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "337, 307, 326"
        }
    },
    {
        "task_id": 10172,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/review/product",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List the IDs of approved reviews that contain the word {{word}} from the top 50 most recently created approved reviews. Sort them by creation date, from newest to oldest. When creation dates are the same, treat those appearing earlier in the system display-when sorted from newest to oldest-as being earlier. Provide the answer in the following format: [ID1, ID2, ID3, ...]",
        "intent": "List the IDs of approved reviews that contain the word love from the top 50 most recently created approved reviews. Sort them by creation date, from newest to oldest. When creation dates are the same, treat those appearing earlier in the system display-when sorted from newest to oldest-as being earlier. Provide the answer in the following format: [ID1, ID2, ID3, ...]",
        "required_obs": "any",
        "type_main": "massive_memory",
        "description": "First, navigate to the Review page. Then, filter to show only the Approved reviews. Retrieve the top 50 most recently created approved reviews. Finally, identify the IDs of those reviews that contain the word {{word}}.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/review/product",
            "contents": "352, 340, 344, 346, 314, 315, 310, 306, 323",
            "word": "love",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "[352, 340, 344, 346, 314, 315, 310, 306, 323]"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "352, 340, 344, 346, 314, 315, 310, 306, 323"
        }
    },
    {
        "task_id": 10173,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/review/product",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List the IDs of approved reviews that contain the word {{word}} from the top 50 most recently created approved reviews. Sort them by creation date, from newest to oldest. When creation dates are the same, treat those appearing earlier in the system display-when sorted from newest to oldest-as being earlier. Provide the answer in the following format: [ID1, ID2, ID3, ...]",
        "intent": "List the IDs of approved reviews that contain the word cute from the top 50 most recently created approved reviews. Sort them by creation date, from newest to oldest. When creation dates are the same, treat those appearing earlier in the system display-when sorted from newest to oldest-as being earlier. Provide the answer in the following format: [ID1, ID2, ID3, ...]",
        "required_obs": "any",
        "type_main": "massive_memory",
        "description": "First, navigate to the Review page. Then, filter to show only the Approved reviews. Retrieve the top 50 most recently created approved reviews. Finally, identify the IDs of those reviews that contain the word {{word}}.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/review/product",
            "contents": "339, 346, 315, 312, 311, 305, 328, 327, 326, 322",
            "word": "cute",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "[339, 346, 315, 312, 311, 305, 328, 327, 326, 322]"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "339, 346, 315, 312, 311, 305, 328, 327, 326, 322"
        }
    },
    {
        "task_id": 10174,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/review/product",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List the IDs of approved reviews that contain the word {{word}} from the top 50 most recently created approved reviews. Sort them by creation date, from newest to oldest. When creation dates are the same, treat those appearing earlier in the system display-when sorted from newest to oldest-as being earlier. Provide the answer in the following format: [ID1, ID2, ID3, ...]",
        "intent": "List the IDs of approved reviews that contain the word comfortable from the top 50 most recently created approved reviews. Sort them by creation date, from newest to oldest. When creation dates are the same, treat those appearing earlier in the system display-when sorted from newest to oldest-as being earlier. Provide the answer in the following format: [ID1, ID2, ID3, ...]",
        "required_obs": "any",
        "type_main": "massive_memory",
        "description": "First, navigate to the Review page. Then, filter to show only the Approved reviews. Retrieve the top 50 most recently created approved reviews. Finally, identify the IDs of those reviews that contain the word {{word}}.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/review/product",
            "contents": "352, 351, 347, 341, 345, 316, 312, 305, 322, 324, 283",
            "word": "comfortable",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "[352, 351, 347, 341, 345, 316, 312, 305, 322, 324, 283]"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "352, 351, 347, 341, 345, 316, 312, 305, 322, 324, 283"
        }
    },
    {
        "task_id": 10180,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "The product page of this website has a specification where changing the number of items displayed per page causes the order of some products to change. Please analyze this specification. Specifically, for {{status}} Simple Products with {{attribute}}, identify the top 50 products based on their ranking when displayed on 50 items per page. Then, compare this ranking with the rankings when displayed on 20 items per page. Extract the IDs of products whose rankings differ between these page settings. As a note, list all product IDs whose rankings are misaligned. For example, if the order for 50 items per page is: 1, 2, 3, 4, 5 and for 20 items per page it's: 1, 14, 2, 3, 4 then the correct answer is: 2, 3, 4, 5. Format your answer as follows, listing IDs in the order they appear earliest on the 50 items per page setting:\nID7, ID2, ID3, ..., ID20",
        "intent": "The product page of this website has a specification where changing the number of items displayed per page causes the order of some products to change. Please analyze this specification. Specifically, for most recently updated Simple Products with Top, identify the top 50 products based on their ranking when displayed on 50 items per page. Then, compare this ranking with the rankings when displayed on 20 items per page. Extract the IDs of products whose rankings differ between these page settings. As a note, list all product IDs whose rankings are misaligned. For example, if the order for 50 items per page is: 1, 2, 3, 4, 5 and for 20 items per page it's: 1, 14, 2, 3, 4 then the correct answer is: 2, 3, 4, 5. Format your answer as follows, listing IDs in the order they appear earliest on the 50 items per page setting:\nID7, ID2, ID3, ..., ID20",
        "required_obs": "any",
        "type_main": "massive_memory",
        "description": "First, navigate to the Product page. Then, filter by attribute. Next, sort by status. Retrieve the product IDs in the state of 50 items per page. After that, retrieve the product IDs in the state of 20 items per page. Finally, compare both IDs and extract the different ones.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "contents": "1810, 1809, 1811, 1808, 1768, 1763, 1792, 1785, 1752, 1775, 1776, 1779, 1758, 1765, 1772, 1789, 1756, 1791, 1782, 1750, 1773, 1788, 1784, 1790, 1771, 1754, 1747, 1794, 1749, 1774, 1759, 1760, 1786, 1770, 1793, 1769, 1777, 1757, 1781, 1797, 1766, 1795, 1762, 1787, 1783, 1751",
            "status": "most recently updated",
            "attribute": "Top",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "1810, 1809, 1811, 1808, 1768, 1763, 1792, 1785, 1752, 1775, 1776, 1779, 1758, 1765, 1772, 1789, 1756, 1791, 1782, 1750, 1773, 1788, 1784, 1790, 1771, 1754, 1747, 1794, 1749, 1774, 1759, 1760, 1786, 1770, 1793, 1769, 1777, 1757, 1781, 1797, 1766, 1795, 1762, 1787, 1783, 1751"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "1810, 1809, 1811, 1808, 1768, 1763, 1792, 1785, 1752, 1775, 1776, 1779, 1758, 1765, 1772, 1789, 1756, 1791, 1782, 1750, 1773, 1788, 1784, 1790, 1771, 1754, 1747, 1794, 1749, 1774, 1759, 1760, 1786, 1770, 1793, 1769, 1777, 1757, 1781, 1797, 1766, 1795, 1762, 1787, 1783, 1751"
        }
    },
    {
        "task_id": 10181,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "The product page of this website has a specification where changing the number of items displayed per page causes the order of some products to change. Please analyze this specification. Specifically, for {{status}} Simple Products with {{attribute}}, identify the top 50 products based on their ranking when displayed on 50 items per page. Then, compare this ranking with the rankings when displayed on 20 items per page. Extract the IDs of products whose rankings differ between these page settings. As a note, list all product IDs whose rankings are misaligned. For example, if the order for 50 items per page is: 1, 2, 3, 4, 5 and for 20 items per page it's: 1, 14, 2, 3, 4 then the correct answer is: 2, 3, 4, 5. Format your answer as follows, listing IDs in the order they appear earliest on the 50 items per page setting:\nID7, ID2, ID3, ..., ID20",
        "intent": "The product page of this website has a specification where changing the number of items displayed per page causes the order of some products to change. Please analyze this specification. Specifically, for lowest price Simple Products with Bottom, identify the top 50 products based on their ranking when displayed on 50 items per page. Then, compare this ranking with the rankings when displayed on 20 items per page. Extract the IDs of products whose rankings differ between these page settings. As a note, list all product IDs whose rankings are misaligned. For example, if the order for 50 items per page is: 1, 2, 3, 4, 5 and for 20 items per page it's: 1, 14, 2, 3, 4 then the correct answer is: 2, 3, 4, 5. Format your answer as follows, listing IDs in the order they appear earliest on the 50 items per page setting:\nID7, ID2, ID3, ..., ID20",
        "required_obs": "any",
        "type_main": "massive_memory",
        "description": "First, navigate to the Product page. Then, filter by attribute. Next, sort by status. Retrieve the product IDs in the state of 50 items per page. After that, retrieve the product IDs in the state of 20 items per page. Finally, compare both IDs and extract the different ones.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "contents": "1011, 1004, 1008, 1012, 1005, 1009, 1013, 1006, 1010, 1003, 1014, 1007, 984, 1994, 977, 988, 981, 1991, 985, 1995, 978, 982, 1992, 986, 1996, 979, 983, 1993, 987, 980, 1019, 1023, 1016, 1027, 1020, 1024, 1017, 1021, 1025, 1018, 1022, 1026, 1932, 795, 1978, 1925, 1971, 799, 1929, 1982",
            "status": "lowest price",
            "attribute": "Bottom",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "1011, 1004, 1008, 1012, 1005, 1009, 1013, 1006, 1010, 1003, 1014, 1007, 984, 1994, 977, 988, 981, 1991, 985, 1995, 978, 982, 1992, 986, 1996, 979, 983, 1993, 987, 980, 1019, 1023, 1016, 1027, 1020, 1024, 1017, 1021, 1025, 1018, 1022, 1026, 1932, 795, 1978, 1925, 1971, 799, 1929, 1982"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "1011, 1004, 1008, 1012, 1005, 1009, 1013, 1006, 1010, 1003, 1014, 1007, 984, 1994, 977, 988, 981, 1991, 985, 1995, 978, 982, 1992, 986, 1996, 979, 983, 1993, 987, 980, 1019, 1023, 1016, 1027, 1020, 1024, 1017, 1021, 1025, 1018, 1022, 1026, 1932, 795, 1978, 1925, 1971, 799, 1929, 1982"
        }
    },
    {
        "task_id": 10182,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "The product page of this website has a specification where changing the number of items displayed per page causes the order of some products to change. Please analyze this specification. Specifically, for {{status}} Simple Products with {{attribute}}, identify the top 50 products based on their ranking when displayed on 50 items per page. Then, compare this ranking with the rankings when displayed on 20 items per page. Extract the IDs of products whose rankings differ between these page settings. As a note, list all product IDs whose rankings are misaligned. For example, if the order for 50 items per page is: 1, 2, 3, 4, 5 and for 20 items per page it's: 1, 14, 2, 3, 4 then the correct answer is: 2, 3, 4, 5. Format your answer as follows, listing IDs in the order they appear earliest on the 50 items per page setting:\nID7, ID2, ID3, ..., ID20",
        "intent": "The product page of this website has a specification where changing the number of items displayed per page causes the order of some products to change. Please analyze this specification. Specifically, for highest price Simple Products with Top, identify the top 50 products based on their ranking when displayed on 50 items per page. Then, compare this ranking with the rankings when displayed on 20 items per page. Extract the IDs of products whose rankings differ between these page settings. As a note, list all product IDs whose rankings are misaligned. For example, if the order for 50 items per page is: 1, 2, 3, 4, 5 and for 20 items per page it's: 1, 14, 2, 3, 4 then the correct answer is: 2, 3, 4, 5. Format your answer as follows, listing IDs in the order they appear earliest on the 50 items per page setting:\nID7, ID2, ID3, ..., ID20",
        "required_obs": "any",
        "type_main": "massive_memory",
        "description": "First, navigate to the Product page. Then, filter by attribute. Next, sort by status. Retrieve the product IDs in the state of 50 items per page. After that, retrieve the product IDs in the state of 20 items per page. Finally, compare both IDs and extract the different ones.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "contents": "333, 326, 319, 330, 323, 327, 320, 331, 324, 328, 321, 332, 325, 329, 322, 1263, 1256, 1267, 1260, 1253, 1264, 1257, 1261, 1254, 1265, 1258, 1262, 1255, 1266, 1259, 1381, 1373, 1366, 1392, 1385, 1377, 1370, 1389, 1382, 1374, 1367, 1393, 1386, 1378, 1371, 1390, 1383, 1375, 1368, 1394",
            "status": "highest price",
            "attribute": "Top",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "333, 326, 319, 330, 323, 327, 320, 331, 324, 328, 321, 332, 325, 329, 322, 1263, 1256, 1267, 1260, 1253, 1264, 1257, 1261, 1254, 1265, 1258, 1262, 1255, 1266, 1259, 1381, 1373, 1366, 1392, 1385, 1377, 1370, 1389, 1382, 1374, 1367, 1393, 1386, 1378, 1371, 1390, 1383, 1375, 1368, 1394"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "333, 326, 319, 330, 323, 327, 320, 331, 324, 328, 321, 332, 325, 329, 322, 1263, 1256, 1267, 1260, 1253, 1264, 1257, 1261, 1254, 1265, 1258, 1262, 1255, 1266, 1259, 1381, 1373, 1366, 1392, 1385, 1377, 1370, 1389, 1382, 1374, 1367, 1393, 1386, 1378, 1371, 1390, 1383, 1375, 1368, 1394"
        }
    },
    {
        "task_id": 10183,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "The product page of this website has a specification where changing the number of items displayed per page causes the order of some products to change. Please analyze this specification. Specifically, for {{status}} Simple Products with {{attribute}}, identify the top 50 products based on their ranking when displayed on 50 items per page. Then, compare this ranking with the rankings when displayed on 20 items per page. Extract the IDs of products whose rankings differ between these page settings. As a note, list all product IDs whose rankings are misaligned. For example, if the order for 50 items per page is: 1, 2, 3, 4, 5 and for 20 items per page it's: 1, 14, 2, 3, 4 then the correct answer is: 2, 3, 4, 5. Format your answer as follows, listing IDs in the order they appear earliest on the 50 items per page setting:\nID7, ID2, ID3, ..., ID20",
        "intent": "The product page of this website has a specification where changing the number of items displayed per page causes the order of some products to change. Please analyze this specification. Specifically, for most recently updated Simple Products with Bottom, identify the top 50 products based on their ranking when displayed on 50 items per page. Then, compare this ranking with the rankings when displayed on 20 items per page. Extract the IDs of products whose rankings differ between these page settings. As a note, list all product IDs whose rankings are misaligned. For example, if the order for 50 items per page is: 1, 2, 3, 4, 5 and for 20 items per page it's: 1, 14, 2, 3, 4 then the correct answer is: 2, 3, 4, 5. Format your answer as follows, listing IDs in the order they appear earliest on the 50 items per page setting:\nID7, ID2, ID3, ..., ID20",
        "required_obs": "any",
        "type_main": "massive_memory",
        "description": "First, navigate to the Product page. Then, filter by attribute. Next, sort by status. Retrieve the product IDs in the state of 50 items per page. After that, retrieve the product IDs in the state of 20 items per page. Finally, compare both IDs and extract the different ones.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "contents": "2039, 2015, 2007, 2005, 2008, 2019, 2011, 2037, 2023, 2030, 2033, 2036, 2020, 2009, 2031, 2028, 2038, 2035, 2025, 2006, 2002, 2013, 2032, 2012, 2018, 2029, 2014, 2016, 2026, 2004, 2027, 2022, 2021, 2034, 1976, 1981, 1977, 1995, 1991, 1989, 1988, 1980, 1994, 1985, 1979, 1984, 1986, 1993",
            "status": "most recently updated",
            "attribute": "Bottom",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "2039, 2015, 2007, 2005, 2008, 2019, 2011, 2037, 2023, 2030, 2033, 2036, 2020, 2009, 2031, 2028, 2038, 2035, 2025, 2006, 2002, 2013, 2032, 2012, 2018, 2029, 2014, 2016, 2026, 2004, 2027, 2022, 2021, 2034, 1976, 1981, 1977, 1995, 1991, 1989, 1988, 1980, 1994, 1985, 1979, 1984, 1986, 1993"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "2039, 2015, 2007, 2005, 2008, 2019, 2011, 2037, 2023, 2030, 2033, 2036, 2020, 2009, 2031, 2028, 2038, 2035, 2025, 2006, 2002, 2013, 2032, 2012, 2018, 2029, 2014, 2016, 2026, 2004, 2027, 2022, 2021, 2034, 1976, 1981, 1977, 1995, 1991, 1989, 1988, 1980, 1994, 1985, 1979, 1984, 1986, 1993"
        }
    },
    {
        "task_id": 10190,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "I want to replenish 40 units of each Simple Product that has the Attribute {{attribute}}. These products should be among the top {{number}} {{status}} products within that category. Here, if the same {{condition}} appears beyond the top {{number}}, include those as well. If a product with the same ID appears more than once, it is a bug, so please ignore it. If the purchase unit price for restocking is 60% of the selling price, how much will the total cost be? Do all calculations in decimals and round the purchase amount to the nearest integer just before answering.",
        "intent": "I want to replenish 40 units of each Simple Product that has the Attribute Top. These products should be among the top 50 most recently updated products within that category. Here, if the same updated time appears beyond the top 50, include those as well. If a product with the same ID appears more than once, it is a bug, so please ignore it. If the purchase unit price for restocking is 60% of the selling price, how much will the total cost be? Do all calculations in decimals and round the purchase amount to the nearest integer just before answering.",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "calc",
        "description": "First, navigate to the Product page. Then, apply filters using attribute and Simple Products. Sort the products to match the specified status. Next, retrieve the prices of the top number products, multiply each by 0.6, sum them up, and finally multiply by 40.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "contents": "52656 |OR| 52,656",
            "attribute": "Top",
            "condition": "updated time",
            "status": "most recently updated",
            "number": "50",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "52656 |OR| 52,656"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "52656 |OR| 52,656"
        }
    },
    {
        "task_id": 10191,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "I want to replenish 40 units of each Simple Product that has the Attribute {{attribute}}. These products should be among the top {{number}} {{status}} products within that category. Here, if the same {{condition}} appears beyond the top {{number}}, include those as well. If a product with the same ID appears more than once, it is a bug, so please ignore it. If the purchase unit price for restocking is 60% of the selling price, how much will the total cost be? Do all calculations in decimals and round the purchase amount to the nearest integer just before answering.",
        "intent": "I want to replenish 40 units of each Simple Product that has the Attribute Bottom. These products should be among the top 50 highest price products within that category. Here, if the same price appears beyond the top 50, include those as well. If a product with the same ID appears more than once, it is a bug, so please ignore it. If the purchase unit price for restocking is 60% of the selling price, how much will the total cost be? Do all calculations in decimals and round the purchase amount to the nearest integer just before answering.",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "calc",
        "description": "First, navigate to the Product page. Then, apply filters using attribute and Simple Products. Sort the products to match the specified status. Next, retrieve the prices of the top number products, multiply each by 0.6, sum them up, and finally multiply by 40.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "contents": "107712 |OR| 107,712",
            "attribute": "Bottom",
            "condition": "price",
            "status": "highest price",
            "number": "50",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "107712 |OR| 107,712"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "107712 |OR| 107,712"
        }
    },
    {
        "task_id": 10192,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "I want to replenish 40 units of each Simple Product that has the Attribute {{attribute}}. These products should be among the top {{number}} {{status}} products within that category. Here, if the same {{condition}} appears beyond the top {{number}}, include those as well. If a product with the same ID appears more than once, it is a bug, so please ignore it. If the purchase unit price for restocking is 60% of the selling price, how much will the total cost be? Do all calculations in decimals and round the purchase amount to the nearest integer just before answering.",
        "intent": "I want to replenish 40 units of each Simple Product that has the Attribute Top. These products should be among the top 30 lowest price products within that category. Here, if the same price appears beyond the top 30, include those as well. If a product with the same ID appears more than once, it is a bug, so please ignore it. If the purchase unit price for restocking is 60% of the selling price, how much will the total cost be? Do all calculations in decimals and round the purchase amount to the nearest integer just before answering.",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "calc",
        "description": "First, navigate to the Product page. Then, apply filters using attribute and Simple Products. Sort the products to match the specified status. Next, retrieve the prices of the top number products, multiply each by 0.6, sum them up, and finally multiply by 40.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "contents": "17040 |OR| 17,040",
            "attribute": "Top",
            "condition": "price",
            "status": "lowest price",
            "number": "30",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "17040 |OR| 17,040"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "17040 |OR| 17,040"
        }
    },
    {
        "task_id": 10193,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "I want to replenish 40 units of each Simple Product that has the Attribute {{attribute}}. These products should be among the top {{number}} {{status}} products within that category. Here, if the same {{condition}} appears beyond the top {{number}}, include those as well. If a product with the same ID appears more than once, it is a bug, so please ignore it. If the purchase unit price for restocking is 60% of the selling price, how much will the total cost be? Do all calculations in decimals and round the purchase amount to the nearest integer just before answering.",
        "intent": "I want to replenish 40 units of each Simple Product that has the Attribute Bottom. These products should be among the top 50 oldest last-updated products within that category. Here, if the same updated time appears beyond the top 50, include those as well. If a product with the same ID appears more than once, it is a bug, so please ignore it. If the purchase unit price for restocking is 60% of the selling price, how much will the total cost be? Do all calculations in decimals and round the purchase amount to the nearest integer just before answering.",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "calc",
        "description": "First, navigate to the Product page. Then, apply filters using attribute and Simple Products. Sort the products to match the specified status. Next, retrieve the prices of the top number products, multiply each by 0.6, sum them up, and finally multiply by 40.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/catalog/product/",
            "contents": "57816 |OR| 57,816",
            "attribute": "Bottom",
            "condition": "updated time",
            "status": "oldest last-updated",
            "number": "50",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "57816 |OR| 57,816"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "57816 |OR| 57,816"
        }
    },
    {
        "task_id": 10200,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "How many products did {{user1}} purchase in completed orders? Note that the 'Grand Total' on the order page represents the total amount for multiple products purchased in a single transaction.",
        "intent": "How many products did Sarah Miller purchase in completed orders? Note that the 'Grand Total' on the order page represents the total amount for multiple products purchased in a single transaction.",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "First, navigate to the sales order page. Then, retrieve the list of completed orders for user1. After that, click on each order to find out how many products were purchased. Finally, sum the total number of products purchased.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order",
            "contents": "26",
            "user1": "Sarah Miller",
            "checkpoint1": "26",
            "checkpoint_info": "checkpoint1: total products purchased by user1, checkpoint2: total products purchased by user2",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "26"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "26"
        }
    },
    {
        "task_id": 10201,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "How many products did {{user1}} purchase in completed orders? Note that the 'Grand Total' on the order page represents the total amount for multiple products purchased in a single transaction.",
        "intent": "How many products did Adam Garcia purchase in completed orders? Note that the 'Grand Total' on the order page represents the total amount for multiple products purchased in a single transaction.",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "First, navigate to the sales order page. Then, retrieve the list of completed orders for user1. After that, click on each order to find out how many products were purchased. Finally, sum the total number of products purchased.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order",
            "contents": "20",
            "user1": "Adam Garcia",
            "checkpoint1": "20",
            "checkpoint_info": "checkpoint1: total products purchased by user1, checkpoint2: total products purchased by user2",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "20"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "20"
        }
    },
    {
        "task_id": 10202,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "How many products did {{user1}} purchase in completed orders? Note that the 'Grand Total' on the order page represents the total amount for multiple products purchased in a single transaction.",
        "intent": "How many products did Grace Nguyen purchase in completed orders? Note that the 'Grand Total' on the order page represents the total amount for multiple products purchased in a single transaction.",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "First, navigate to the sales order page. Then, retrieve the list of completed orders for user1. After that, click on each order to find out how many products were purchased. Finally, sum the total number of products purchased.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order",
            "contents": "24",
            "user1": "Grace Nguyen",
            "checkpoint1": "24",
            "checkpoint_info": "checkpoint1: total products purchased by user1, checkpoint2: total products purchased by user2",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "24"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "24"
        }
    },
    {
        "task_id": 10210,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the average rating (including pending and not approved review) for all {{product_type}} products? The product name must contain '{{product_type}}'. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be 'average_rating' without reasoning.",
        "intent": "What is the average rating (including pending and not approved review) for all hoodie products? The product name must contain 'hoodie'. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be 'average_rating' without reasoning.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to Reports -> Reviews -> By Products, and search for {{product type}} in the Product field. Then, calculate the weighted average using the average rating and the number of reviews of the search results.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
            "product_type": "hoodie",
            "contents": "71",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "71"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "71"
        }
    },
    {
        "task_id": 10211,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the average rating (including pending and not approved review) for all {{product_type}} products? The product name must contain '{{product_type}}'. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be 'average_rating' without reasoning.",
        "intent": "What is the average rating (including pending and not approved review) for all bra products? The product name must contain 'bra'. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be 'average_rating' without reasoning.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to Reports -> Reviews -> By Products, and search for {{product type}} in the Product field. Then, calculate the weighted average using the average rating and the number of reviews of the search results.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
            "product_type": "bra",
            "contents": "61",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "61"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "61"
        }
    },
    {
        "task_id": 10212,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the average rating (including pending and not approved review) for all {{product_type}} products? The product name must contain '{{product_type}}'. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be 'average_rating' without reasoning.",
        "intent": "What is the average rating (including pending and not approved review) for all bag products? The product name must contain 'bag'. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be 'average_rating' without reasoning.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to Reports -> Reviews -> By Products, and search for {{product type}} in the Product field. Then, calculate the weighted average using the average rating and the number of reviews of the search results.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
            "product_type": "bag",
            "contents": "64",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "64"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "64"
        }
    },
    {
        "task_id": 10213,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the average rating (including pending and not approved review) for all {{product_type}} products? The product name must contain '{{product_type}}'. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be 'average_rating' without reasoning.",
        "intent": "What is the average rating (including pending and not approved review) for all tank products? The product name must contain 'tank'. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be 'average_rating' without reasoning.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to Reports -> Reviews -> By Products, and search for {{product type}} in the Product field. Then, calculate the weighted average using the average rating and the number of reviews of the search results.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
            "product_type": "tank",
            "contents": "65",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "65"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "65"
        }
    },
    {
        "task_id": 10214,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the average rating (including pending and not approved review) for all {{product_type}} products? The product name must contain '{{product_type}}'. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be 'average_rating' without reasoning.",
        "intent": "What is the average rating (including pending and not approved review) for all jacket products? The product name must contain 'jacket'. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be 'average_rating' without reasoning.",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "",
        "description": "Go to Reports -> Reviews -> By Products, and search for {{product type}} in the Product field. Then, calculate the weighted average using the average rating and the number of reviews of the search results.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
            "product_type": "jacket",
            "contents": "75",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "75"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "75"
        }
    },
    {
        "task_id": 10220,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/review/product/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the average price of the product(s) that received a review (any statuses are acceptable) stating it is {{adj}}? The review must contain the word '{{adj}}' and should not include contexts where it is stated as not {{adj}}. Answer should be rounded to the nearest integer. Final answer should be 'average_price' without reasoning. If there is no applicable review, answer 'N/A'",
        "intent": "What is the average price of the product(s) that received a review (any statuses are acceptable) stating it is expensive? The review must contain the word 'expensive' and should not include contexts where it is stated as not expensive. Answer should be rounded to the nearest integer. Final answer should be 'average_price' without reasoning. If there is no applicable review, answer 'N/A'",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Go to Marketing -> User Content -> All Reviews, and search for {{adj}} in the Review field. Click the Edit button to view the content of each review in the search results and confirm that the review is not actually using {{adj}} in a negative context. Finally, search for the prices of the corresponding products in Catalog -> Products, and calculate the average.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/review/product/index/",
            "adj": "expensive",
            "contents": "19",
            "checkpoint1": "Quest Lumaflex™ Band",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "19"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "19"
        }
    },
    {
        "task_id": 10221,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/review/product/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the average price of the product(s) that received a review (any statuses are acceptable) stating it is {{adj}}? The review must contain the word '{{adj}}' and should not include contexts where it is stated as not {{adj}}. Answer should be rounded to the nearest integer. Final answer should be 'average_price' without reasoning. If there is no applicable review, answer 'N/A'",
        "intent": "What is the average price of the product(s) that received a review (any statuses are acceptable) stating it is bulky? The review must contain the word 'bulky' and should not include contexts where it is stated as not bulky. Answer should be rounded to the nearest integer. Final answer should be 'average_price' without reasoning. If there is no applicable review, answer 'N/A'",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Go to Marketing -> User Content -> All Reviews, and search for {{adj}} in the Review field. Click the Edit button to view the content of each review in the search results and confirm that the review is not actually using {{adj}} in a negative context. Finally, search for the prices of the corresponding products in Catalog -> Products, and calculate the average.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/review/product/index/",
            "adj": "bulky",
            "contents": "62",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "62"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "62"
        }
    },
    {
        "task_id": 10222,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/review/product/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the average price of the product(s) that received a review (any statuses are acceptable) stating it is {{adj}}? The review must contain the word '{{adj}}' and should not include contexts where it is stated as not {{adj}}. Answer should be rounded to the nearest integer. Final answer should be 'average_price' without reasoning. If there is no applicable review, answer 'N/A'",
        "intent": "What is the average price of the product(s) that received a review (any statuses are acceptable) stating it is high quality? The review must contain the word 'high quality' and should not include contexts where it is stated as not high quality. Answer should be rounded to the nearest integer. Final answer should be 'average_price' without reasoning. If there is no applicable review, answer 'N/A'",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Go to Marketing -> User Content -> All Reviews, and search for {{adj}} in the Review field. Click the Edit button to view the content of each review in the search results and confirm that the review is not actually using {{adj}} in a negative context. Finally, search for the prices of the corresponding products in Catalog -> Products, and calculate the average.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/review/product/index/",
            "adj": "high quality",
            "contents": "N/A",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "N/A"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "N/A"
        }
    },
    {
        "task_id": 10223,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/review/product/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the average price of the product(s) that received a review (any statuses are acceptable) stating it is {{adj}}? The review must contain the word '{{adj}}' and should not include contexts where it is stated as not {{adj}}. Answer should be rounded to the nearest integer. Final answer should be 'average_price' without reasoning. If there is no applicable review, answer 'N/A'",
        "intent": "What is the average price of the product(s) that received a review (any statuses are acceptable) stating it is versatile? The review must contain the word 'versatile' and should not include contexts where it is stated as not versatile. Answer should be rounded to the nearest integer. Final answer should be 'average_price' without reasoning. If there is no applicable review, answer 'N/A'",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Go to Marketing -> User Content -> All Reviews, and search for {{adj}} in the Review field. Click the Edit button to view the content of each review in the search results and confirm that the review is not actually using {{adj}} in a negative context. Finally, search for the prices of the corresponding products in Catalog -> Products, and calculate the average.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/review/product/index/",
            "adj": "versatile",
            "contents": "49 |OR| 50",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "49 |OR| 50"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "49 |OR| 50"
        }
    },
    {
        "task_id": 10224,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/review/product/index/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the average price of the product(s) that received a review (any statuses are acceptable) stating it is {{adj}}? The review must contain the word '{{adj}}' and should not include contexts where it is stated as not {{adj}}. Answer should be rounded to the nearest integer. Final answer should be 'average_price' without reasoning. If there is no applicable review, answer 'N/A'",
        "intent": "What is the average price of the product(s) that received a review (any statuses are acceptable) stating it is lightweight? The review must contain the word 'lightweight' and should not include contexts where it is stated as not lightweight. Answer should be rounded to the nearest integer. Final answer should be 'average_price' without reasoning. If there is no applicable review, answer 'N/A'",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Go to Marketing -> User Content -> All Reviews, and search for {{adj}} in the Review field. Click the Edit button to view the content of each review in the search results and confirm that the review is not actually using {{adj}} in a negative context. Finally, search for the prices of the corresponding products in Catalog -> Products, and calculate the average.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/review/product/index/",
            "adj": "lightweight",
            "contents": "51",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "51"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "51"
        }
    },
    {
        "task_id": 10230,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/bestsellers/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the dates in {{month}} when the best-selling product(s) of that month were sold. If there are multiple best-selling products, include the dates for all of them. The date should be an integer without including the month or year. Final answer should be in the format '[date1, date2, ...]' without reasoning, sorted in chronological order.",
        "intent": "List all the dates in March, 2022 when the best-selling product(s) of that month were sold. If there are multiple best-selling products, include the dates for all of them. The date should be an integer without including the month or year. Final answer should be in the format '[date1, date2, ...]' without reasoning, sorted in chronological order.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "",
        "description": "Go to Reports -> Products -> Bestsellers, set the Period to Month, and retrieve the best-selling products for {{month}}. Then, check the dates on which each product was sold during {{month}} in Reports -> Products -> Ordered.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/bestsellers/",
            "month": "March, 2022",
            "contents": "[12, 22]",
            "checkpoint1": "Quest Lumaflex™ Band",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[12, 22]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[12, 22]"
        }
    },
    {
        "task_id": 10231,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/bestsellers/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the dates in {{month}} when the best-selling product(s) of that month were sold. If there are multiple best-selling products, include the dates for all of them. The date should be an integer without including the month or year. Final answer should be in the format '[date1, date2, ...]' without reasoning, sorted in chronological order.",
        "intent": "List all the dates in April, 2022 when the best-selling product(s) of that month were sold. If there are multiple best-selling products, include the dates for all of them. The date should be an integer without including the month or year. Final answer should be in the format '[date1, date2, ...]' without reasoning, sorted in chronological order.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "",
        "description": "Go to Reports -> Products -> Bestsellers, set the Period to Month, and retrieve the best-selling products for {{month}}. Then, check the dates on which each product was sold during {{month}} in Reports -> Products -> Ordered.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/bestsellers/",
            "month": "April, 2022",
            "contents": "[23]",
            "checkpoint1": "Hera Pullover Hoodie-XS-Green",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[23]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[23]"
        }
    },
    {
        "task_id": 10232,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/bestsellers/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the dates in {{month}} when the best-selling product(s) of that month were sold. If there are multiple best-selling products, include the dates for all of them. The date should be an integer without including the month or year. Final answer should be in the format '[date1, date2, ...]' without reasoning, sorted in chronological order.",
        "intent": "List all the dates in June, 2022 when the best-selling product(s) of that month were sold. If there are multiple best-selling products, include the dates for all of them. The date should be an integer without including the month or year. Final answer should be in the format '[date1, date2, ...]' without reasoning, sorted in chronological order.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "",
        "description": "Go to Reports -> Products -> Bestsellers, set the Period to Month, and retrieve the best-selling products for {{month}}. Then, check the dates on which each product was sold during {{month}} in Reports -> Products -> Ordered.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/bestsellers/",
            "month": "June, 2022",
            "contents": "[2, 14, 18]",
            "checkpoint1": "Affirm Water Bottle, Harmony Lumaflex™ Strength Band Kit",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[2, 14, 18]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[2, 14, 18]"
        }
    },
    {
        "task_id": 10233,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/bestsellers/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the dates in {{month}} when the best-selling product(s) of that month were sold. If there are multiple best-selling products, include the dates for all of them. The date should be an integer without including the month or year. Final answer should be in the format '[date1, date2, ...]' without reasoning, sorted in chronological order.",
        "intent": "List all the dates in February, 2022 when the best-selling product(s) of that month were sold. If there are multiple best-selling products, include the dates for all of them. The date should be an integer without including the month or year. Final answer should be in the format '[date1, date2, ...]' without reasoning, sorted in chronological order.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "",
        "description": "Go to Reports -> Products -> Bestsellers, set the Period to Month, and retrieve the best-selling products for {{month}}. Then, check the dates on which each product was sold during {{month}} in Reports -> Products -> Ordered.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/bestsellers/",
            "month": "February, 2022",
            "contents": "[3, 4, 6, 8, 19]",
            "checkpoint1": "Dash Digital Watch, Sprite Yoga Strap 6 foot, Sprite Yoga Strap 8 foot",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[3, 4, 6, 8, 19]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[3, 4, 6, 8, 19]"
        }
    },
    {
        "task_id": 10234,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/bestsellers/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the dates in {{month}} when the best-selling product(s) of that month were sold. If there are multiple best-selling products, include the dates for all of them. The date should be an integer without including the month or year. Final answer should be in the format '[date1, date2, ...]' without reasoning, sorted in chronological order.",
        "intent": "List all the dates in May, 2022 when the best-selling product(s) of that month were sold. If there are multiple best-selling products, include the dates for all of them. The date should be an integer without including the month or year. Final answer should be in the format '[date1, date2, ...]' without reasoning, sorted in chronological order.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "",
        "description": "Go to Reports -> Products -> Bestsellers, set the Period to Month, and retrieve the best-selling products for {{month}}. Then, check the dates on which each product was sold during {{month}} in Reports -> Products -> Ordered.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/bestsellers/",
            "month": "May, 2022",
            "contents": "[2, 12, 15, 16, 20, 29]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[2, 12, 15, 16, 20, 29]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[2, 12, 15, 16, 20, 29]"
        }
    },
    {
        "task_id": 10240,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/sales/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the customer names (bill-to) who placed the highest {{attribute}} order(s) in each month of the {{number}} half of 2022, where the {{attribute}} of completed orders exceeded the monthly average for that year. The final answer should be in the format '[name1, name2, ...]' without reasoning, where names are sorted based on the descending order of their corresponding month's {{attribute}}. If multiple months have the same {{attribute}}, those months should be ordered chronologically. If multiple customers placed the highest {{attribute}} order in the same month, their names should be sorted alphabetically.",
        "intent": "List all the customer names (bill-to) who placed the highest sales total order(s) in each month of the first half of 2022, where the sales total of completed orders exceeded the monthly average for that year. The final answer should be in the format '[name1, name2, ...]' without reasoning, where names are sorted based on the descending order of their corresponding month's sales total. If multiple months have the same sales total, those months should be ordered chronologically. If multiple customers placed the highest sales total order in the same month, their names should be sorted alphabetically.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Go to Reports -> Sales -> Orders, set the Period to Month and the Order Status to Specified -> Complete, and display the Orders Report for 2022. Use this to calculate the monthly average of {{attribute}}, and identify the months that meet the criteria. Next, change the Period to Day, display the report for each of those months, and obtain candidate dates for the highest {{attribute}} order(s). Finally, go to Sales -> Orders, check the orders on each of those dates, and retrieve the customer name(s) associated with the highest {{attribute}} order(s).",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/sales/",
            "number": "first",
            "attribute": "sales total",
            "contents": "[Grace Nguyen, Lily Potter, Samantha Jones, Jennifer White]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[Grace Nguyen, Lily Potter, Samantha Jones, Jennifer White]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[Grace Nguyen, Lily Potter, Samantha Jones, Jennifer White]"
        }
    },
    {
        "task_id": 10241,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/sales/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the customer names (bill-to) who placed the highest {{attribute}} order(s) in each month of the {{number}} half of 2022, where the {{attribute}} of completed orders exceeded the monthly average for that year. The final answer should be in the format '[name1, name2, ...]' without reasoning, where names are sorted based on the descending order of their corresponding month's {{attribute}}. If multiple months have the same {{attribute}}, those months should be ordered chronologically. If multiple customers placed the highest {{attribute}} order in the same month, their names should be sorted alphabetically.",
        "intent": "List all the customer names (bill-to) who placed the highest sales total order(s) in each month of the second half of 2022, where the sales total of completed orders exceeded the monthly average for that year. The final answer should be in the format '[name1, name2, ...]' without reasoning, where names are sorted based on the descending order of their corresponding month's sales total. If multiple months have the same sales total, those months should be ordered chronologically. If multiple customers placed the highest sales total order in the same month, their names should be sorted alphabetically.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Go to Reports -> Sales -> Orders, set the Period to Month and the Order Status to Specified -> Complete, and display the Orders Report for 2022. Use this to calculate the monthly average of {{attribute}}, and identify the months that meet the criteria. Next, change the Period to Day, display the report for each of those months, and obtain candidate dates for the highest {{attribute}} order(s). Finally, go to Sales -> Orders, check the orders on each of those dates, and retrieve the customer name(s) associated with the highest {{attribute}} order(s).",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/sales/",
            "number": "second",
            "attribute": "sales total",
            "contents": "[Lucy Garcia, Lily Potter]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[Lucy Garcia, Lily Potter]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[Lucy Garcia, Lily Potter]"
        }
    },
    {
        "task_id": 10242,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/sales/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the customer names (bill-to) who placed the highest {{attribute}} order(s) in each month of the {{number}} half of 2022, where the {{attribute}} of completed orders exceeded the monthly average for that year. The final answer should be in the format '[name1, name2, ...]' without reasoning, where names are sorted based on the descending order of their corresponding month's {{attribute}}. If multiple months have the same {{attribute}}, those months should be ordered chronologically. If multiple customers placed the highest {{attribute}} order in the same month, their names should be sorted alphabetically.",
        "intent": "List all the customer names (bill-to) who placed the highest sales items order(s) in each month of the first half of 2022, where the sales items of completed orders exceeded the monthly average for that year. The final answer should be in the format '[name1, name2, ...]' without reasoning, where names are sorted based on the descending order of their corresponding month's sales items. If multiple months have the same sales items, those months should be ordered chronologically. If multiple customers placed the highest sales items order in the same month, their names should be sorted alphabetically.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Go to Reports -> Sales -> Orders, set the Period to Month and the Order Status to Specified -> Complete, and display the Orders Report for 2022. Use this to calculate the monthly average of {{attribute}}, and identify the months that meet the criteria. Next, change the Period to Day, display the report for each of those months, and obtain candidate dates for the highest {{attribute}} order(s). Finally, go to Sales -> Orders, check the orders on each of those dates, and retrieve the customer name(s) associated with the highest {{attribute}} order(s).",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/sales/",
            "number": "first",
            "attribute": "sales items",
            "contents": "[Alex Johnson, Grace Nguyen, Michael Nguyen, Grace Nguyen, Jane Smith, Katie Wong, Lily Potter, Alex Martin, Samantha Jones, Jennifer White, Lily Potter]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[Alex Johnson, Grace Nguyen, Michael Nguyen, Grace Nguyen, Jane Smith, Katie Wong, Lily Potter, Alex Martin, Samantha Jones, Jennifer White, Lily Potter]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[Alex Johnson, Grace Nguyen, Michael Nguyen, Grace Nguyen, Jane Smith, Katie Wong, Lily Potter, Alex Martin, Samantha Jones, Jennifer White, Lily Potter]"
        }
    },
    {
        "task_id": 10243,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/sales/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the customer names (bill-to) who placed the highest {{attribute}} order(s) in each month of the {{number}} half of 2022, where the {{attribute}} of completed orders exceeded the monthly average for that year. The final answer should be in the format '[name1, name2, ...]' without reasoning, where names are sorted based on the descending order of their corresponding month's {{attribute}}. If multiple months have the same {{attribute}}, those months should be ordered chronologically. If multiple customers placed the highest {{attribute}} order in the same month, their names should be sorted alphabetically.",
        "intent": "List all the customer names (bill-to) who placed the highest sales items order(s) in each month of the second half of 2022, where the sales items of completed orders exceeded the monthly average for that year. The final answer should be in the format '[name1, name2, ...]' without reasoning, where names are sorted based on the descending order of their corresponding month's sales items. If multiple months have the same sales items, those months should be ordered chronologically. If multiple customers placed the highest sales items order in the same month, their names should be sorted alphabetically.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Go to Reports -> Sales -> Orders, set the Period to Month and the Order Status to Specified -> Complete, and display the Orders Report for 2022. Use this to calculate the monthly average of {{attribute}}, and identify the months that meet the criteria. Next, change the Period to Day, display the report for each of those months, and obtain candidate dates for the highest {{attribute}} order(s). Finally, go to Sales -> Orders, check the orders on each of those dates, and retrieve the customer name(s) associated with the highest {{attribute}} order(s).",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/sales/",
            "number": "second",
            "attribute": "sales items",
            "contents": "[Lucy Garcia, Mary Martin, Olivia Lee, Samantha Jones]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[Lucy Garcia, Mary Martin, Olivia Lee, Samantha Jones]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[Lucy Garcia, Mary Martin, Olivia Lee, Samantha Jones]"
        }
    },
    {
        "task_id": 10244,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/sales/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the customer names (bill-to) who placed the highest {{attribute}} order(s) in each month of the {{number}} half of 2022, where the {{attribute}} of completed orders exceeded the monthly average for that year. The final answer should be in the format '[name1, name2, ...]' without reasoning, where names are sorted based on the descending order of their corresponding month's {{attribute}}. If multiple months have the same {{attribute}}, those months should be ordered chronologically. If multiple customers placed the highest {{attribute}} order in the same month, their names should be sorted alphabetically.",
        "intent": "List all the customer names (bill-to) who placed the highest sales shipping order(s) in each month of the first half of 2022, where the sales shipping of completed orders exceeded the monthly average for that year. The final answer should be in the format '[name1, name2, ...]' without reasoning, where names are sorted based on the descending order of their corresponding month's sales shipping. If multiple months have the same sales shipping, those months should be ordered chronologically. If multiple customers placed the highest sales shipping order in the same month, their names should be sorted alphabetically.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Go to Reports -> Sales -> Orders, set the Period to Month and the Order Status to Specified -> Complete, and display the Orders Report for 2022. Use this to calculate the monthly average of {{attribute}}, and identify the months that meet the criteria. Next, change the Period to Day, display the report for each of those months, and obtain candidate dates for the highest {{attribute}} order(s). Finally, go to Sales -> Orders, check the orders on each of those dates, and retrieve the customer name(s) associated with the highest {{attribute}} order(s).",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/sales/",
            "number": "first",
            "attribute": "sales shipping",
            "contents": "[Alex Johnson, Grace Nguyen, Michael Nguyen, Grace Nguyen, Jane Smith, Katie Wong, Lily Potter, Alex Martin, Samantha Jones, Jennifer White, Lily Potter]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[Alex Johnson, Grace Nguyen, Michael Nguyen, Grace Nguyen, Jane Smith, Katie Wong, Lily Potter, Alex Martin, Samantha Jones, Jennifer White, Lily Potter]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[Alex Johnson, Grace Nguyen, Michael Nguyen, Grace Nguyen, Jane Smith, Katie Wong, Lily Potter, Alex Martin, Samantha Jones, Jennifer White, Lily Potter]"
        }
    },
    {
        "task_id": 10245,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/sales/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the customer names (bill-to) who placed the highest {{attribute}} order(s) in each month of the {{number}} half of 2022, where the {{attribute}} of completed orders exceeded the monthly average for that year. The final answer should be in the format '[name1, name2, ...]' without reasoning, where names are sorted based on the descending order of their corresponding month's {{attribute}}. If multiple months have the same {{attribute}}, those months should be ordered chronologically. If multiple customers placed the highest {{attribute}} order in the same month, their names should be sorted alphabetically.",
        "intent": "List all the customer names (bill-to) who placed the highest sales shipping order(s) in each month of the second half of 2022, where the sales shipping of completed orders exceeded the monthly average for that year. The final answer should be in the format '[name1, name2, ...]' without reasoning, where names are sorted based on the descending order of their corresponding month's sales shipping. If multiple months have the same sales shipping, those months should be ordered chronologically. If multiple customers placed the highest sales shipping order in the same month, their names should be sorted alphabetically.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Go to Reports -> Sales -> Orders, set the Period to Month and the Order Status to Specified -> Complete, and display the Orders Report for 2022. Use this to calculate the monthly average of {{attribute}}, and identify the months that meet the criteria. Next, change the Period to Day, display the report for each of those months, and obtain candidate dates for the highest {{attribute}} order(s). Finally, go to Sales -> Orders, check the orders on each of those dates, and retrieve the customer name(s) associated with the highest {{attribute}} order(s).",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_sales/sales/",
            "number": "second",
            "attribute": "sales shipping",
            "contents": "[Lucy Garcia, Mary Martin, Olivia Lee, Samantha Nguyen, Samantha Jones]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[Lucy Garcia, Mary Martin, Olivia Lee, Samantha Nguyen, Samantha Jones]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[Lucy Garcia, Mary Martin, Olivia Lee, Samantha Nguyen, Samantha Jones]"
        }
    },
    {
        "task_id": 10250,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the total discount amount for both catalog price discount and cart price discount for complete orders during {{period}}? Make sure to check what kind of sales are applied to both the catalog price and cart price. Amount should not be rounded and do not output trailing zeros after the decimal point. Final answer should be in the format '[discount_amount_catalog_price, discount_amount_cart_price]' without reasoning.",
        "intent": "What is the total discount amount for both catalog price discount and cart price discount for complete orders during from February 1, 2022 to February 10, 2022? Make sure to check what kind of sales are applied to both the catalog price and cart price. Amount should not be rounded and do not output trailing zeros after the decimal point. Final answer should be in the format '[discount_amount_catalog_price, discount_amount_cart_price]' without reasoning.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "As seen in Marketing -> Catalog Price Rule and Marketing -> Cart Price Rules, pants are discounted by 20%, and purchases over $200 also receive a 20% discount. Go to Sales -> Orders, set the Status to Complete, and apply filtering based on Purchase Date. Check the discounts for each order and calculate the total. Note that the difference between Original Price and Price in the Items Ordered section corresponds to the catalog price discount, while the Discount in the Order Totals section corresponds to the cart price discount.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "period": "from February 1, 2022 to February 10, 2022",
            "contents": "[96.4, 64.12]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[96.4, 64.12]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[96.4, 64.12]"
        }
    },
    {
        "task_id": 10251,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the total discount amount for both catalog price discount and cart price discount for complete orders during {{period}}? Make sure to check what kind of sales are applied to both the catalog price and cart price. Amount should not be rounded and do not output trailing zeros after the decimal point. Final answer should be in the format '[discount_amount_catalog_price, discount_amount_cart_price]' without reasoning.",
        "intent": "What is the total discount amount for both catalog price discount and cart price discount for complete orders during from July 10, 2022 to July 20, 2022? Make sure to check what kind of sales are applied to both the catalog price and cart price. Amount should not be rounded and do not output trailing zeros after the decimal point. Final answer should be in the format '[discount_amount_catalog_price, discount_amount_cart_price]' without reasoning.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "As seen in Marketing -> Catalog Price Rule and Marketing -> Cart Price Rules, pants are discounted by 20%, and purchases over $200 also receive a 20% discount. Go to Sales -> Orders, set the Status to Complete, and apply filtering based on Purchase Date. Check the discounts for each order and calculate the total. Note that the difference between Original Price and Price in the Items Ordered section corresponds to the catalog price discount, while the Discount in the Order Totals section corresponds to the cart price discount.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "period": "from July 10, 2022 to July 20, 2022",
            "contents": "[12.6, 45.28]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[12.6, 45.28]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[12.6, 45.28]"
        }
    },
    {
        "task_id": 10252,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the total discount amount for both catalog price discount and cart price discount for complete orders during {{period}}? Make sure to check what kind of sales are applied to both the catalog price and cart price. Amount should not be rounded and do not output trailing zeros after the decimal point. Final answer should be in the format '[discount_amount_catalog_price, discount_amount_cart_price]' without reasoning.",
        "intent": "What is the total discount amount for both catalog price discount and cart price discount for complete orders during from June 20, 2022 to June 30, 2022? Make sure to check what kind of sales are applied to both the catalog price and cart price. Amount should not be rounded and do not output trailing zeros after the decimal point. Final answer should be in the format '[discount_amount_catalog_price, discount_amount_cart_price]' without reasoning.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "As seen in Marketing -> Catalog Price Rule and Marketing -> Cart Price Rules, pants are discounted by 20%, and purchases over $200 also receive a 20% discount. Go to Sales -> Orders, set the Status to Complete, and apply filtering based on Purchase Date. Check the discounts for each order and calculate the total. Note that the difference between Original Price and Price in the Items Ordered section corresponds to the catalog price discount, while the Discount in the Order Totals section corresponds to the cart price discount.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "period": "from June 20, 2022 to June 30, 2022",
            "contents": "[23.2, 20.96]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[23.2, 20.96]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[23.2, 20.96]"
        }
    },
    {
        "task_id": 10253,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the total discount amount for both catalog price discount and cart price discount for complete orders during {{period}}? Make sure to check what kind of sales are applied to both the catalog price and cart price. Amount should not be rounded and do not output trailing zeros after the decimal point. Final answer should be in the format '[discount_amount_catalog_price, discount_amount_cart_price]' without reasoning.",
        "intent": "What is the total discount amount for both catalog price discount and cart price discount for complete orders during from January 1, 2023 to January 10, 2023? Make sure to check what kind of sales are applied to both the catalog price and cart price. Amount should not be rounded and do not output trailing zeros after the decimal point. Final answer should be in the format '[discount_amount_catalog_price, discount_amount_cart_price]' without reasoning.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "As seen in Marketing -> Catalog Price Rule and Marketing -> Cart Price Rules, pants are discounted by 20%, and purchases over $200 also receive a 20% discount. Go to Sales -> Orders, set the Status to Complete, and apply filtering based on Purchase Date. Check the discounts for each order and calculate the total. Note that the difference between Original Price and Price in the Items Ordered section corresponds to the catalog price discount, while the Discount in the Order Totals section corresponds to the cart price discount.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "period": "from January 1, 2023 to January 10, 2023",
            "contents": "[27.4, 45.6]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[27.4, 45.6]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[27.4, 45.6]"
        }
    },
    {
        "task_id": 10254,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "What is the total discount amount for both catalog price discount and cart price discount for complete orders during {{period}}? Make sure to check what kind of sales are applied to both the catalog price and cart price. Amount should not be rounded and do not output trailing zeros after the decimal point. Final answer should be in the format '[discount_amount_catalog_price, discount_amount_cart_price]' without reasoning.",
        "intent": "What is the total discount amount for both catalog price discount and cart price discount for complete orders during from January 10, 2023 to January 20, 2023? Make sure to check what kind of sales are applied to both the catalog price and cart price. Amount should not be rounded and do not output trailing zeros after the decimal point. Final answer should be in the format '[discount_amount_catalog_price, discount_amount_cart_price]' without reasoning.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "As seen in Marketing -> Catalog Price Rule and Marketing -> Cart Price Rules, pants are discounted by 20%, and purchases over $200 also receive a 20% discount. Go to Sales -> Orders, set the Status to Complete, and apply filtering based on Purchase Date. Check the discounts for each order and calculate the total. Note that the difference between Original Price and Price in the Items Ordered section corresponds to the catalog price discount, while the Discount in the Order Totals section corresponds to the cart price discount.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "period": "from January 10, 2023 to January 20, 2023",
            "contents": "[13.2, 172.01]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[13.2, 172.01]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[13.2, 172.01]"
        }
    },
    {
        "task_id": 10260,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Tell me the date of the most recent completed order for the product(s) with the {{number}}highest average approved rating. If multiple products have the same average rating, provide the date of the oldest order among their most recent completed orders. Exclude product(s) that have reviews but have never been ordered. Note that all orders were placed between January 8, 2022, and May 31, 2023. The date should be in the format month/day/year (e.g., 01/01/20). Final answer should be in the format '[product_name, date]' without reasoning.",
        "intent": "Tell me the date of the most recent completed order for the product(s) with the highest average approved rating. If multiple products have the same average rating, provide the date of the oldest order among their most recent completed orders. Exclude product(s) that have reviews but have never been ordered. Note that all orders were placed between January 8, 2022, and May 31, 2023. The date should be in the format month/day/year (e.g., 01/01/20). Final answer should be in the format '[product_name, date]' without reasoning.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "others",
        "description": "Go to Reports -> Reviews -> By Product, and sort by the Average (approved) column to retrieve the product(s) with the {{number}}highest average approved rating. Then, go to Reports -> Products -> Ordered, display the report from 1/8/22 to 5/31/23, and identify the most recent order date for those products.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
            "number": "",
            "contents": "[Dual Handle Cardio Ball, 03/10/23]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[Dual Handle Cardio Ball, 03/10/23]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[Dual Handle Cardio Ball, 03/10/23]"
        }
    },
    {
        "task_id": 10261,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Tell me the date of the most recent completed order for the product(s) with the {{number}}highest average approved rating. If multiple products have the same average rating, provide the date of the oldest order among their most recent completed orders. Exclude product(s) that have reviews but have never been ordered. Note that all orders were placed between January 8, 2022, and May 31, 2023. The date should be in the format month/day/year (e.g., 01/01/20). Final answer should be in the format '[product_name, date]' without reasoning.",
        "intent": "Tell me the date of the most recent completed order for the product(s) with the second-highest average approved rating. If multiple products have the same average rating, provide the date of the oldest order among their most recent completed orders. Exclude product(s) that have reviews but have never been ordered. Note that all orders were placed between January 8, 2022, and May 31, 2023. The date should be in the format month/day/year (e.g., 01/01/20). Final answer should be in the format '[product_name, date]' without reasoning.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "others",
        "description": "Go to Reports -> Reviews -> By Product, and sort by the Average (approved) column to retrieve the product(s) with the {{number}}highest average approved rating. Then, go to Reports -> Products -> Ordered, display the report from 1/8/22 to 5/31/23, and identify the most recent order date for those products.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
            "number": "second-",
            "contents": "[Kenobi Trail Jacket, 03/13/22]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[Kenobi Trail Jacket, 03/13/22]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[Kenobi Trail Jacket, 03/13/22]"
        }
    },
    {
        "task_id": 10262,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Tell me the date of the most recent completed order for the product(s) with the {{number}}highest average approved rating. If multiple products have the same average rating, provide the date of the oldest order among their most recent completed orders. Exclude product(s) that have reviews but have never been ordered. Note that all orders were placed between January 8, 2022, and May 31, 2023. The date should be in the format month/day/year (e.g., 01/01/20). Final answer should be in the format '[product_name, date]' without reasoning.",
        "intent": "Tell me the date of the most recent completed order for the product(s) with the third-highest average approved rating. If multiple products have the same average rating, provide the date of the oldest order among their most recent completed orders. Exclude product(s) that have reviews but have never been ordered. Note that all orders were placed between January 8, 2022, and May 31, 2023. The date should be in the format month/day/year (e.g., 01/01/20). Final answer should be in the format '[product_name, date]' without reasoning.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "others",
        "description": "Go to Reports -> Reviews -> By Product, and sort by the Average (approved) column to retrieve the product(s) with the {{number}}highest average approved rating. Then, go to Reports -> Products -> Ordered, display the report from 1/8/22 to 5/31/23, and identify the most recent order date for those products.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
            "number": "third-",
            "contents": "[Hyperion Elements Jacket, 09/12/22]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[Hyperion Elements Jacket, 09/12/22]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[Hyperion Elements Jacket, 09/12/22]"
        }
    },
    {
        "task_id": 10263,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Tell me the date of the most recent completed order for the product(s) with the {{number}}highest average approved rating. If multiple products have the same average rating, provide the date of the oldest order among their most recent completed orders. Exclude product(s) that have reviews but have never been ordered. Note that all orders were placed between January 8, 2022, and May 31, 2023. The date should be in the format month/day/year (e.g., 01/01/20). Final answer should be in the format '[product_name, date]' without reasoning.",
        "intent": "Tell me the date of the most recent completed order for the product(s) with the fourth-highest average approved rating. If multiple products have the same average rating, provide the date of the oldest order among their most recent completed orders. Exclude product(s) that have reviews but have never been ordered. Note that all orders were placed between January 8, 2022, and May 31, 2023. The date should be in the format month/day/year (e.g., 01/01/20). Final answer should be in the format '[product_name, date]' without reasoning.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "others",
        "description": "Go to Reports -> Reviews -> By Product, and sort by the Average (approved) column to retrieve the product(s) with the {{number}}highest average approved rating. Then, go to Reports -> Products -> Ordered, display the report from 1/8/22 to 5/31/23, and identify the most recent order date for those products.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
            "number": "fourth-",
            "contents": "[Go-Get'r Pushup Grips, 06/14/22]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[Go-Get'r Pushup Grips, 06/14/22]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[Go-Get'r Pushup Grips, 06/14/22]"
        }
    },
    {
        "task_id": 10264,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "Tell me the date of the most recent completed order for the product(s) with the {{number}}highest average approved rating. If multiple products have the same average rating, provide the date of the oldest order among their most recent completed orders. Exclude product(s) that have reviews but have never been ordered. Note that all orders were placed between January 8, 2022, and May 31, 2023. The date should be in the format month/day/year (e.g., 01/01/20). Final answer should be in the format '[product_name, date]' without reasoning.",
        "intent": "Tell me the date of the most recent completed order for the product(s) with the fifth-highest average approved rating. If multiple products have the same average rating, provide the date of the oldest order among their most recent completed orders. Exclude product(s) that have reviews but have never been ordered. Note that all orders were placed between January 8, 2022, and May 31, 2023. The date should be in the format month/day/year (e.g., 01/01/20). Final answer should be in the format '[product_name, date]' without reasoning.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "others",
        "description": "Go to Reports -> Reviews -> By Product, and sort by the Average (approved) column to retrieve the product(s) with the {{number}}highest average approved rating. Then, go to Reports -> Products -> Ordered, display the report from 1/8/22 to 5/31/23, and identify the most recent order date for those products.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
            "number": "fifth-",
            "contents": "[Helios EverCool™ Tee, 01/15/22]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[Helios EverCool™ Tee, 01/15/22]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[Helios EverCool™ Tee, 01/15/22]"
        }
    },
    {
        "task_id": 10270,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the average approved rating of the products in the completed order(s) on {{date}}. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be in the format '[rating1, rating2, ...]' without reasoning, and sorted in ascending order of the products' original price. If multiple products have the same original price, their ratings should be sorted by product name in alphabetical order. If there are no reviews for the product, write N/A.",
        "intent": "List all the average approved rating of the products in the completed order(s) on June 26, 2022. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be in the format '[rating1, rating2, ...]' without reasoning, and sorted in ascending order of the products' original price. If multiple products have the same original price, their ratings should be sorted by product name in alphabetical order. If there are no reviews for the product, write N/A.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "",
        "description": "Go to Sales -> Orders, set the Status to Complete, and identify the orders placed on {{date}}. Then, click the View button to see the details of each order and determine the products that were ordered. Finally, go to Reports -> Reviews -> By Products, search for each product, and retrieve their average approved rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "date": "June 26, 2022",
            "contents": "[N/A, 60, 67, 60]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[N/A, 60, 67, 60]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[N/A, 60, 67, 60]"
        }
    },
    {
        "task_id": 10271,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the average approved rating of the products in the completed order(s) on {{date}}. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be in the format '[rating1, rating2, ...]' without reasoning, and sorted in ascending order of the products' original price. If multiple products have the same original price, their ratings should be sorted by product name in alphabetical order. If there are no reviews for the product, write N/A.",
        "intent": "List all the average approved rating of the products in the completed order(s) on April 27, 2023. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be in the format '[rating1, rating2, ...]' without reasoning, and sorted in ascending order of the products' original price. If multiple products have the same original price, their ratings should be sorted by product name in alphabetical order. If there are no reviews for the product, write N/A.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "",
        "description": "Go to Sales -> Orders, set the Status to Complete, and identify the orders placed on {{date}}. Then, click the View button to see the details of each order and determine the products that were ordered. Finally, go to Reports -> Reviews -> By Products, search for each product, and retrieve their average approved rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "date": "April 27, 2023",
            "contents": "[N/A, N/A, 60, 60, 87]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[N/A, N/A, 60, 60, 87]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[N/A, N/A, 60, 60, 87]"
        }
    },
    {
        "task_id": 10272,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the average approved rating of the products in the completed order(s) on {{date}}. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be in the format '[rating1, rating2, ...]' without reasoning, and sorted in ascending order of the products' original price. If multiple products have the same original price, their ratings should be sorted by product name in alphabetical order. If there are no reviews for the product, write N/A.",
        "intent": "List all the average approved rating of the products in the completed order(s) on April 5, 2023. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be in the format '[rating1, rating2, ...]' without reasoning, and sorted in ascending order of the products' original price. If multiple products have the same original price, their ratings should be sorted by product name in alphabetical order. If there are no reviews for the product, write N/A.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "",
        "description": "Go to Sales -> Orders, set the Status to Complete, and identify the orders placed on {{date}}. Then, click the View button to see the details of each order and determine the products that were ordered. Finally, go to Reports -> Reviews -> By Products, search for each product, and retrieve their average approved rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "date": "April 5, 2023",
            "contents": "[N/A, 80, 80, 70, 87, 70, 70, 60, 67, N/A]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[N/A, 80, 80, 70, 87, 70, 70, 60, 67, N/A]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[N/A, 80, 80, 70, 87, 70, 70, 60, 67, N/A]"
        }
    },
    {
        "task_id": 10273,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the average approved rating of the products in the completed order(s) on {{date}}. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be in the format '[rating1, rating2, ...]' without reasoning, and sorted in ascending order of the products' original price. If multiple products have the same original price, their ratings should be sorted by product name in alphabetical order. If there are no reviews for the product, write N/A.",
        "intent": "List all the average approved rating of the products in the completed order(s) on December 11, 2022. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be in the format '[rating1, rating2, ...]' without reasoning, and sorted in ascending order of the products' original price. If multiple products have the same original price, their ratings should be sorted by product name in alphabetical order. If there are no reviews for the product, write N/A.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "",
        "description": "Go to Sales -> Orders, set the Status to Complete, and identify the orders placed on {{date}}. Then, click the View button to see the details of each order and determine the products that were ordered. Finally, go to Reports -> Reviews -> By Products, search for each product, and retrieve their average approved rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "date": "December 11, 2022",
            "contents": "[60, N/A, 67, N/A, N/A, N/A, 50, 0]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[60, N/A, 67, N/A, N/A, N/A, 50, 0]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[60, N/A, 67, N/A, N/A, N/A, 50, 0]"
        }
    },
    {
        "task_id": 10274,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the average approved rating of the products in the completed order(s) on {{date}}. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be in the format '[rating1, rating2, ...]' without reasoning, and sorted in ascending order of the products' original price. If multiple products have the same original price, their ratings should be sorted by product name in alphabetical order. If there are no reviews for the product, write N/A.",
        "intent": "List all the average approved rating of the products in the completed order(s) on May 2, 2022. The rating is a value between 0 and 100 and final answer should be rounded to the nearest integer. Final answer should be in the format '[rating1, rating2, ...]' without reasoning, and sorted in ascending order of the products' original price. If multiple products have the same original price, their ratings should be sorted by product name in alphabetical order. If there are no reviews for the product, write N/A.",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "",
        "description": "Go to Sales -> Orders, set the Status to Complete, and identify the orders placed on {{date}}. Then, click the View button to see the details of each order and determine the products that were ordered. Finally, go to Reports -> Reviews -> By Products, search for each product, and retrieve their average approved rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/sales/order/",
            "date": "May 2, 2022",
            "contents": "[N/A, 60, 80, N/A, N/A, 87, N/A, N/A]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[N/A, 60, 80, N/A, N/A, 87, N/A, N/A]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[N/A, 60, 80, N/A, N/A, 87, N/A, N/A]"
        }
    },
    {
        "task_id": 10280,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the nicknames of reviewers who created an approved review for {{product_name1}} or {{product_name2}}, sorted in ascending order of rating. If multiple reviews have the same rating, sort these nicknames in alphabetical order. Final answer should be in the format '[nickname1, nickname2, ...]' without reasoning",
        "intent": "List all the nicknames of reviewers who created an approved review for Erica Evercool Sports Bra or Celeste Sports Bra, sorted in ascending order of rating. If multiple reviews have the same rating, sort these nicknames in alphabetical order. Final answer should be in the format '[nickname1, nickname2, ...]' without reasoning",
        "required_obs": "image",
        "type_main": "long-term",
        "type_sub": "",
        "description": "Go to Marketing -> User Content -> All Reviews, and filter the reviews by searching for the product in the Product field. Then, click the Edit button to open the details of each review and retrieve the rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
            "product_name1": "Erica Evercool Sports Bra",
            "product_name2": "Celeste Sports Bra",
            "contents": "[Ardelia, Eartha, Roxie, Cayla, Jammie, Tonya, Dorcas]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[Ardelia, Eartha, Roxie, Cayla, Jammie, Tonya, Dorcas]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[Ardelia, Eartha, Roxie, Cayla, Jammie, Tonya, Dorcas]"
        }
    },
    {
        "task_id": 10281,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the nicknames of reviewers who created an approved review for {{product_name1}} or {{product_name2}}, sorted in ascending order of rating. If multiple reviews have the same rating, sort these nicknames in alphabetical order. Final answer should be in the format '[nickname1, nickname2, ...]' without reasoning",
        "intent": "List all the nicknames of reviewers who created an approved review for Helios Endurance Tank or Erikssen CoolTech™ Fitness Tank, sorted in ascending order of rating. If multiple reviews have the same rating, sort these nicknames in alphabetical order. Final answer should be in the format '[nickname1, nickname2, ...]' without reasoning",
        "required_obs": "image",
        "type_main": "long-term",
        "type_sub": "",
        "description": "Go to Marketing -> User Content -> All Reviews, and filter the reviews by searching for the product in the Product field. Then, click the Edit button to open the details of each review and retrieve the rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
            "product_name1": "Helios Endurance Tank",
            "product_name2": "Erikssen CoolTech™ Fitness Tank",
            "contents": "[Dominic, Scotty, Alexander, Edmund, Trey, Graham, Mervin, Patrick]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[Dominic, Scotty, Alexander, Edmund, Trey, Graham, Mervin, Patrick]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[Dominic, Scotty, Alexander, Edmund, Trey, Graham, Mervin, Patrick]"
        }
    },
    {
        "task_id": 10282,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the nicknames of reviewers who created an approved review for {{product_name1}} or {{product_name2}}, sorted in ascending order of rating. If multiple reviews have the same rating, sort these nicknames in alphabetical order. Final answer should be in the format '[nickname1, nickname2, ...]' without reasoning",
        "intent": "List all the nicknames of reviewers who created an approved review for Strike Endurance Tee or Logan HeatTec® Tee, sorted in ascending order of rating. If multiple reviews have the same rating, sort these nicknames in alphabetical order. Final answer should be in the format '[nickname1, nickname2, ...]' without reasoning",
        "required_obs": "image",
        "type_main": "long-term",
        "type_sub": "",
        "description": "Go to Marketing -> User Content -> All Reviews, and filter the reviews by searching for the product in the Product field. Then, click the Edit button to open the details of each review and retrieve the rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
            "product_name1": "Strike Endurance Tee",
            "product_name2": "Logan HeatTec® Tee",
            "contents": "[Carlo, Hiram, Don, Gus, Maynard, Alfred, Hiram]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[Carlo, Hiram, Don, Gus, Maynard, Alfred, Hiram]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[Carlo, Hiram, Don, Gus, Maynard, Alfred, Hiram]"
        }
    },
    {
        "task_id": 10283,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the nicknames of reviewers who created an approved review for {{product_name1}} or {{product_name2}}, sorted in ascending order of rating. If multiple reviews have the same rating, sort these nicknames in alphabetical order. Final answer should be in the format '[nickname1, nickname2, ...]' without reasoning",
        "intent": "List all the nicknames of reviewers who created an approved review for Cruise Dual Analog Watch or Clamber Watch, sorted in ascending order of rating. If multiple reviews have the same rating, sort these nicknames in alphabetical order. Final answer should be in the format '[nickname1, nickname2, ...]' without reasoning",
        "required_obs": "image",
        "type_main": "long-term",
        "type_sub": "",
        "description": "Go to Marketing -> User Content -> All Reviews, and filter the reviews by searching for the product in the Product field. Then, click the Edit button to open the details of each review and retrieve the rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
            "product_name1": "Cruise Dual Analog Watch",
            "product_name2": "Clamber Watch",
            "contents": "[Bobby, Nadia, Tommie, Colleen, Laronda, Jamie, Frank]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[Bobby, Nadia, Tommie, Colleen, Laronda, Jamie, Frank]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[Bobby, Nadia, Tommie, Colleen, Laronda, Jamie, Frank]"
        }
    },
    {
        "task_id": 10284,
        "sites": [
            "shopping_admin"
        ],
        "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
        "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
        "storage_state": "./.auth/shopping_admin_state.json",
        "intent_template": "List all the nicknames of reviewers who created an approved review for {{product_name1}} or {{product_name2}}, sorted in ascending order of rating. If multiple reviews have the same rating, sort these nicknames in alphabetical order. Final answer should be in the format '[nickname1, nickname2, ...]' without reasoning",
        "intent": "List all the nicknames of reviewers who created an approved review for Josie Yoga Jacket or Lando Gym Jacket, sorted in ascending order of rating. If multiple reviews have the same rating, sort these nicknames in alphabetical order. Final answer should be in the format '[nickname1, nickname2, ...]' without reasoning",
        "required_obs": "image",
        "type_main": "long-term",
        "type_sub": "",
        "description": "Go to Marketing -> User Content -> All Reviews, and filter the reviews by searching for the product in the Product field. Then, click the Edit button to open the details of each review and retrieve the rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7780/admin/admin/dashboard/",
            "start_url_lite": "http://172.16.2.4:7780/admin/reports/report_review/product/",
            "product_name1": "Josie Yoga Jacket",
            "product_name2": "Lando Gym Jacket",
            "contents": "[Alesha, Emmett, Rudolf, Tennille, Burl, Lakeesha, Elvina]",
            "checkpoint1": "",
            "checkpoint2": "",
            "checkpoint_info": "",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "[Alesha, Emmett, Rudolf, Tennille, Burl, Lakeesha, Elvina]"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "[Alesha, Emmett, Rudolf, Tennille, Burl, Lakeesha, Elvina]"
        }
    }
]