[
    {
        "task_id": 20000,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/snakebyte-twin-charge-x-xbox-one-controller-charger-dual-docking-charging-station-incl-2-rechargeable-battery-packs-for-xbox-one-controller-elite-s-controller-gamepad-black.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Please provide the distribution of reviews for {{product}}. Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}",
        "intent": "Please provide the distribution of reviews for Snakebyte Twin Charge X - Xbox One Controller Charger Dual Docking/Charging Station incl. 2 Rechargeable Battery Packs for XBOX One Controller / Elite / S Controller Gamepad, Black. Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}",
        "required_obs": "any",
        "type_main": "massive_memory",
        "description": "First, go to the product page and navigate to the reviews page. Then, on the reviews page, count the distribution of review ratings.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/snakebyte-twin-charge-x-xbox-one-controller-charger-dual-docking-charging-station-incl-2-rechargeable-battery-packs-for-xbox-one-controller-elite-s-controller-gamepad-black.html",
            "product": "Snakebyte Twin Charge X - Xbox One Controller Charger Dual Docking/Charging Station incl. 2 Rechargeable Battery Packs for XBOX One Controller / Elite / S Controller Gamepad, Black",
            "contents": "5: 9, 4: 2, 3: 3, 2: 3, 1: 15",
            "checkpoint1": "5, 5, 5, 1, 5, 1, 1, 1, 1, 1",
            "checkpoint2": "1, 1, 1, 1, 1, 1, 2, 4, 4, 3",
            "checkpoint3": "3, 5, 1, 5, 3, 1, 5, 1, 5, 2",
            "checkpoint4": "5, 2",
            "checkpoint_info": "checkpoint1: review in page1, checkpoint2: review in page2, checkpoint3: review in page3",
            "difficulty": "medium"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "5: 9, 4: 2, 3: 3, 2: 3, 1: 15"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "5: 9, 4: 2, 3: 3, 2: 3, 1: 15"
        }
    },
    {
        "task_id": 20001,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/denture-case-denture-cups-bath-toothbrush-with-hard-denture-dentures-container-with-basket-denture-holder-for-travel-mouth-guard-night-gum-retainer-container-pink.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Please provide the distribution of reviews for {{product}}. Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}",
        "intent": "Please provide the distribution of reviews for Denture Case,Denture Cups Bath, Toothbrush with hard denture, Dentures Container with Basket Denture Holder for Travel,Mouth Guard Night Gum Retainer Container (Pink). Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}",
        "required_obs": "any",
        "type_main": "massive_memory",
        "description": "First, go to the product page and navigate to the reviews page. Then, on the reviews page, count the distribution of review ratings.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/denture-case-denture-cups-bath-toothbrush-with-hard-denture-dentures-container-with-basket-denture-holder-for-travel-mouth-guard-night-gum-retainer-container-pink.html",
            "product": "Denture Case,Denture Cups Bath, Toothbrush with hard denture, Dentures Container with Basket Denture Holder for Travel,Mouth Guard Night Gum Retainer Container (Pink)",
            "contents": "5: 42, 4: 6, 3: 8, 2: 7, 1: 7",
            "checkpoint1": "5*8, 3*1, 1*1",
            "checkpoint2": "5*4, 2*1, 1*5",
            "checkpoint3": "5*5, 3*1, 2*3, 1*1",
            "checlpoint4": "5*9, 3*1",
            "checkpoint5": "5*6, 4*1, 3*2, 2*1",
            "checkpoint6": "5*6, 4*3, 3*1",
            "checkpoint7": "5*4, 4*2, 3*2, 2*2",
            "checkpoint_info": "checkpoint1: review in page1, checkpoint2: review in page2",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "5: 42, 4: 6, 3: 8, 2: 7, 1: 7"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "5: 42, 4: 6, 3: 8, 2: 7, 1: 7"
        }
    },
    {
        "task_id": 20002,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/manhattan-comfort-avesta-double-side-table-2-0-collection-free-standing-modern-side-table-tv-stand-with-storage-includes-2-doors-with-3-shelves-and-features-splayed-legs-white-stamp-grey-legs.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Please provide the distribution of reviews for {{product}}. Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}",
        "intent": "Please provide the distribution of reviews for Manhattan Comfort Avesta Double Side Table 2.0 Collection Free Standing Modern Side Table / TV Stand with Storage Includes 2 Doors with 3 Shelves and Features Splayed Legs, White/Stamp/Grey Legs. Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}",
        "required_obs": "any",
        "type_main": "massive_memory",
        "description": "First, go to the product page and navigate to the reviews page. Then, on the reviews page, count the distribution of review ratings.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/manhattan-comfort-avesta-double-side-table-2-0-collection-free-standing-modern-side-table-tv-stand-with-storage-includes-2-doors-with-3-shelves-and-features-splayed-legs-white-stamp-grey-legs.html",
            "product": "Manhattan Comfort Avesta Double Side Table 2.0 Collection Free Standing Modern Side Table / TV Stand with Storage Includes 2 Doors with 3 Shelves and Features Splayed Legs, White/Stamp/Grey Legs",
            "contents": "5: 16, 4: 23, 3: 17, 2: 8, 1: 6",
            "checkpoint1": "2, 1, 1, 0, 1, 0, 1",
            "checkpoint2": "2, 3, 1, 1, 1, 0, 0",
            "checkpoint3": "2, 2, 1, 2, 3, 2, 5",
            "checkpoint4": "0, 1, 3, 4, 4, 7, 4",
            "checkpoint5": "4, 3, 4, 3, 1, 1, 0",
            "checkpoint_info": "checkpoint1: review in page1, checkpoint2: review in page2",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "5: 16, 4: 23, 3: 17, 2: 8, 1: 6"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "5: 16, 4: 23, 3: 17, 2: 8, 1: 6"
        }
    },
    {
        "task_id": 20003,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/denture-do-it-yourself-full-set-of-top-and-bottom-fake-teeth-for-improve-smile.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Please provide the distribution of reviews for {{product}}. Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}",
        "intent": "Please provide the distribution of reviews for Denture Do it Yourself Full Set of Top and Bottom Fake Teeth for Improve Smile. Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}",
        "required_obs": "any",
        "type_main": "massive_memory",
        "description": "First, go to the product page and navigate to the reviews page. Then, on the reviews page, count the distribution of review ratings.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/denture-do-it-yourself-full-set-of-top-and-bottom-fake-teeth-for-improve-smile.html",
            "product": "Denture Do it Yourself Full Set of Top and Bottom Fake Teeth for Improve Smile",
            "contents": "5: 6, 4: 6, 3: 10, 2: 5, 1: 45",
            "checkpoint1": "8, 10, 9, 10, 4, 1, 2, 1",
            "checkpoint2": "0, 0, 0, 0, 3, 0, 2, 0",
            "checkpoint3": "0, 0, 0, 0, 2, 5, 2, 1",
            "checkpoint4": "0, 0, 0, 0, 0, 3, 3, 0",
            "checkpoint5": "2, 0, 1, 0, 1, 1, 1, 0",
            "checkpoint_info": "checkpoint1: review in page1, checkpoint2: review in page2",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "5: 6, 4: 6, 3: 10, 2: 5, 1: 45"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "5: 6, 4: 6, 3: 10, 2: 5, 1: 45"
        }
    },
    {
        "task_id": 20004,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/philadelphia-strawberry-cheesecake-snacks-2-ct-pack-3-25-oz-cups.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Please provide the distribution of reviews for {{product}}. Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}",
        "intent": "Please provide the distribution of reviews for Philadelphia Strawberry Cheesecake Snacks (2 ct Pack, 3.25 oz Cups). Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}",
        "required_obs": "any",
        "type_main": "massive_memory",
        "description": "First, go to the product page and navigate to the reviews page. Then, on the reviews page, count the distribution of review ratings.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/philadelphia-strawberry-cheesecake-snacks-2-ct-pack-3-25-oz-cups.html",
            "product": "Philadelphia Strawberry Cheesecake Snacks (2 ct Pack, 3.25 oz Cups)",
            "contents": "5: 41, 4: 10, 3: 7, 2: 4, 1: 12",
            "checkpoint1": "1, 6, 0, 0, 2, 0, 1, 2",
            "checkpoint2": "0, 1, 1, 0, 0, 2, 0, 0",
            "checkpoint3": "1, 2, 0, 1, 1, 0, 2, 0",
            "checkpoint4": "0, 0, 1, 1, 3, 2, 1, 2",
            "checkpoint5": "8, 1, 8, 8, 4, 6, 6, 0",
            "checkpoint_info": "checkpoint1: review in page1, checkpoint2: review in page2",
            "difficulty": "hard"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "5: 41, 4: 10, 3: 7, 2: 4, 1: 12"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "5: 41, 4: 10, 3: 7, 2: 4, 1: 12"
        }
    },
    {
        "task_id": 20010,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/video-games/pc/accessories.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Please provide the distribution of reviews in the {{category}} category. Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100). If the review star value is a decimal, round it down (e.g., 3.9 -> 3).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}, 0: {number}",
        "intent": "Please provide the distribution of reviews in the Accessories of PC of Video Games category. Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100). If the review star value is a decimal, round it down (e.g., 3.9 -> 3).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}, 0: {number}",
        "required_obs": "text",
        "type_main": "massive_memory",
        "type_sub": "long-term",
        "description": "Go to the category page and count the distribution of reviews for the displayed products.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/video-games/pc/accessories.html",
            "category": "Accessories of PC of Video Games",
            "contents": "5: 1, 4: 17, 3: 33, 2: 5, 1: 1, 0: 69"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "5: 1, 4: 17, 3: 33, 2: 5, 1: 1, 0: 69"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "5: 1, 4: 17, 3: 33, 2: 5, 1: 1, 0: 69"
        }
    },
    {
        "task_id": 20011,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/clothing-shoes-jewelry/women/uniforms-work-safety.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Please provide the distribution of reviews in the {{category}} category. Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100). If the review star value is a decimal, round it down (e.g., 3.9 -> 3).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}, 0: {number}",
        "intent": "Please provide the distribution of reviews in the Uniforms, Work & Safety of Women of Clothing, Shoes & Jewelry category. Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100). If the review star value is a decimal, round it down (e.g., 3.9 -> 3).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}, 0: {number}",
        "required_obs": "text",
        "type_main": "massive_memory",
        "type_sub": "long-term",
        "description": "Go to the category page and count the distribution of reviews for the displayed products.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/clothing-shoes-jewelry/women/uniforms-work-safety.html",
            "category": "Uniforms, Work & Safety of Women of Clothing, Shoes & Jewelry",
            "contents": "5: 6, 4: 21, 3: 11, 2: 2, 1: 1, 0: 67"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "5: 6, 4: 21, 3: 11, 2: 2, 1: 1, 0: 67"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "5: 6, 4: 21, 3: 11, 2: 2, 1: 1, 0: 67"
        }
    },
    {
        "task_id": 20012,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/grocery-gourmet-food/frozen/ice-cream-novelties.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Please provide the distribution of reviews in the {{category}} category. Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100). If the review star value is a decimal, round it down (e.g., 3.9 -> 3).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}, 0: {number}",
        "intent": "Please provide the distribution of reviews in the Ice Cream & Novelties of Frozen of Grocery & Gourmet Food category. Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100). If the review star value is a decimal, round it down (e.g., 3.9 -> 3).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}, 0: {number}",
        "required_obs": "text",
        "type_main": "massive_memory",
        "type_sub": "long-term",
        "description": "Go to the category page and count the distribution of reviews for the displayed products.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/grocery-gourmet-food/frozen/ice-cream-novelties.html",
            "category": "Ice Cream & Novelties of Frozen of Grocery & Gourmet Food",
            "contents": "5: 2, 4: 16, 3: 11, 2: 10, 1: 1, 0: 60"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "5: 2, 4: 16, 3: 11, 2: 10, 1: 1, 0: 60"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "5: 2, 4: 16, 3: 11, 2: 10, 1: 1, 0: 60"
        }
    },
    {
        "task_id": 20013,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/grocery-gourmet-food/meat-seafood/seafood.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Please provide the distribution of reviews in the {{category}} category. Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100). If the review star value is a decimal, round it down (e.g., 3.9 -> 3).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}, 0: {number}",
        "intent": "Please provide the distribution of reviews in the Seafood of Meat & Seafood of Grocery & Gourmet Food category. Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100). If the review star value is a decimal, round it down (e.g., 3.9 -> 3).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}, 0: {number}",
        "required_obs": "text",
        "type_main": "massive_memory",
        "type_sub": "long-term",
        "description": "Go to the category page and count the distribution of reviews for the displayed products.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/grocery-gourmet-food/meat-seafood/seafood.html",
            "category": "Seafood of Meat & Seafood of Grocery & Gourmet Food",
            "contents": "5: 11, 4: 12, 3: 12, 2: 4, 1: 3, 0: 98"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "5: 11, 4: 12, 3: 12, 2: 4, 1: 3, 0: 98"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "5: 11, 4: 12, 3: 12, 2: 4, 1: 3, 0: 98"
        }
    },
    {
        "task_id": 20014,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/electronics/computers-accessories/tablet-accessories.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Please provide the distribution of reviews in the {{category}} category. Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100). If the review star value is a decimal, round it down (e.g., 3.9 -> 3).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}, 0: {number}",
        "intent": "Please provide the distribution of reviews in the Tablet Accessories of Computers & Accessories of Electronics category. Here, the review stars (1~5) correspond to a Rating that is 20 times their value (1 = Rating 20, 2 = Rating 40, 3 = Rating 60, 4 = Rating 80, 5 = Rating 100). If the review star value is a decimal, round it down (e.g., 3.9 -> 3).\nFollow the format below, using numerical values:\n5: {number}, 4: {number}, 3: {number}, 2: {number}, 1: {number}, 0: {number}",
        "required_obs": "text",
        "type_main": "massive_memory",
        "type_sub": "long-term",
        "description": "Go to the category page and count the distribution of reviews for the displayed products.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/electronics/computers-accessories/tablet-accessories.html",
            "category": "Tablet Accessories of Computers & Accessories of Electronics",
            "contents": "5: 3, 4: 29, 3: 36, 2: 10, 1: 2, 0: 74"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "5: 3, 4: 29, 3: 36, 2: 10, 1: 2, 0: 74"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "5: 3, 4: 29, 3: 36, 2: 10, 1: 2, 0: 74"
        }
    },
    {
        "task_id": 20020,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/patio-lawn-garden/gardening-lawn-care.html?cat=179",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Tell me the highest-rated product in {{category}} within the price range of {{minimum_price}} to {{maximum_price}}. If the average rating is the same, select the product with the highest number of reviews. Output only the full name of the product. If there is more than one matching product, separate them with commas in ascending order of price (e.g., product1, product2).",
        "intent": "Tell me the highest-rated product in Gardening & Lawn Care of Patio, Lawn & Garden within the price range of $30 to $69.99. If the average rating is the same, select the product with the highest number of reviews. Output only the full name of the product. If there is more than one matching product, separate them with commas in ascending order of price (e.g., product1, product2).",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "massive_memory",
        "description": "Go to the specified category. Then, within the specified price range, select the product with the highest-rated review.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/patio-lawn-garden/gardening-lawn-care.html?cat=179",
            "category": "Gardening & Lawn Care of Patio, Lawn & Garden",
            "minimum_price": "$30",
            "maximum_price": "$69.99",
            "contents": "TOUCH MISS Ceramic Planter for Outdoor Indoor,Ceramic pots for Plants,Small Plant pots Set of 4 Flower Pot 3.35inch Blue, Fat Plants San Diego Miniature Flowering Cactus and Succulent Plant Collection (20)",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "TOUCH MISS Ceramic Planter for Outdoor Indoor,Ceramic pots for Plants,Small Plant pots Set of 4 Flower Pot 3.35inch Blue, Fat Plants San Diego Miniature Flowering Cactus and Succulent Plant Collection (20)"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "TOUCH MISS Ceramic Planter for Outdoor Indoor,Ceramic pots for Plants,Small Plant pots Set of 4 Flower Pot 3.35inch Blue, Fat Plants San Diego Miniature Flowering Cactus and Succulent Plant Collection (20)"
        }
    },
    {
        "task_id": 20021,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/electronics/home-audio.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Tell me the highest-rated product in {{category}} within the price range of {{minimum_price}} to {{maximum_price}}. If the average rating is the same, select the product with the highest number of reviews. Output only the full name of the product. If there is more than one matching product, separate them with commas in ascending order of price (e.g., product1, product2).",
        "intent": "Tell me the highest-rated product in Home Audio of Electronics within the price range of $1,000.00 to $4,999.99. If the average rating is the same, select the product with the highest number of reviews. Output only the full name of the product. If there is more than one matching product, separate them with commas in ascending order of price (e.g., product1, product2).",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "massive_memory",
        "description": "Go to the specified category. Then, within the specified price range, select the product with the highest-rated review.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/electronics/home-audio.html",
            "category": "Home Audio of Electronics",
            "minimum_price": "$1,000.00",
            "maximum_price": "$4,999.99",
            "contents": "REL Acoustics T/7x Subwoofer, 8 inch Front-Firing Driver, Arrow Wireless Port, High Gloss Black",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "REL Acoustics T/7x Subwoofer, 8 inch Front-Firing Driver, Arrow Wireless Port, High Gloss Black"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "REL Acoustics T/7x Subwoofer, 8 inch Front-Firing Driver, Arrow Wireless Port, High Gloss Black"
        }
    },
    {
        "task_id": 20022,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/cell-phones-accessories/accessories.html?cat=204",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Tell me the highest-rated product in {{category}} within the price range of {{minimum_price}} to {{maximum_price}}. If the average rating is the same, select the product with the highest number of reviews. Output only the full name of the product. If there is more than one matching product, separate them with commas in ascending order of price (e.g., product1, product2).",
        "intent": "Tell me the highest-rated product in Chargers & Power Adapters of Accessories of Cell Phones & Accessories within the price range of $30.00 to $69.99. If the average rating is the same, select the product with the highest number of reviews. Output only the full name of the product. If there is more than one matching product, separate them with commas in ascending order of price (e.g., product1, product2).",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "massive_memory",
        "description": "Go to the specified category. Then, within the specified price range, select the product with the highest-rated review.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/cell-phones-accessories/accessories.html?cat=204",
            "category": "Chargers & Power Adapters of Accessories of Cell Phones & Accessories",
            "minimum_price": "$30.00",
            "maximum_price": "$69.99",
            "contents": "AMMOD 65W GaN Charger, USB C Wall Charger with Foldable Plug, Type C Charger with 2 Ports Fast Charging for iPhone, MacBook Pro/Air, iPad Pro,Galaxy S20/S10 and More",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "AMMOD 65W GaN Charger, USB C Wall Charger with Foldable Plug, Type C Charger with 2 Ports Fast Charging for iPhone, MacBook Pro/Air, iPad Pro,Galaxy S20/S10 and More"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "AMMOD 65W GaN Charger, USB C Wall Charger with Foldable Plug, Type C Charger with 2 Ports Fast Charging for iPhone, MacBook Pro/Air, iPad Pro,Galaxy S20/S10 and More"
        }
    },
    {
        "task_id": 20023,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/cell-phones-accessories/accessories.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Tell me the highest-rated product in {{category}} within the price range of {{minimum_price}} to {{maximum_price}}. If the average rating is the same, select the product with the highest number of reviews. Output only the full name of the product. If there is more than one matching product, separate them with commas in ascending order of price (e.g., product1, product2).",
        "intent": "Tell me the highest-rated product in Accessories of Cell Phones & Accessories within the price range of $200.00 to $600.00. If the average rating is the same, select the product with the highest number of reviews. Output only the full name of the product. If there is more than one matching product, separate them with commas in ascending order of price (e.g., product1, product2).",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "massive_memory",
        "description": "Go to the specified category. Then, within the specified price range, select the product with the highest-rated review.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/cell-phones-accessories/accessories.html",
            "category": "Accessories of Cell Phones & Accessories",
            "minimum_price": "$200.00",
            "maximum_price": "$600.00",
            "contents": "Home 5G Cell Phone Signal Booster | Boost 2G/3G/4G LTE 5G Data and Calls | Compatible with All U.S. Carriers Verizon, AT&T, Straight Talk, U.S. Cellular | Cover up to 4,500 sq.ft | FCC Approved",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "Home 5G Cell Phone Signal Booster | Boost 2G/3G/4G LTE 5G Data and Calls | Compatible with All U.S. Carriers Verizon, AT&T, Straight Talk, U.S. Cellular | Cover up to 4,500 sq.ft | FCC Approved"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Home 5G Cell Phone Signal Booster | Boost 2G/3G/4G LTE 5G Data and Calls | Compatible with All U.S. Carriers Verizon, AT&T, Straight Talk, U.S. Cellular | Cover up to 4,500 sq.ft | FCC Approved"
        }
    },
    {
        "task_id": 20024,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/grocery-gourmet-food/food-beverage-gifts.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Tell me the highest-rated product in {{category}} within the price range of {{minimum_price}} to {{maximum_price}}. If the average rating is the same, select the product with the highest number of reviews. Output only the full name of the product. If there is more than one matching product, separate them with commas in ascending order of price (e.g., product1, product2).",
        "intent": "Tell me the highest-rated product in Food & Beverage Gifts of Grocery & Gourmet Food within the price range of $100.00 to $399.00. If the average rating is the same, select the product with the highest number of reviews. Output only the full name of the product. If there is more than one matching product, separate them with commas in ascending order of price (e.g., product1, product2).",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "massive_memory",
        "description": "Go to the specified category. Then, within the specified price range, select the product with the highest-rated review.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/grocery-gourmet-food/food-beverage-gifts.html",
            "category": "Food & Beverage Gifts of Grocery & Gourmet Food",
            "minimum_price": "$100.00",
            "maximum_price": "$399.00",
            "contents": "Greenfire Custom Fortune Cookies, for Advertising and Promotions, Full Color Fortune Printing, Premium Vanilla, Bulk Quantity (120 count)",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "Greenfire Custom Fortune Cookies, for Advertising and Promotions, Full Color Fortune Printing, Premium Vanilla, Bulk Quantity (120 count)"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Greenfire Custom Fortune Cookies, for Advertising and Promotions, Full Color Fortune Printing, Premium Vanilla, Bulk Quantity (120 count)"
        }
    },
    {
        "task_id": 20030,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/electronics/home-audio.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Tell me the top 3 products with the highest number of reviews in {{category}} within the price range of {{minimum_price}} to {{maximum_price}}. If multiple products have the same number of reviews, prioritize those with the highest average rating. Output only the full name of the product, listed in descending order by number of reviews and then by rating, separated by commas.",
        "intent": "Tell me the top 3 products with the highest number of reviews in Home Audio of  Electronics within the price range of $1,000.00 to $9,999.99. If multiple products have the same number of reviews, prioritize those with the highest average rating. Output only the full name of the product, listed in descending order by number of reviews and then by rating, separated by commas.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "massive_memory",
        "description": "First, go to the specified category. Then, within the specified price range, select the product with the highest number of reviews.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/electronics/home-audio.html",
            "category": "Home Audio of  Electronics",
            "minimum_price": "$1,000.00",
            "maximum_price": "$9,999.99",
            "contents": "Klipsch RP-6000F Floorstanding Speaker (Ebony Pair), Bose Bass Module 700 - Black- Wireless, Compact Subwoofer & Smart Soundbar 900 Dolby Atmos with Alexa Built-in, Bluetooth connectivity - Black, JBL Bar 9.1 - Channel Soundbar System with Surround Speakers and Dolby Atmos",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "Klipsch RP-6000F Floorstanding Speaker (Ebony Pair), Bose Bass Module 700 - Black- Wireless, Compact Subwoofer & Smart Soundbar 900 Dolby Atmos with Alexa Built-in, Bluetooth connectivity - Black, JBL Bar 9.1 - Channel Soundbar System with Surround Speakers and Dolby Atmos"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Klipsch RP-6000F Floorstanding Speaker (Ebony Pair), Bose Bass Module 700 - Black- Wireless, Compact Subwoofer & Smart Soundbar 900 Dolby Atmos with Alexa Built-in, Bluetooth connectivity - Black, JBL Bar 9.1 - Channel Soundbar System with Surround Speakers and Dolby Atmos"
        }
    },
    {
        "task_id": 20031,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/cell-phones-accessories/accessories.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Tell me the top 3 products with the highest number of reviews in {{category}} within the price range of {{minimum_price}} to {{maximum_price}}. If multiple products have the same number of reviews, prioritize those with the highest average rating. Output only the full name of the product, listed in descending order by number of reviews and then by rating, separated by commas.",
        "intent": "Tell me the top 3 products with the highest number of reviews in Accessories of Cell Phones & Accessories within the price range of $80.00 to $699.99. If multiple products have the same number of reviews, prioritize those with the highest average rating. Output only the full name of the product, listed in descending order by number of reviews and then by rating, separated by commas.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "massive_memory",
        "description": "First, go to the specified category. Then, within the specified price range, select the product with the highest number of reviews.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/cell-phones-accessories/accessories.html",
            "category": "Accessories of Cell Phones & Accessories",
            "minimum_price": "$80.00",
            "maximum_price": "$699.99",
            "contents": "Home Cell Phone Signal Booster Verizon Signal Booster 4G LTE 5G Band 13 Cell Phone Booster Verizon Cell Phone Signal Booster Amplifier Extender to Boost Voice and Data FCC Approved (White), Oculus Go Standalone Virtual Reality Headset - 32GB, weBoost Drive Reach (470154R) Factory Refurbished Vehicle Cell Phone Signal Booster | Car, Truck, Van, or SUV | U.S. Company | All U.S. Networks and Carriers | 1 Year Manufacturer Warranty",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "Home Cell Phone Signal Booster Verizon Signal Booster 4G LTE 5G Band 13 Cell Phone Booster Verizon Cell Phone Signal Booster Amplifier Extender to Boost Voice and Data FCC Approved (White), Oculus Go Standalone Virtual Reality Headset - 32GB, weBoost Drive Reach (470154R) Factory Refurbished Vehicle Cell Phone Signal Booster | Car, Truck, Van, or SUV | U.S. Company | All U.S. Networks and Carriers | 1 Year Manufacturer Warranty"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Home Cell Phone Signal Booster Verizon Signal Booster 4G LTE 5G Band 13 Cell Phone Booster Verizon Cell Phone Signal Booster Amplifier Extender to Boost Voice and Data FCC Approved (White), Oculus Go Standalone Virtual Reality Headset - 32GB, weBoost Drive Reach (470154R) Factory Refurbished Vehicle Cell Phone Signal Booster | Car, Truck, Van, or SUV | U.S. Company | All U.S. Networks and Carriers | 1 Year Manufacturer Warranty"
        }
    },
    {
        "task_id": 20032,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/electronics/video-projectors.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Tell me the top 3 products with the highest number of reviews in {{category}} within the price range of {{minimum_price}} to {{maximum_price}}. If multiple products have the same number of reviews, prioritize those with the highest average rating. Output only the full name of the product, listed in descending order by number of reviews and then by rating, separated by commas.",
        "intent": "Tell me the top 3 products with the highest number of reviews in Video Projectors of Electronics within the price range of $900.00 to $3,999.99. If multiple products have the same number of reviews, prioritize those with the highest average rating. Output only the full name of the product, listed in descending order by number of reviews and then by rating, separated by commas.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "massive_memory",
        "description": "First, go to the specified category. Then, within the specified price range, select the product with the highest number of reviews.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/electronics/video-projectors.html",
            "category": "Video Projectors of Electronics",
            "minimum_price": "$900.00",
            "maximum_price": "$3,999.99",
            "contents": "OMMC 4K UHD Laser TV Home Theatre Projector | Bright 2500 ANSI Lumens | Ultra Short Throw | Built-in Integrated SoundBar | Smart Android System| 3G RAM+64G Storage, Epson Home Cinema 3200 4K PRO-UHD 3-Chip Projector with HDR, Optoma UHD30 True 4K UHD Gaming Projector | 16ms Response Time with Enhanced Gaming Mode | Lowest Input Lag on 4K Projector | 240Hz Refresh Rate | HDR10 & HLG",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "OMMC 4K UHD Laser TV Home Theatre Projector | Bright 2500 ANSI Lumens | Ultra Short Throw | Built-in Integrated SoundBar | Smart Android System| 3G RAM+64G Storage, Epson Home Cinema 3200 4K PRO-UHD 3-Chip Projector with HDR, Optoma UHD30 True 4K UHD Gaming Projector | 16ms Response Time with Enhanced Gaming Mode | Lowest Input Lag on 4K Projector | 240Hz Refresh Rate | HDR10 & HLG"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "OMMC 4K UHD Laser TV Home Theatre Projector | Bright 2500 ANSI Lumens | Ultra Short Throw | Built-in Integrated SoundBar | Smart Android System| 3G RAM+64G Storage, Epson Home Cinema 3200 4K PRO-UHD 3-Chip Projector with HDR, Optoma UHD30 True 4K UHD Gaming Projector | 16ms Response Time with Enhanced Gaming Mode | Lowest Input Lag on 4K Projector | 240Hz Refresh Rate | HDR10 & HLG"
        }
    },
    {
        "task_id": 20033,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/grocery-gourmet-food/alcoholic-beverages.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Tell me the top 3 products with the highest number of reviews in {{category}} within the price range of {{minimum_price}} to {{maximum_price}}. If multiple products have the same number of reviews, prioritize those with the highest average rating. Output only the full name of the product, listed in descending order by number of reviews and then by rating, separated by commas.",
        "intent": "Tell me the top 3 products with the highest number of reviews in Alcoholic Beverages of Grocery & Gourmet Food within the price range of $20.00 to $79.99. If multiple products have the same number of reviews, prioritize those with the highest average rating. Output only the full name of the product, listed in descending order by number of reviews and then by rating, separated by commas.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "massive_memory",
        "description": "First, go to the specified category. Then, within the specified price range, select the product with the highest number of reviews.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/grocery-gourmet-food/alcoholic-beverages.html",
            "category": "Alcoholic Beverages of Grocery & Gourmet Food",
            "minimum_price": "$20.00",
            "maximum_price": "$79.99",
            "contents": "Sutter Home Fre Brut Non-alcoholic Champagne Wine Two Pack (Pack of 2), Heineken, Lager, 6pk, 12 Fl Oz, Underwood The Bubbles Can, 375 ml",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "Sutter Home Fre Brut Non-alcoholic Champagne Wine Two Pack (Pack of 2), Heineken, Lager, 6pk, 12 Fl Oz, Underwood The Bubbles Can, 375 ml"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Sutter Home Fre Brut Non-alcoholic Champagne Wine Two Pack (Pack of 2), Heineken, Lager, 6pk, 12 Fl Oz, Underwood The Bubbles Can, 375 ml"
        }
    },
    {
        "task_id": 20034,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/sports-outdoors/hunting-fishing.html?price=0-1000",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Tell me the top 3 products with the highest number of reviews in {{category}} within the price range of {{minimum_price}} to {{maximum_price}}. If multiple products have the same number of reviews, prioritize those with the highest average rating. Output only the full name of the product, listed in descending order by number of reviews and then by rating, separated by commas.",
        "intent": "Tell me the top 3 products with the highest number of reviews in Hunting & Fishing of Sports & Outdoors within the price range of $100.00 to $899.99. If multiple products have the same number of reviews, prioritize those with the highest average rating. Output only the full name of the product, listed in descending order by number of reviews and then by rating, separated by commas.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "massive_memory",
        "description": "First, go to the specified category. Then, within the specified price range, select the product with the highest number of reviews.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/sports-outdoors/hunting-fishing.html?price=0-1000",
            "category": "Hunting & Fishing of Sports & Outdoors",
            "minimum_price": "$100.00",
            "maximum_price": "$899.99",
            "contents": "Vortex Optics Diamondback Tactical First Focal Plane Riflescopes, ACPOTEL Night Vision Monocular, 5 x 35 Digital Night Vision HD Scopes with Rechargeable/Take Photo/Video Recording/Playback Function for Outdoor/Surveillance/Security/Hunting/Hiking, CREATIVE XP Cellular Trail Cameras - Outdoor Waterproof Security Camera w/ Night Vision for Hunting & Security - 4G Camo",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "Vortex Optics Diamondback Tactical First Focal Plane Riflescopes, ACPOTEL Night Vision Monocular, 5 x 35 Digital Night Vision HD Scopes with Rechargeable/Take Photo/Video Recording/Playback Function for Outdoor/Surveillance/Security/Hunting/Hiking, CREATIVE XP Cellular Trail Cameras - Outdoor Waterproof Security Camera w/ Night Vision for Hunting & Security - 4G Camo"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Vortex Optics Diamondback Tactical First Focal Plane Riflescopes, ACPOTEL Night Vision Monocular, 5 x 35 Digital Night Vision HD Scopes with Rechargeable/Take Photo/Video Recording/Playback Function for Outdoor/Surveillance/Security/Hunting/Hiking, CREATIVE XP Cellular Trail Cameras - Outdoor Waterproof Security Camera w/ Night Vision for Hunting & Security - 4G Camo"
        }
    },
    {
        "task_id": 20040,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/office-products/office-electronics/printers-accessories.html",
        "start_url_lite": "http://172.16.2.4:7770/office-products/office-electronics/printers-accessories.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Show me the page with the {{status}} Date First Available among the products displayed on the given page. Please skip the products that do not have the Date First Available information.",
        "intent": "Show me the page with the most recent Date First Available among the products displayed on the given page. Please skip the products that do not have the Date First Available information.",
        "required_obs": "any",
        "type_main": "others",
        "description": "Record the Data First Available and select the oldest or the most recent one.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/office-products/office-electronics/printers-accessories.html",
            "start_url_lite": "http://172.16.2.4:7770/office-products/office-electronics/printers-accessories.html",
            "status": "most recent",
            "contents": "http://172.16.2.4:7770/canon-all-in-one-color-inkjet-wired-printer-print-scan-copy-for-home-office-up-to-60-sheets-600-x-1200-dpi-portability-lightweight-pixma-mg2522-w-usb-cable.html",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "url_match"
            ],
            "reference_answers": null,
            "reference_url": "http://172.16.2.4:7770/canon-all-in-one-color-inkjet-wired-printer-print-scan-copy-for-home-office-up-to-60-sheets-600-x-1200-dpi-portability-lightweight-pixma-mg2522-w-usb-cable.html",
            "program_html": [],
            "url_note": "GOLD in PRED"
        }
    },
    {
        "task_id": 20041,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/office-products/office-electronics/printers-accessories.html?p=2",
        "start_url_lite": "http://172.16.2.4:7770/office-products/office-electronics/printers-accessories.html?p=2",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Show me the page with the {{status}} Date First Available among the products displayed on the given page. Please skip the products that do not have the Date First Available information.",
        "intent": "Show me the page with the most recent Date First Available among the products displayed on the given page. Please skip the products that do not have the Date First Available information.",
        "required_obs": "any",
        "type_main": "others",
        "description": "Record the Data First Available and select the oldest or the most recent one.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/office-products/office-electronics/printers-accessories.html?p=2",
            "start_url_lite": "http://172.16.2.4:7770/office-products/office-electronics/printers-accessories.html?p=2",
            "status": "most recent",
            "contents": "http://172.16.2.4:7770/ashata-replacement-printhead-black-printer-print-head-for-p100-xp202-xp102-xp212-tx430-tx430w-xp235-xp211-xp220-sx420-sx425-sx435-me500-me960-etc.html",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "url_match"
            ],
            "reference_answers": null,
            "reference_url": "http://172.16.2.4:7770/ashata-replacement-printhead-black-printer-print-head-for-p100-xp202-xp102-xp212-tx430-tx430w-xp235-xp211-xp220-sx420-sx425-sx435-me500-me960-etc.html",
            "program_html": [],
            "url_note": "GOLD in PRED"
        }
    },
    {
        "task_id": 20042,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/office-products/office-school-supplies/desk-accessories-workspace-organizers.html",
        "start_url_lite": "http://172.16.2.4:7770/office-products/office-school-supplies/desk-accessories-workspace-organizers.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Show me the page with the {{status}} Date First Available among the products displayed on the given page. Please skip the products that do not have the Date First Available information.",
        "intent": "Show me the page with the most recent Date First Available among the products displayed on the given page. Please skip the products that do not have the Date First Available information.",
        "required_obs": "any",
        "type_main": "others",
        "description": "Record the Data First Available and select the oldest or the most recent one.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/office-products/office-school-supplies/desk-accessories-workspace-organizers.html",
            "start_url_lite": "http://172.16.2.4:7770/office-products/office-school-supplies/desk-accessories-workspace-organizers.html",
            "status": "most recent",
            "contents": "http://172.16.2.4:7770/luckxuan-small-corner-bookcase-double-layer-desktop-bookshelf-office-storage-rack-small-printer-stand-save-space-multifunctional-office-desk-storage-rack-l57-d34-h38cm-bookshelf-bookshelves.html",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "url_match"
            ],
            "reference_answers": null,
            "reference_url": "http://172.16.2.4:7770/luckxuan-small-corner-bookcase-double-layer-desktop-bookshelf-office-storage-rack-small-printer-stand-save-space-multifunctional-office-desk-storage-rack-l57-d34-h38cm-bookshelf-bookshelves.html",
            "program_html": [],
            "url_note": "GOLD in PRED"
        }
    },
    {
        "task_id": 20043,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/office-products/office-school-supplies/desk-accessories-workspace-organizers.html?p=2",
        "start_url_lite": "http://172.16.2.4:7770/office-products/office-school-supplies/desk-accessories-workspace-organizers.html?p=2",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Show me the page with the {{status}} Date First Available among the products displayed on the given page. Please skip the products that do not have the Date First Available information.",
        "intent": "Show me the page with the most recent Date First Available among the products displayed on the given page. Please skip the products that do not have the Date First Available information.",
        "required_obs": "any",
        "type_main": "others",
        "description": "Record the Data First Available and select the oldest or the most recent one.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/office-products/office-school-supplies/desk-accessories-workspace-organizers.html?p=2",
            "start_url_lite": "http://172.16.2.4:7770/office-products/office-school-supplies/desk-accessories-workspace-organizers.html?p=2",
            "status": "most recent",
            "contents": "http://172.16.2.4:7770/mdesign-plastic-divided-drawer-organizer-storage-bin-for-home-office-desk-drawer-shelf-cabinet-4-compartments-12-long-4-pack-32-adhesive-labels-clear.html",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "url_match"
            ],
            "reference_answers": null,
            "reference_url": "http://172.16.2.4:7770/mdesign-plastic-divided-drawer-organizer-storage-bin-for-home-office-desk-drawer-shelf-cabinet-4-compartments-12-long-4-pack-32-adhesive-labels-clear.html",
            "program_html": [],
            "url_note": "GOLD in PRED"
        }
    },
    {
        "task_id": 20044,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/health-household/household-supplies/household-batteries.html",
        "start_url_lite": "http://172.16.2.4:7770/health-household/household-supplies/household-batteries.html",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Show me the page with the {{status}} Date First Available among the products displayed on the given page. Please skip the products that do not have the Date First Available information.",
        "intent": "Show me the page with the oldest Date First Available among the products displayed on the given page. Please skip the products that do not have the Date First Available information.",
        "required_obs": "any",
        "type_main": "others",
        "description": "Record the Data First Available and select the oldest or the most recent one.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/health-household/household-supplies/household-batteries.html",
            "start_url_lite": "http://172.16.2.4:7770/health-household/household-supplies/household-batteries.html",
            "status": "oldest",
            "contents": "http://172.16.2.4:7770/duracell-ultra-lithium-battery-3v-cr2-2-batteries-pack-of-2.html",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "url_match"
            ],
            "reference_answers": null,
            "reference_url": "http://172.16.2.4:7770/duracell-ultra-lithium-battery-3v-cr2-2-batteries-pack-of-2.html",
            "program_html": [],
            "url_note": "GOLD in PRED"
        }
    },
    {
        "task_id": 20050,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Count the total number of products sold in each category ({{category1}}, {{category2}}, {{category3}}, {{category4}}, {{category5}}) and sort them in descending order. Answer in the following format: {category}, {category}, {category}, {category}, {category}",
        "intent": "Count the total number of products sold in each category (Beauty & Personal Care, Sports & Outdoors, Home & Kitchen, Electronics, Cell Phones & Accessories) and sort them in descending order. Answer in the following format: {category}, {category}, {category}, {category}, {category}",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go to each category and retrieve the number of displayed products. Then, sort the categories in descending order based on the number of products.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "contents": "Beauty & Personal Care, Home & Kitchen, Electronics, Cell Phones & Accessories, Sports & Outdoors",
            "category1": "Beauty & Personal Care",
            "category2": "Sports & Outdoors",
            "category3": "Home & Kitchen",
            "category4": "Electronics",
            "category5": "Cell Phones & Accessories",
            "checkpoint1": "21796",
            "checkpoint2": "884",
            "checkpoint3": "17241",
            "checkpoint4": "14539",
            "checkpoint5": "1924",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Beauty & Personal Care, Home & Kitchen, Electronics, Cell Phones & Accessories, Sports & Outdoors"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Beauty & Personal Care, Home & Kitchen, Electronics, Cell Phones & Accessories, Sports & Outdoors"
        }
    },
    {
        "task_id": 20051,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Count the total number of products sold in each category ({{category1}}, {{category2}}, {{category3}}, {{category4}}, {{category5}}) and sort them in descending order. Answer in the following format: {category}, {category}, {category}, {category}, {category}",
        "intent": "Count the total number of products sold in each category (Skin Care of Beauty & Personal Care, Men of Clothing, Shoes & Jewelry, Home Audio of Electronics, Office Electronics of Office Products, Xbox One of Video Games) and sort them in descending order. Answer in the following format: {category}, {category}, {category}, {category}, {category}",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go to each category and retrieve the number of displayed products. Then, sort the categories in descending order based on the number of products.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "contents": "Men of Clothing, Shoes & Jewelry, Skin Care of Beauty & Personal Care, Home Audio of Electronics, Office Electronics of Office Products, Xbox One of Video Games",
            "category1": "Skin Care of Beauty & Personal Care",
            "category2": "Men of Clothing, Shoes & Jewelry",
            "category3": "Home Audio of Electronics",
            "category4": "Office Electronics of Office Products",
            "category5": "Xbox One of Video Games",
            "checkpoint1": "3074",
            "checkpoint2": "7648",
            "checkpoint3": "1280",
            "checkpoint4": "488",
            "checkpoint5": "158",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Men of Clothing, Shoes & Jewelry, Skin Care of Beauty & Personal Care, Home Audio of Electronics, Office Electronics of Office Products, Xbox One of Video Games"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Men of Clothing, Shoes & Jewelry, Skin Care of Beauty & Personal Care, Home Audio of Electronics, Office Electronics of Office Products, Xbox One of Video Games"
        }
    },
    {
        "task_id": 20052,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Count the total number of products sold in each category ({{category1}}, {{category2}}, {{category3}}, {{category4}}, {{category5}}) and sort them in descending order. Answer in the following format: {category}, {category}, {category}, {category}, {category}",
        "intent": "Count the total number of products sold in each category (Gardening & Lawn Care of Patio, Lawn & Garden, Headphones of Electronics, Cell Phones of Cell Phones & Accessories, PlayStation 4 of Video Games, Fan Shop of  Sports & Outdoors) and sort them in descending order. Answer in the following format: {category}, {category}, {category}, {category}, {category}",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go to each category and retrieve the number of displayed products. Then, sort the categories in descending order based on the number of products.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "contents": "Headphones of Electronics, Fan Shop of  Sports & Outdoors, PlayStation 4 of Video Games, Gardening & Lawn Care of Patio, Lawn & Garden, Cell Phones of Cell Phones & Accessories",
            "category1": "Gardening & Lawn Care of Patio, Lawn & Garden",
            "category2": "Headphones of Electronics",
            "category3": "Cell Phones of Cell Phones & Accessories",
            "category4": "PlayStation 4 of Video Games",
            "category5": "Fan Shop of  Sports & Outdoors",
            "checkpoint1": "168",
            "checkpoint2": "631",
            "checkpoint3": "68",
            "checkpoint4": "233",
            "checkpoint5": "338",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Headphones of Electronics, Fan Shop of  Sports & Outdoors, PlayStation 4 of Video Games, Gardening & Lawn Care of Patio, Lawn & Garden, Cell Phones of Cell Phones & Accessories"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Headphones of Electronics, Fan Shop of  Sports & Outdoors, PlayStation 4 of Video Games, Gardening & Lawn Care of Patio, Lawn & Garden, Cell Phones of Cell Phones & Accessories"
        }
    },
    {
        "task_id": 20053,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Count the total number of products sold in each category ({{category1}}, {{category2}}, {{category3}}, {{category4}}, {{category5}}) and sort them in descending order. Answer in the following format: {category}, {category}, {category}, {category}, {category}",
        "intent": "Count the total number of products sold in each category (Fragrance of Beauty & Personal Care, Video Projectors of Electronics, Cases, Holsters & Sleeves of Cell Phones & Accessories, Deli & Prepared Foods of Grocery & Gourmet Food, PlayStation 4 of Video Games) and sort them in descending order. Answer in the following format: {category}, {category}, {category}, {category}, {category}",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go to each category and retrieve the number of displayed products. Then, sort the categories in descending order based on the number of products.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "contents": "Fragrance of Beauty & Personal Care, Video Projectors of Electronics, Cases, Holsters & Sleeves of Cell Phones & Accessories, PlayStation 4 of Video Games, Deli & Prepared Foods of Grocery & Gourmet Food",
            "category1": "Fragrance of Beauty & Personal Care",
            "category2": "Video Projectors of Electronics",
            "category3": "Cases, Holsters & Sleeves of Cell Phones & Accessories",
            "category4": "Deli & Prepared Foods of Grocery & Gourmet Food",
            "category5": "PlayStation 4 of Video Games",
            "checkpoint1": "1311",
            "checkpoint2": "484",
            "checkpoint3": "457",
            "checkpoint4": "149",
            "checkpoint5": "233",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Fragrance of Beauty & Personal Care, Video Projectors of Electronics, Cases, Holsters & Sleeves of Cell Phones & Accessories, PlayStation 4 of Video Games, Deli & Prepared Foods of Grocery & Gourmet Food"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Fragrance of Beauty & Personal Care, Video Projectors of Electronics, Cases, Holsters & Sleeves of Cell Phones & Accessories, PlayStation 4 of Video Games, Deli & Prepared Foods of Grocery & Gourmet Food"
        }
    },
    {
        "task_id": 20054,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Count the total number of products sold in each category ({{category1}}, {{category2}}, {{category3}}, {{category4}}, {{category5}}) and sort them in descending order. Answer in the following format: {category}, {category}, {category}, {category}, {category}",
        "intent": "Count the total number of products sold in each category (Over-Ear Headphones of Headphones of Electronics, Chargers & Power Adapters of Accessories of Cell Phones & Accessories, Health Care of Health & Household, Sports of Sports & Outdoors, Bath of Home & Kitchen) and sort them in descending order. Answer in the following format: {category}, {category}, {category}, {category}, {category}",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go to each category and retrieve the number of displayed products. Then, sort the categories in descending order based on the number of products.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "contents": "Chargers & Power Adapters of Accessories of Cell Phones & Accessories, Bath of Home & Kitchen, Over-Ear Headphones of Headphones of Electronics, Health Care of Health & Household, Sports of Sports & Outdoors",
            "category1": "Over-Ear Headphones of Headphones of Electronics",
            "category2": "Chargers & Power Adapters of Accessories of Cell Phones & Accessories",
            "category3": "Health Care of Health & Household",
            "category4": "Sports of Sports & Outdoors",
            "category5": "Bath of Home & Kitchen",
            "checkpoint1": "148",
            "checkpoint2": "273",
            "checkpoint3": "86",
            "checkpoint4": "75",
            "checkpoint5": "156",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Chargers & Power Adapters of Accessories of Cell Phones & Accessories, Bath of Home & Kitchen, Over-Ear Headphones of Headphones of Electronics, Health Care of Health & Household, Sports of Sports & Outdoors"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Chargers & Power Adapters of Accessories of Cell Phones & Accessories, Bath of Home & Kitchen, Over-Ear Headphones of Headphones of Electronics, Health Care of Health & Household, Sports of Sports & Outdoors"
        }
    },
    {
        "task_id": 20060,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/sales/order/history",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Among my monthly purchase amounts from {{month2}}/{{year2}} to {{month1}}/{{year1}}, tell me the month with the {{order}} purchase amount and its value. Only consider purchases that were successfully completed. Provide the answer in the following format: {month}/{year}, {purchase amount}",
        "intent": "Among my monthly purchase amounts from 11/2022 to 5/2023, tell me the month with the second highest purchase amount and its value. Only consider purchases that were successfully completed. Provide the answer in the following format: {month}/{year}, {purchase amount}",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "massive_memory",
        "description": "First, check the purchase history. Then, calculate the total purchase amount for each of the specified months.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/sales/order/history",
            "contents": "1/2023, 572.88 |OR| 1/2023, $572.88 |OR| 01/2023, 572.88 |OR| 01/2023, $572.88 |OR| January/2023, 572.88 |OR| January/2023, $572.88",
            "year1": "2023",
            "month1": "5",
            "year2": "2022",
            "month2": "11",
            "order": "second highest",
            "checkpoint1": "2023-5: 0, 2023-3: 83.31, 2023-2: 947.5, 2023-1: 572.88, 2022-12: 203.4, 2022-11: 403.18",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "1/2023, 572.88 |OR| 1/2023, $572.88 |OR| 01/2023, 572.88 |OR| 01/2023, $572.88 |OR| January/2023, 572.88 |OR| January/2023, $572.88"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "1/2023, 572.88 |OR| 1/2023, $572.88 |OR| 01/2023, 572.88 |OR| 01/2023, $572.88 |OR| January/2023, 572.88 |OR| January/2023, $572.88"
        }
    },
    {
        "task_id": 20061,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/sales/order/history",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Among my monthly purchase amounts from {{month2}}/{{year2}} to {{month1}}/{{year1}}, tell me the month with the {{order}} purchase amount and its value. Only consider purchases that were successfully completed. Provide the answer in the following format: {month}/{year}, {purchase amount}",
        "intent": "Among my monthly purchase amounts from 7/2022 to 11/2022, tell me the month with the third highest purchase amount and its value. Only consider purchases that were successfully completed. Provide the answer in the following format: {month}/{year}, {purchase amount}",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "massive_memory",
        "description": "First, check the purchase history. Then, calculate the total purchase amount for each of the specified months.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/sales/order/history",
            "contents": "11/2022, 403.18 |OR| 11/2022, $403.18 |OR| November/2022, 403.18 |OR| November/2022, $403.18",
            "year1": "2022",
            "month1": "11",
            "year2": "2022",
            "month2": "7",
            "order": "third highest",
            "checkpoint1": "2022-11: 403.18, 2022-10: 3336.22, 2022-9: 3024.38, 2022-8: 153.64, 2022-7: 394.82",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "11/2022, 403.18 |OR| 11/2022, $403.18 |OR| November/2022, 403.18 |OR| November/2022, $403.18"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "11/2022, 403.18 |OR| 11/2022, $403.18 |OR| November/2022, 403.18 |OR| November/2022, $403.18"
        }
    },
    {
        "task_id": 20062,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/sales/order/history",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Among my monthly purchase amounts from {{month2}}/{{year2}} to {{month1}}/{{year1}}, tell me the month with the {{order}} purchase amount and its value. Only consider purchases that were successfully completed. Provide the answer in the following format: {month}/{year}, {purchase amount}",
        "intent": "Among my monthly purchase amounts from 3/2022 to 7/2022, tell me the month with the highest purchase amount and its value. Only consider purchases that were successfully completed. Provide the answer in the following format: {month}/{year}, {purchase amount}",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "massive_memory",
        "description": "First, check the purchase history. Then, calculate the total purchase amount for each of the specified months.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/sales/order/history",
            "contents": "5/2022, 298.65 |OR| 5/2022, $298.65 |OR| 05/2022, 298.65 |OR| 05/2022, $298.65 |OR| May/2022, 298.65 |OR| May/2022, $298.65",
            "year1": "2022",
            "month1": "7",
            "year2": "2022",
            "month2": "3",
            "order": "highest",
            "checkpoint1": "2022-7: 40.16, 2022-6: 0, 2022-5: 298.65, 2022-4: 102.82, 2022-3: 52.35",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "5/2022, 298.65 |OR| 5/2022, $298.65 |OR| 05/2022, 298.65 |OR| 05/2022, $298.65 |OR| May/2022, 298.65 |OR| May/2022, $298.65"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "5/2022, 298.65 |OR| 5/2022, $298.65 |OR| 05/2022, 298.65 |OR| 05/2022, $298.65 |OR| May/2022, 298.65 |OR| May/2022, $298.65"
        }
    },
    {
        "task_id": 20063,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/sales/order/history",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Among my monthly purchase amounts from {{month2}}/{{year2}} to {{month1}}/{{year1}}, tell me the month with the {{order}} purchase amount and its value. Only consider purchases that were successfully completed. Provide the answer in the following format: {month}/{year}, {purchase amount}",
        "intent": "Among my monthly purchase amounts from 3/2022 to 3/2023, tell me the month with the fourth highest purchase amount and its value. Only consider purchases that were successfully completed. Provide the answer in the following format: {month}/{year}, {purchase amount}",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "massive_memory",
        "description": "First, check the purchase history. Then, calculate the total purchase amount for each of the specified months.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/sales/order/history",
            "contents": "1/2023, 572.88 |OR| 1/2023, $572.88 |OR| 01/2023, 572.88 |OR| 01/2023, $572.88 |OR| January/2023, 572.88 |OR| January/2023, $572.88",
            "year1": "2023",
            "month1": "3",
            "year2": "2022",
            "month2": "3",
            "order": "fourth highest",
            "checkpoint1": "2023-3: 83.31, 2023-2: 947.5, 2023-1: 572.88, 2022-12: 203.4, 2022-11: 403.18, 2022-10: 3336.22, 2022-9: 3024.38, 2022-8: 153.64, 2022-7: 394.82, 2022-6: 173.56, 2022-5: 298.65, 2022-4: 102.82, 2022-3: 374.12",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "1/2023, 572.88 |OR| 1/2023, $572.88 |OR| 01/2023, 572.88 |OR| 01/2023, $572.88 |OR| January/2023, 572.88 |OR| January/2023, $572.88"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "1/2023, 572.88 |OR| 1/2023, $572.88 |OR| 01/2023, 572.88 |OR| 01/2023, $572.88 |OR| January/2023, 572.88 |OR| January/2023, $572.88"
        }
    },
    {
        "task_id": 20064,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/sales/order/history",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "Among my monthly purchase amounts from {{month2}}/{{year2}} to {{month1}}/{{year1}}, tell me the month with the {{order}} purchase amount and its value. Only consider purchases that were successfully completed. Provide the answer in the following format: {month}/{year}, {purchase amount}",
        "intent": "Among my monthly purchase amounts from 6/2022 to 1/2023, tell me the month with the seventh highest purchase amount and its value. Only consider purchases that were successfully completed. Provide the answer in the following format: {month}/{year}, {purchase amount}",
        "required_obs": "any",
        "type_main": "calc",
        "type_sub": "massive_memory",
        "description": "First, check the purchase history. Then, calculate the total purchase amount for each of the specified months.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/sales/order/history",
            "contents": "7/2022, 40.16 |OR| 7/2022, $40.16 |OR| 07/2022, 40.16 |OR| 07/2022, $40.16 |OR| July/2022, 40.16 |OR| July/2022, $40.16",
            "year1": "2023",
            "month1": "1",
            "year2": "2022",
            "month2": "6",
            "order": "seventh highest",
            "checkpoint1": "2022-6: 0, 2022-7: 40.16, 2022-8: 75.98, 2022-9: 3024.38, 2022-10: 1209.9, 2022-11: 403.18, 2022-12: 203.4, 2023-1: 572.88",
            "checkpoint_info": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "7/2022, 40.16 |OR| 7/2022, $40.16 |OR| 07/2022, 40.16 |OR| 07/2022, $40.16 |OR| July/2022, 40.16 |OR| July/2022, $40.16"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "7/2022, 40.16 |OR| 7/2022, $40.16 |OR| 07/2022, 40.16 |OR| 07/2022, $40.16 |OR| July/2022, 40.16 |OR| July/2022, $40.16"
        }
    },
    {
        "task_id": 20070,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Please tell me the total price, including shipping fees, for buying {{first_item_count}} units of {{first_item_name}} and {{second_item_count}} units of {{second_item_name}}, but do not add them to the cart. Please consider the shipping fee for multiple purchases of a single product to be the same as that for purchasing just one unit of the product.",
        "intent": "Please tell me the total price, including shipping fees, for buying 3 units of Nivea Invisible Black & White Fresh Anti-Perspirant Deodorants - PACK OF 2 X 150 ml and 4 units of Eyebrow Trimmer, Sharpened Blade Facial Shaver Beautiful Delicate with Safety Cover for Eyebrow Shaving for Facial Shaving, but do not add them to the cart. Please consider the shipping fee for multiple purchases of a single product to be the same as that for purchasing just one unit of the product.",
        "required_obs": "any",
        "type_main": "calc",
        "description": "Note down the prices from the product pages and infer the shipping cost from the order history. Finally, calculate the total amount.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "first_item_count": "3",
            "first_item_name": "Nivea Invisible Black & White Fresh Anti-Perspirant Deodorants - PACK OF 2 X 150 ml",
            "second_item_count": "4",
            "second_item_name": "Eyebrow Trimmer, Sharpened Blade Facial Shaver Beautiful Delicate with Safety Cover for Eyebrow Shaving for Facial Shaving",
            "ground_truth": "89.83",
            "//comments": "$16.89 x 3 + $7.29 x 4 = $79.83"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "89.83"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20071,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Please tell me the total price, including shipping fees, for buying {{first_item_count}} units of {{first_item_name}} and {{second_item_count}} units of {{second_item_name}}, but do not add them to the cart. Please consider the shipping fee for multiple purchases of a single product to be the same as that for purchasing just one unit of the product.",
        "intent": "Please tell me the total price, including shipping fees, for buying 2 units of EMMA + OLIVER Stand-Alone Double Study Carrel with Height Adjustable Legs - Nebula Grey Finish and 5 units of File Holder Storage Box File Box Desktop Multi-Function Book Stand Simple Information Frame Office Supplies A+ (Color : Gray, Size : 3 Columns), but do not add them to the cart. Please consider the shipping fee for multiple purchases of a single product to be the same as that for purchasing just one unit of the product.",
        "required_obs": "any",
        "type_main": "calc",
        "description": "Note down the prices from the product pages and infer the shipping cost from the order history. Finally, calculate the total amount.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "first_item_count": "2",
            "first_item_name": "EMMA + OLIVER Stand-Alone Double Study Carrel with Height Adjustable Legs - Nebula Grey Finish",
            "second_item_count": "5",
            "second_item_name": "File Holder Storage Box File Box Desktop Multi-Function Book Stand Simple Information Frame Office Supplies A+ (Color : Gray, Size : 3 Columns)",
            "ground_truth": "1177.88",
            "//comments": "$492.99 x 2 + $36.38 x 5 = $1167.88"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "1177.88"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20072,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Please tell me the total price, including shipping fees, for buying {{first_item_count}} units of {{first_item_name}} and {{second_item_count}} units of {{second_item_name}}, but do not add them to the cart. Please consider the shipping fee for multiple purchases of a single product to be the same as that for purchasing just one unit of the product.",
        "intent": "Please tell me the total price, including shipping fees, for buying 11 units of [Samyang] Buldark Spicy Chicken Roasted Sauce 200g×2 / Korean food / Korean sauce / Asian dishes (overseas direct shipment) and 20 units of Barilla Ready Pasta, Rotini, 8.5 Ounce (Pack of 6), but do not add them to the cart. Please consider the shipping fee for multiple purchases of a single product to be the same as that for purchasing just one unit of the product.",
        "required_obs": "any",
        "type_main": "calc",
        "description": "Note down the prices from the product pages and infer the shipping cost from the order history. Finally, calculate the total amount.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "first_item_count": "11",
            "first_item_name": "[Samyang] Buldark Spicy Chicken Roasted Sauce 200g×2 / Korean food / Korean sauce / Asian dishes (overseas direct shipment)",
            "second_item_count": "20",
            "second_item_name": "Barilla Ready Pasta, Rotini, 8.5 Ounce (Pack of 6)",
            "ground_truth": "380.69",
            "//comments": "$11.99 x 11 + $11.94 x 20 = $370.69"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "380.69"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20073,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Please tell me the total price, including shipping fees, for buying {{first_item_count}} units of {{first_item_name}} and {{second_item_count}} units of {{second_item_name}}, but do not add them to the cart. Please consider the shipping fee for multiple purchases of a single product to be the same as that for purchasing just one unit of the product.",
        "intent": "Please tell me the total price, including shipping fees, for buying 2 units of 100w 24(20A) Port USB Fast Charging Station,Travel Desktop USB Rapid Charger,Multi Ports Charging Station Organizer for iPhone,Ipad,Samsung and More Devices,fit School,mall,Hotel,Shop and 23 units of USB Type C Charger Cable Fast Charging 10ft,Extra Long 2Pack 10Foot USB A to USB-C Phone Charging Cord for Samsung Galaxy S20 S10 S10E S9 S8 Plus Note 10 9 8,Z Flip,LG V50 V40 V30 V20, but do not add them to the cart. Please consider the shipping fee for multiple purchases of a single product to be the same as that for purchasing just one unit of the product.",
        "required_obs": "any",
        "type_main": "calc",
        "description": "Note down the prices from the product pages and infer the shipping cost from the order history. Finally, calculate the total amount.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "first_item_count": "2",
            "first_item_name": "100w 24(20A) Port USB Fast Charging Station,Travel Desktop USB Rapid Charger,Multi Ports Charging Station Organizer for iPhone,Ipad,Samsung and More Devices,fit School,mall,Hotel,Shop",
            "second_item_count": "23",
            "second_item_name": "USB Type C Charger Cable Fast Charging 10ft,Extra Long 2Pack 10Foot USB A to USB-C Phone Charging Cord for Samsung Galaxy S20 S10 S10E S9 S8 Plus Note 10 9 8,Z Flip,LG V50 V40 V30 V20",
            "ground_truth": "379.77",
            "//comments": "$35.50 x 2 + $12.99 x 23 = $369.77"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "379.77"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20074,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Please tell me the total price, including shipping fees, for buying {{first_item_count}} units of {{first_item_name}} and {{second_item_count}} units of {{second_item_name}}, but do not add them to the cart. Please consider the shipping fee for multiple purchases of a single product to be the same as that for purchasing just one unit of the product.",
        "intent": "Please tell me the total price, including shipping fees, for buying 19 units of Made Terra 2 Tier Woven Hanging Plant Pot Seagrass Wicker Wall Hanging Planter Basket Cover for Flower Pot, Indoor Outdoor Garden Balcony Home Decoration (Natural Stripe) and 9 units of Tosnail 2 Pack Metal Wall Hanging Planter Basket with Coco Liner - Great for Indoor or Outdoor Plants, but do not add them to the cart. Please consider the shipping fee for multiple purchases of a single product to be the same as that for purchasing just one unit of the product.",
        "required_obs": "any",
        "type_main": "calc",
        "description": "Note down the prices from the product pages and infer the shipping cost from the order history. Finally, calculate the total amount.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "first_item_count": "19",
            "first_item_name": "Made Terra 2 Tier Woven Hanging Plant Pot Seagrass Wicker Wall Hanging Planter Basket Cover for Flower Pot, Indoor Outdoor Garden Balcony Home Decoration (Natural Stripe)",
            "second_item_count": "9",
            "second_item_name": "Tosnail 2 Pack Metal Wall Hanging Planter Basket with Coco Liner - Great for Indoor or Outdoor Plants",
            "ground_truth": "749.96",
            "//comments": "$29.95 x 19 + $18.99 x 9 = $739.96"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "749.96"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20080,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/grocery-gourmet-food/food-beverage-gifts/snack-gifts.html?product_list_limit=5&product_list_mode=list",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Among products on this page that have weight information, which one has the lowest price per pound? Sort the SKU numbers in ascending order by price per pound and list them separated by commas followed by a space.",
        "intent": "Among products on this page that have weight information, which one has the lowest price per pound? Sort the SKU numbers in ascending order by price per pound and list them separated by commas followed by a space.",
        "required_obs": "any",
        "type_main": "calc",
        "description": "Calculate the price per unit weight for all products on the page based on their price and weight, then compare and sort them accordingly.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/grocery-gourmet-food/food-beverage-gifts/snack-gifts.html?product_list_limit=5&product_list_mode=list",
            "start_url_lite": "http://172.16.2.4:7770",
            "ground_truth": "B004PR6HPQ, B09LLHRC3W, B07WZPC4WG, B098NRBL68, B005TG4C68",
            "//comments": "69.97$ 6lb (2721g) 0.0257$/g B004PR6HPQ, 44.98$ 2.69lb (1220g) 0.0369$/g B09LLHRC3W, 23.99$ 180g 0.1333$/g B098NRBL68, 28.09$ 90g 0.3121$/g B005TG4C68, 49.99$ 450g 0.1111$/g B07WZPC4WG"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "B004PR6HPQ, B09LLHRC3W, B07WZPC4WG, B098NRBL68, B005TG4C68"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20081,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/grocery-gourmet-food/food-beverage-gifts/fruit-nut-gifts.html?product_list_mode=list&product_list_limit=5",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Among products on this page that have weight information, which one has the lowest price per pound? Sort the SKU numbers in ascending order by price per pound and list them separated by commas followed by a space.",
        "intent": "Among products on this page that have weight information, which one has the lowest price per pound? Sort the SKU numbers in ascending order by price per pound and list them separated by commas followed by a space.",
        "required_obs": "any",
        "type_main": "calc",
        "description": "Calculate the price per unit weight for all products on the page based on their price and weight, then compare and sort them accordingly.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/grocery-gourmet-food/food-beverage-gifts/fruit-nut-gifts.html?product_list_mode=list&product_list_limit=5",
            "start_url_lite": "http://172.16.2.4:7770",
            "ground_truth": "B09NQC6LBJ, B07F1WMJX2, B09MSMWHJJ, B08HRBXD2K, B00CPM4JSU",
            "//comments": "21.69$ 1.01lb 21.48$/lb B00CPM4JSU, 29.95$ 2.5lb 11.93$/lb B09NQC6LBJ, 22.99$ 1.51lb 15.23$/lb B09MSMWHJJ, 34.38$ 2lb 17.19$/lb B08HRBXD2K, 27.99$ 2.05lb 13.65$/lb B07F1WMJX2"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "B09NQC6LBJ, B07F1WMJX2, B09MSMWHJJ, B08HRBXD2K, B00CPM4JSU"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20082,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/grocery-gourmet-food/food-beverage-gifts/meat-seafood-gifts.html?p=2&product_list_limit=5&product_list_mode=list",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Among products on this page that have weight information, which one has the lowest price per pound? Sort the SKU numbers in ascending order by price per pound and list them separated by commas followed by a space.",
        "intent": "Among products on this page that have weight information, which one has the lowest price per pound? Sort the SKU numbers in ascending order by price per pound and list them separated by commas followed by a space.",
        "required_obs": "any",
        "type_main": "calc",
        "description": "Calculate the price per unit weight for all products on the page based on their price and weight, then compare and sort them accordingly.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/grocery-gourmet-food/food-beverage-gifts/meat-seafood-gifts.html?p=2&product_list_limit=5&product_list_mode=list",
            "start_url_lite": "http://172.16.2.4:7770",
            "ground_truth": "B08MX292VN, B00FAPR3LE, B071DVW1VK",
            "//comments": "重さなし, 49.95$ 3lb 16.65$/lb B00FAPR3LE, 399$ 10lb 39.9$/lb B071DVW1VK, 68.95$ 7.09lb 9.72$/lb B08MX292VN, 重さなし"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "B08MX292VN, B00FAPR3LE, B071DVW1VK"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20083,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/grocery-gourmet-food/meat-seafood/seafood.html?product_list_mode=list&product_list_limit=5",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Among products on this page that have weight information, which one has the lowest price per pound? Sort the SKU numbers in ascending order by price per pound and list them separated by commas followed by a space.",
        "intent": "Among products on this page that have weight information, which one has the lowest price per pound? Sort the SKU numbers in ascending order by price per pound and list them separated by commas followed by a space.",
        "required_obs": "any",
        "type_main": "calc",
        "description": "Calculate the price per unit weight for all products on the page based on their price and weight, then compare and sort them accordingly.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/grocery-gourmet-food/meat-seafood/seafood.html?product_list_mode=list&product_list_limit=5",
            "start_url_lite": "http://172.16.2.4:7770",
            "ground_truth": "B00HP8UXHU, B00DJLL27W, B00QG2C5NQ, B00B04J6OQ, B085Q4Q7QV",
            "//comments": "14.43$ 2.02lb 7.14$/lb B00HP8UXHU, 6.59$ 0.45oz 0.028lb 235.36$/lb B085Q4Q7QV, 17.99$ 1lb 17.99$/lb B00DJLL27W, 167.5$ 1lb 167.5$/lb B00B04J6OQ, 99.50$ 5lb 19.9$/lb B00QG2C5NQ"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "B00HP8UXHU, B00DJLL27W, B00QG2C5NQ, B00B04J6OQ, B085Q4Q7QV"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20084,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/grocery-gourmet-food/beverages/bottled-beverages-water-drink-mixes.html?product_list_mode=list&product_list_limit=5",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Among products on this page that have weight information, which one has the lowest price per pound? Sort the SKU numbers in ascending order by price per pound and list them separated by commas followed by a space.",
        "intent": "Among products on this page that have weight information, which one has the lowest price per pound? Sort the SKU numbers in ascending order by price per pound and list them separated by commas followed by a space.",
        "required_obs": "any",
        "type_main": "calc",
        "description": "Calculate the price per unit weight for all products on the page based on their price and weight, then compare and sort them accordingly.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/grocery-gourmet-food/beverages/bottled-beverages-water-drink-mixes.html?product_list_mode=list&product_list_limit=5",
            "start_url_lite": "http://172.16.2.4:7770",
            "ground_truth": "B01MT3KBO7, B091B8SW3V, B09BNQM376, B095DK6HBF, B08Z2FR6GQ",
            "//comments": "29.13$ 1.62lb 17.98$/lb B09BNQM376, 15.95$ 7.83oz 0.49lb 32.55$/lb B095DK6HBF, 21.26$ 10.08oz 0.63lb 33.7$/lb B08Z2FR6GQ, 45.00$ 12oz*18 13.5lb 3.33$/lb B091B8SW3V, 24$ 9.45lb 2.54$/lb B01MT3KBO7"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "B01MT3KBO7, B091B8SW3V, B09BNQM376, B095DK6HBF, B08Z2FR6GQ"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20090,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/clothing-shoes-jewelry/men/shoes.html?product_list_order=price",
        "intent_template": "Among the {{size}} products in the '{{category}}' category, please tell me which one is the cheapest. Answer SKU number.",
        "intent": "Among the 6.5inch products in the 'Mens' Shoes' category, please tell me which one is the cheapest. Answer SKU number.",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go through each product one by one until finding one that meets the condition.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/clothing-shoes-jewelry/men/shoes.html?product_list_order=price",
            "category": "Mens' Shoes",
            "size": "6.5inch",
            "ground_truth": "B09SB5YHCV"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "B09SB5YHCV"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20091,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/electronics/television-video/televisions.html?product_list_order=price",
        "intent_template": "Among the {{size}} products in the '{{category}}' category, please tell me which one is the cheapest. Answer SKU number.",
        "intent": "Among the 43inch products in the 'TV' category, please tell me which one is the cheapest. Answer SKU number.",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go through each product one by one until finding one that meets the condition.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/electronics/television-video/televisions.html?product_list_order=price",
            "category": "TV",
            "size": "43inch",
            "ground_truth": "B09753NK6N"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "B09753NK6N"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20100,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/electronics/accessories-supplies/power-strips-surge-protectors.html?product_list_order=price",
        "intent_template": "Among the products {{criteria}} in the '{{category}}' category, please tell me the cheapest one with at least 5 reviews and an average rating of 4 or higher out of 5 stars. Answer with SKU number.",
        "intent": "Among the products that have USB Ports in the 'Power Strips & Surge Protectors' category, please tell me the cheapest one with at least 5 reviews and an average rating of 4 or higher out of 5 stars. Answer with SKU number.",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go through each product one by one until finding one that meets the condition.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/electronics/accessories-supplies/power-strips-surge-protectors.html?product_list_order=price",
            "category": "Power Strips & Surge Protectors",
            "criteria": "that have USB Ports",
            "ground_truth": "B0872P75QM"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "B0872P75QM"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20101,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/electronics/headphones/earbud-headphones.html?product_list_order=price",
        "intent_template": "Among the products {{criteria}} in the '{{category}}' category, please tell me the cheapest one with at least 5 reviews and an average rating of 4 or higher out of 5 stars. Answer with SKU number.",
        "intent": "Among the products made by JVC in the 'Earbud Headphones' category, please tell me the cheapest one with at least 5 reviews and an average rating of 4 or higher out of 5 stars. Answer with SKU number.",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go through each product one by one until finding one that meets the condition.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/electronics/headphones/earbud-headphones.html?product_list_order=price",
            "category": "Earbud Headphones",
            "criteria": "made by JVC",
            "ground_truth": "B08DK5178Y"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "B08DK5178Y"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20102,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/grocery-gourmet-food/dairy-cheese-eggs/cheese.html?product_list_order=price",
        "intent_template": "Among the products {{criteria}} in the '{{category}}' category, please tell me the cheapest one with at least 5 reviews and an average rating of 4 or higher out of 5 stars. Answer with SKU number.",
        "intent": "Among the products that contain cheddar cheese in the 'Cheese' category, please tell me the cheapest one with at least 5 reviews and an average rating of 4 or higher out of 5 stars. Answer with SKU number.",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go through each product one by one until finding one that meets the condition.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/grocery-gourmet-food/dairy-cheese-eggs/cheese.html?product_list_order=price",
            "category": "Cheese",
            "criteria": "that contain cheddar cheese",
            "ground_truth": "B01N5WCAKK"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "B01N5WCAKK"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20103,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/clothing-shoes-jewelry/men/shoes.html?product_list_order=price",
        "intent_template": "Among the products {{criteria}} in the '{{category}}' category, please tell me the cheapest one with at least 5 reviews and an average rating of 4 or higher out of 5 stars. Answer with SKU number.",
        "intent": "Among the products made by leather not sandals in the 'Men's Shoes' category, please tell me the cheapest one with at least 5 reviews and an average rating of 4 or higher out of 5 stars. Answer with SKU number.",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go through each product one by one until finding one that meets the condition.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/clothing-shoes-jewelry/men/shoes.html?product_list_order=price",
            "category": "Men's Shoes",
            "criteria": "made by leather not sandals",
            "ground_truth": "B07PP5BMDX"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "B07PP5BMDX"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20104,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/cell-phones-accessories/cell-phones.html?product_list_order=price",
        "intent_template": "Among the products {{criteria}} in the '{{category}}' category, please tell me the cheapest one with at least 5 reviews and an average rating of 4 or higher out of 5 stars. Answer with SKU number.",
        "intent": "Among the products that have 128GB ROM in the 'Cell Phone' category, please tell me the cheapest one with at least 5 reviews and an average rating of 4 or higher out of 5 stars. Answer with SKU number.",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go through each product one by one until finding one that meets the condition.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/cell-phones-accessories/cell-phones.html?product_list_order=price",
            "category": "Cell Phone",
            "criteria": "that have 128GB ROM",
            "ground_truth": "B09HN52ST3"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "B09HN52ST3"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20110,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Please report to the administrator regarding {{category}} products purchased {{date}}. Make sure to include the SKU number and price in the following format: ```SKU: {SKU}\nPrice: ${PRICE}```",
        "intent": "Please report to the administrator regarding cooked food products purchased on March, 2023. Make sure to include the SKU number and price in the following format: ```SKU: {SKU}\nPrice: ${PRICE}```",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go through the order history to find an entry that satisfies the condition.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "category": "cooked food",
            "date": "on March, 2023",
            "SKU": "B005IR33MM",
            "price": "12.99"
        },
        "eval": {
            "eval_types": [
                "program_html"
            ],
            "reference_answers": null,
            "reference_url": "",
            "program_html": [
                {
                    "url": "last",
                    "locator": "document.querySelector('[title=\"What’s on your mind?\"').value",
                    "required_contents": {
                        "must_include": [
                            "SKU: B005IR33MM\nPrice: $12.99"
                        ]
                    }
                }
            ],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20111,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Please report to the administrator regarding {{category}} products purchased {{date}}. Make sure to include the SKU number and price in the following format: ```SKU: {SKU}\nPrice: ${PRICE}```",
        "intent": "Please report to the administrator regarding furtniture products purchased on February, 2023. Make sure to include the SKU number and price in the following format: ```SKU: {SKU}\nPrice: ${PRICE}```",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go through the order history to find an entry that satisfies the condition.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "category": "furtniture",
            "date": "on February, 2023",
            "SKU": "B099TZT2XR",
            "price": "599.00"
        },
        "eval": {
            "eval_types": [
                "program_html"
            ],
            "reference_answers": null,
            "reference_url": "",
            "program_html": [
                {
                    "url": "last",
                    "locator": "document.querySelector('[title=\"What’s on your mind?\"').value",
                    "required_contents": {
                        "must_include": [
                            "SKU: B099TZT2XR\nPrice: $599.00"
                        ]
                    }
                }
            ],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20112,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Please report to the administrator regarding {{category}} products purchased {{date}}. Make sure to include the SKU number and price in the following format: ```SKU: {SKU}\nPrice: ${PRICE}```",
        "intent": "Please report to the administrator regarding photo products purchased in 2023. Make sure to include the SKU number and price in the following format: ```SKU: {SKU}\nPrice: ${PRICE}```",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go through the order history to find an entry that satisfies the condition.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "category": "photo",
            "date": "in 2023",
            "SKU": "B09F3JQN4W",
            "price": "33.28"
        },
        "eval": {
            "eval_types": [
                "program_html"
            ],
            "reference_answers": null,
            "reference_url": "",
            "program_html": [
                {
                    "url": "last",
                    "locator": "document.querySelector('[title=\"What’s on your mind?\"').value",
                    "required_contents": {
                        "must_include": [
                            "SKU: B09F3JQN4W\nPrice: $33.28"
                        ]
                    }
                }
            ],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20113,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Please report to the administrator regarding {{category}} products purchased {{date}}. Make sure to include the SKU number and price in the following format: ```SKU: {SKU}\nPrice: ${PRICE}```",
        "intent": "Please report to the administrator regarding skin care products purchased on November, 2022. Make sure to include the SKU number and price in the following format: ```SKU: {SKU}\nPrice: ${PRICE}```",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go through the order history to find an entry that satisfies the condition.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "category": "skin care",
            "date": "on November, 2022",
            "SKU": "B084WRP9B2",
            "price": "99.99"
        },
        "eval": {
            "eval_types": [
                "program_html"
            ],
            "reference_answers": null,
            "reference_url": "",
            "program_html": [
                {
                    "url": "last",
                    "locator": "document.querySelector('[title=\"What’s on your mind?\"').value",
                    "required_contents": {
                        "must_include": [
                            "SKU: B084WRP9B2\nPrice: $99.99"
                        ]
                    }
                }
            ],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20114,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Please report to the administrator regarding {{category}} products purchased {{date}}. Make sure to include the SKU number and price in the following format: ```SKU: {SKU}\nPrice: ${PRICE}```",
        "intent": "Please report to the administrator regarding snack products purchased on April, 2022. Make sure to include the SKU number and price in the following format: ```SKU: {SKU}\nPrice: ${PRICE}```",
        "required_obs": "any",
        "type_main": "long-term",
        "description": "Go through the order history to find an entry that satisfies the condition.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "category": "snack",
            "date": "on April, 2022",
            "SKU": "B07VR1QS11",
            "price": "19.86"
        },
        "eval": {
            "eval_types": [
                "program_html"
            ],
            "reference_answers": null,
            "reference_url": "",
            "program_html": [
                {
                    "url": "last",
                    "locator": "document.querySelector('[title=\"What’s on your mind?\"').value",
                    "required_contents": {
                        "must_include": [
                            "SKU: B07VR1QS11\nPrice: $19.86"
                        ]
                    }
                }
            ],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20120,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/health-household/diet-sports-nutrition.html?cat=192&price=10-20",
        "intent_template": "Among the products in the '{{category}}' category within the price range of {{min_dollers}} to {{max_dollers}} dollars, tell me which one has the highest percentage of reviews with 3 or more stars out of 5, with at least 5 reviews. Answer SKU number.",
        "intent": "Among the products in the 'Diet & Sports Nutrition' category within the price range of 10.00 to 19.99 dollars, tell me which one has the highest percentage of reviews with 3 or more stars out of 5, with at least 5 reviews. Answer SKU number.",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "long-term",
        "description": "Record the ratings of all products that meet the condition, and finally output the one with the highest rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/health-household/diet-sports-nutrition.html?cat=192&price=10-20",
            "category": "Diet & Sports Nutrition",
            "min_dollers": "10.00",
            "max_dollers": "19.99",
            "ground_truth": "B07JG914JC"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "B07JG914JC"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20121,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/video-games/playstation-4/accessories.html?price=20-30%2C0-100",
        "intent_template": "Among the products in the '{{category}}' category within the price range of {{min_dollers}} to {{max_dollers}} dollars, tell me which one has the highest percentage of reviews with 3 or more stars out of 5, with at least 5 reviews. Answer SKU number.",
        "intent": "Among the products in the 'PlayStation4 Accessories' category within the price range of 20.00 to 29.99 dollars, tell me which one has the highest percentage of reviews with 3 or more stars out of 5, with at least 5 reviews. Answer SKU number.",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "long-term",
        "description": "Record the ratings of all products that meet the condition, and finally output the one with the highest rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/video-games/playstation-4/accessories.html?price=20-30%2C0-100",
            "category": "PlayStation4 Accessories",
            "min_dollers": "20.00",
            "max_dollers": "29.99",
            "ground_truth": "B08JZ2MTMJ"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "B08JZ2MTMJ"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20122,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/patio-lawn-garden/gardening-lawn-care/plants-seeds-bulbs.html?price=10-20",
        "intent_template": "Among the products in the '{{category}}' category within the price range of {{min_dollers}} to {{max_dollers}} dollars, tell me which one has the highest percentage of reviews with 3 or more stars out of 5, with at least 5 reviews. Answer SKU number.",
        "intent": "Among the products in the 'Plants, Seeds & Bulbs' category within the price range of 10.00 to 19.99 dollars, tell me which one has the highest percentage of reviews with 3 or more stars out of 5, with at least 5 reviews. Answer SKU number.",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "long-term",
        "description": "Record the ratings of all products that meet the condition, and finally output the one with the highest rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/patio-lawn-garden/gardening-lawn-care/plants-seeds-bulbs.html?price=10-20",
            "category": "Plants, Seeds & Bulbs",
            "min_dollers": "10.00",
            "max_dollers": "19.99",
            "ground_truth": "B07SSFGC5S"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "B07SSFGC5S"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20123,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/home-kitchen/bath/bathroom-accessories.html?price=10-20%2C0-100",
        "intent_template": "Among the products in the '{{category}}' category within the price range of {{min_dollers}} to {{max_dollers}} dollars, tell me which one has the highest percentage of reviews with 3 or more stars out of 5, with at least 5 reviews. Answer SKU number.",
        "intent": "Among the products in the 'Bathroom Accessories' category within the price range of 10.00 to 19.99 dollars, tell me which one has the highest percentage of reviews with 3 or more stars out of 5, with at least 5 reviews. Answer SKU number.",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "long-term",
        "description": "Record the ratings of all products that meet the condition, and finally output the one with the highest rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/home-kitchen/bath/bathroom-accessories.html?price=10-20%2C0-100",
            "category": "Bathroom Accessories",
            "min_dollers": "10.00",
            "max_dollers": "19.99",
            "ground_truth": "B092HVZZG4"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "B092HVZZG4"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20124,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770/cell-phones-accessories/cases-holsters-sleeves/flip-cases.html?price=10-20",
        "intent_template": "Among the products in the '{{category}}' category within the price range of {{min_dollers}} to {{max_dollers}} dollars, tell me which one has the highest percentage of reviews with 3 or more stars out of 5, with at least 5 reviews. Answer SKU number.",
        "intent": "Among the products in the 'Cell Phone Flip Cases' category within the price range of 10.00 to 19.99 dollars, tell me which one has the highest percentage of reviews with 3 or more stars out of 5, with at least 5 reviews. Answer SKU number.",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "long-term",
        "description": "Record the ratings of all products that meet the condition, and finally output the one with the highest rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770/cell-phones-accessories/cases-holsters-sleeves/flip-cases.html?price=10-20",
            "category": "Cell Phone Flip Cases",
            "min_dollers": "10.00",
            "max_dollers": "19.99",
            "ground_truth": "B097MYR4NQ"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "B097MYR4NQ"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20130,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Extract the title of reviews with a rating of 2 or below out of 5 stars from '{{item_name}}' and output them as a list in alphabetical order, separeted by line breaks.",
        "intent": "Extract the title of reviews with a rating of 2 or below out of 5 stars from 'Tea Gift Set for Tea Lovers - Includes Double Insulated Tea Cup 12 Uniquely Blended Teas and All Natural Honey Straws | Tea Gift Sets for Women Men | Tea Gifts Bag Presented in Beautiful Gift Bag' and output them as a list in alphabetical order, separeted by line breaks.",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "long-term",
        "description": "Go through all the reviews and note the titles of those with low ratings. Finally, sort them in alphabetical order.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "item_name": "Tea Gift Set for Tea Lovers - Includes Double Insulated Tea Cup 12 Uniquely Blended Teas and All Natural Honey Straws | Tea Gift Sets for Women Men | Tea Gifts Bag Presented in Beautiful Gift Bag",
            "ground_truth": "Absolutely ridiculous!\nCHEAP and non-returnable.\nGarbage.\nMissing all of the product !\nNot worth $30\nVERY DISAPPOINTED and you cannot return!"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Absolutely ridiculous!\nCHEAP and non-returnable.\nGarbage.\nMissing all of the product !\nNot worth $30\nVERY DISAPPOINTED and you cannot return!"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20131,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Extract the title of reviews with a rating of 2 or below out of 5 stars from '{{item_name}}' and output them as a list in alphabetical order, separeted by line breaks.",
        "intent": "Extract the title of reviews with a rating of 2 or below out of 5 stars from 'Pre-Seasoned Garlic and Herb Rub Prime Rib Roast, 1 count, 3.5-4 lb from Kansas City Steaks' and output them as a list in alphabetical order, separeted by line breaks.",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "long-term",
        "description": "Go through all the reviews and note the titles of those with low ratings. Finally, sort them in alphabetical order.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "item_name": "Pre-Seasoned Garlic and Herb Rub Prime Rib Roast, 1 count, 3.5-4 lb from Kansas City Steaks",
            "ground_truth": "Bad buy on that\nBuy from a reputable butcher for a Prime rib Roast\nDon't buy classic beef cuts online. Buy it in person.\nOver priced very small !\nso small\nTHINK\nVery expensive and disappointing. The dog will eat it, it's not fit for human consumption."
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Bad buy on that\nBuy from a reputable butcher for a Prime rib Roast\nDon't buy classic beef cuts online. Buy it in person.\nOver priced very small !\nso small\nTHINK\nVery expensive and disappointing. The dog will eat it, it's not fit for human consumption."
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20132,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Extract the title of reviews with a rating of 2 or below out of 5 stars from '{{item_name}}' and output them as a list in alphabetical order, separeted by line breaks.",
        "intent": "Extract the title of reviews with a rating of 2 or below out of 5 stars from 'SAMSUNG Galaxy Note 8 N950U 64GB Unlocked GSM 4G LTE Android Smartphone w/Dual 12 MegaPixel Camera (Renewed) (Orchid Grey)' and output them as a list in alphabetical order, separeted by line breaks.",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "long-term",
        "description": "Go through all the reviews and note the titles of those with low ratings. Finally, sort them in alphabetical order.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "item_name": "SAMSUNG Galaxy Note 8 N950U 64GB Unlocked GSM 4G LTE Android Smartphone w/Dual 12 MegaPixel Camera (Renewed) (Orchid Grey)",
            "ground_truth": "Barely works not worth the money\nCareful which Seller\nDefectuoso\nDO NOT WASTE YOUR MONEY AND AVOID an8 \"refurbished unlocked Verizon Branded Samsung Galaxy Note 8\nEl telÃ©fono saliÃ³ mal,lo regresÃ© y no me han devuelto mi dinero\nNot please\nOne of the two Note 8 phones I took to Africa was mess up, the other is pretty good though\nPhone dont work unless you put it on Speaker.\nPhone not unlocked\nPhone Sent was locked and they would not exchange for unlocked phone advertised\nRUN!!!\nWARNING might not come as described!"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Barely works not worth the money\nCareful which Seller\nDefectuoso\nDO NOT WASTE YOUR MONEY AND AVOID an8 \"refurbished unlocked Verizon Branded Samsung Galaxy Note 8\nEl telÃ©fono saliÃ³ mal,lo regresÃ© y no me han devuelto mi dinero\nNot please\nOne of the two Note 8 phones I took to Africa was mess up, the other is pretty good though\nPhone dont work unless you put it on Speaker.\nPhone not unlocked\nPhone Sent was locked and they would not exchange for unlocked phone advertised\nRUN!!!\nWARNING might not come as described!"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20133,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Extract the title of reviews with a rating of 2 or below out of 5 stars from '{{item_name}}' and output them as a list in alphabetical order, separeted by line breaks.",
        "intent": "Extract the title of reviews with a rating of 2 or below out of 5 stars from 'Bamboo Rolling 6 Tier Plant Stand Rack Multiple Flower Pot Holder Shelf Indoor Outdoor Planter Display Shelving Unit for Patio Garden Corner Balcony Living Room' and output them as a list in alphabetical order, separeted by line breaks.",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "long-term",
        "description": "Go through all the reviews and note the titles of those with low ratings. Finally, sort them in alphabetical order.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "item_name": "Bamboo Rolling 6 Tier Plant Stand Rack Multiple Flower Pot Holder Shelf Indoor Outdoor Planter Display Shelving Unit for Patio Garden Corner Balcony Living Room",
            "ground_truth": "Cute but No waterproof at all.\nCute but No waterproof at all.\nNot sturdy; do not recommend!! |OR| Cute but No waterproof at all.\nNot sturdy; do not recommend!!"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Cute but No waterproof at all.\nCute but No waterproof at all.\nNot sturdy; do not recommend!! |OR| Cute but No waterproof at all.\nNot sturdy; do not recommend!!"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20134,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770",
        "start_url_lite": "http://172.16.2.4:7770",
        "intent_template": "Extract the title of reviews with a rating of 2 or below out of 5 stars from '{{item_name}}' and output them as a list in alphabetical order, separeted by line breaks.",
        "intent": "Extract the title of reviews with a rating of 2 or below out of 5 stars from 'Waterproof Bone Conduction Headphones for Swimming, IPX8 Open-Ear MP3 Player Wireless Sport Earphones Headset Built-in 8GB Flash Memory for Running, Diving Water, Gym, Spa，Black' and output them as a list in alphabetical order, separeted by line breaks.",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "long-term",
        "description": "Go through all the reviews and note the titles of those with low ratings. Finally, sort them in alphabetical order.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770",
            "start_url_lite": "http://172.16.2.4:7770",
            "item_name": "Waterproof Bone Conduction Headphones for Swimming, IPX8 Open-Ear MP3 Player Wireless Sport Earphones Headset Built-in 8GB Flash Memory for Running, Diving Water, Gym, Spa，Black",
            "ground_truth": "Disappointed\nFaulty\nNot what I was looking for\nUncomfortable to wear hard to hear\nUncomfortable to wear hard to hear\nVery hard material.\nWas great while it lasted |OR| Disappointed\nFaulty\nNot what I was looking for\nUncomfortable to wear hard to hear\nVery hard material.\nWas great while it lasted"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Disappointed\nFaulty\nNot what I was looking for\nUncomfortable to wear hard to hear\nUncomfortable to wear hard to hear\nVery hard material.\nWas great while it lasted |OR| Disappointed\nFaulty\nNot what I was looking for\nUncomfortable to wear hard to hear\nVery hard material.\nWas great while it lasted"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "{{contents}}"
        }
    },
    {
        "task_id": 20140,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/electronics/gps-finders-accessories.html",
        "intent_template": "What is the average price of products in {{category}}? Please round to 2 decimal places. Please output only the number",
        "intent": "What is the average price of products in GPS, Finders & Accessories? Please round to 2 decimal places. Please output only the number",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "calc",
        "description": "Check the prices of all products in the category and calculate the average price.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/electronics/gps-finders-accessories.html",
            "category": "GPS, Finders & Accessories",
            "category_level": "subcategory",
            "contents": "52.66"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "52.66"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "52.66"
        }
    },
    {
        "task_id": 20141,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/health-household/diet-sports-nutrition.html",
        "intent_template": "What is the average price of products in {{category}}? Please round to 2 decimal places. Please output only the number",
        "intent": "What is the average price of products in Diet & Sports Nutrition? Please round to 2 decimal places. Please output only the number",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "calc",
        "description": "Check the prices of all products in the category and calculate the average price.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/health-household/diet-sports-nutrition.html",
            "category": "Diet & Sports Nutrition",
            "category_level": "subcategory",
            "contents": "30.46"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "30.46"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "30.46"
        }
    },
    {
        "task_id": 20142,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/electronics/security-surveillance/surveillance-video-equipment.html",
        "intent_template": "What is the average price of products in {{category}}? Please round to 2 decimal places. Please output only the number",
        "intent": "What is the average price of products in Surveillance Video Equipment? Please round to 2 decimal places. Please output only the number",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "calc",
        "description": "Check the prices of all products in the category and calculate the average price.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/electronics/security-surveillance/surveillance-video-equipment.html",
            "category": "Surveillance Video Equipment",
            "category_level": "detailed category",
            "contents": "331.64"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "331.64"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "331.64"
        }
    },
    {
        "task_id": 20143,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/beauty-personal-care/hair-care/hair-treatment-oils.html",
        "intent_template": "What is the average price of products in {{category}}? Please round to 2 decimal places. Please output only the number",
        "intent": "What is the average price of products in Hair Treatment Oils? Please round to 2 decimal places. Please output only the number",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "calc",
        "description": "Check the prices of all products in the category and calculate the average price.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/beauty-personal-care/hair-care/hair-treatment-oils.html",
            "category": "Hair Treatment Oils",
            "category_level": "detailed category",
            "contents": "21.87"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "21.87"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "21.87"
        }
    },
    {
        "task_id": 20144,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/grocery-gourmet-food/pantry-staples/condiments-salad-dressings.html",
        "intent_template": "What is the average price of products in {{category}}? Please round to 2 decimal places. Please output only the number",
        "intent": "What is the average price of products in Condiments & Salad Dressings? Please round to 2 decimal places. Please output only the number",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "calc",
        "description": "Check the prices of all products in the category and calculate the average price.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/grocery-gourmet-food/pantry-staples/condiments-salad-dressings.html",
            "category": "Condiments & Salad Dressings",
            "category_level": "detailed category",
            "contents": "22.96"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "22.96"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "22.96"
        }
    },
    {
        "task_id": 20150,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/beauty-personal-care/oral-care/mouthwash.html",
        "intent_template": "Calculate the following statistics for the number of reviews in the {{category}} category, considering only products that have at least one review: maximum, mean (rounded to 1 decimal place), median (rounded to 1 decimal place), and variance (unbiased variance, rounded to 1 decimal place). Format your answer exactly as follows on separate lines:\nMaximum: [value]\nMean: [value]\nMedian: [value]\nVariance: [value]",
        "intent": "Calculate the following statistics for the number of reviews in the Mouthwash category, considering only products that have at least one review: maximum, mean (rounded to 1 decimal place), median (rounded to 1 decimal place), and variance (unbiased variance, rounded to 1 decimal place). Format your answer exactly as follows on separate lines:\nMaximum: [value]\nMean: [value]\nMedian: [value]\nVariance: [value]",
        "required_obs": "text",
        "type_main": "calc",
        "type_sub": "massive_memory",
        "description": "Check the number of reviews for all products in the category and calculate the maximum, mean, median, and unbiased variance.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/beauty-personal-care/oral-care/mouthwash.html",
            "category": "Mouthwash",
            "review_stats": "Maximum: 12\nMean: 7.8\nMedian: 10.5\nVariance: 21.3",
            "max_product_name": "Sensodyne Pronamel Mouthwash, 8.4 Ounce (Pack of 2)",
            "review_max": 12,
            "review_mean": 7.8,
            "review_median": 10.5,
            "review_variance": 21.3,
            "num_products_with_reviews": 60
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Maximum: 12\nMean: 7.8\nMedian: 10.5\nVariance: 21.3"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Maximum: 12\nMean: 7.8\nMedian: 10.5\nVariance: 21.3"
        }
    },
    {
        "task_id": 20151,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/video-games/playstation-4/accessories.html",
        "intent_template": "Calculate the following statistics for the number of reviews in the {{category}} category, considering only products that have at least one review: maximum, mean (rounded to 1 decimal place), median (rounded to 1 decimal place), and variance (unbiased variance, rounded to 1 decimal place). Format your answer exactly as follows on separate lines:\nMaximum: [value]\nMean: [value]\nMedian: [value]\nVariance: [value]",
        "intent": "Calculate the following statistics for the number of reviews in the Play Station4 Accessories category, considering only products that have at least one review: maximum, mean (rounded to 1 decimal place), median (rounded to 1 decimal place), and variance (unbiased variance, rounded to 1 decimal place). Format your answer exactly as follows on separate lines:\nMaximum: [value]\nMean: [value]\nMedian: [value]\nVariance: [value]",
        "required_obs": "text",
        "type_main": "calc",
        "type_sub": "massive_memory",
        "description": "Check the number of reviews for all products in the category and calculate the maximum, mean, median, and unbiased variance.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/video-games/playstation-4/accessories.html",
            "category": "Play Station4 Accessories",
            "review_stats": "Maximum: 32\nMean: 7.6\nMedian: 9.0\nVariance: 28.1",
            "max_product_name": "MXRC Silicone Rubber Cover Skin Case x 1 Anti-Slip Studded Dots Customize for PS4/SLIM/PRO Controller x 1(Pink) + Cat Paw Thumb Grips x 2",
            "review_max": 32,
            "review_mean": 7.6,
            "review_median": 9.0,
            "review_variance": 28.1,
            "num_products_with_reviews": 91
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Maximum: 32\nMean: 7.6\nMedian: 9.0\nVariance: 28.1"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Maximum: 32\nMean: 7.6\nMedian: 9.0\nVariance: 28.1"
        }
    },
    {
        "task_id": 20152,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/beauty-personal-care/oral-care/denture-care.html",
        "intent_template": "Calculate the following statistics for the number of reviews in the {{category}} category, considering only products that have at least one review: maximum, mean (rounded to 1 decimal place), median (rounded to 1 decimal place), and variance (unbiased variance, rounded to 1 decimal place). Format your answer exactly as follows on separate lines:\nMaximum: [value]\nMean: [value]\nMedian: [value]\nVariance: [value]",
        "intent": "Calculate the following statistics for the number of reviews in the Denture Care category, considering only products that have at least one review: maximum, mean (rounded to 1 decimal place), median (rounded to 1 decimal place), and variance (unbiased variance, rounded to 1 decimal place). Format your answer exactly as follows on separate lines:\nMaximum: [value]\nMean: [value]\nMedian: [value]\nVariance: [value]",
        "required_obs": "text",
        "type_main": "calc",
        "type_sub": "massive_memory",
        "description": "Check the number of reviews for all products in the category and calculate the maximum, mean, median, and unbiased variance.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/beauty-personal-care/oral-care/denture-care.html",
            "category": "Denture Care",
            "review_stats": "Maximum: 72\nMean: 12.4\nMedian: 12.0\nVariance: 225.3",
            "max_product_name": "Denture Do it Yourself Full Set of Top and Bottom Fake Teeth for Improve Smile",
            "review_max": 72,
            "review_mean": 12.4,
            "review_median": 12.0,
            "review_variance": 225.3,
            "num_products_with_reviews": 36
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Maximum: 72\nMean: 12.4\nMedian: 12.0\nVariance: 225.3"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Maximum: 72\nMean: 12.4\nMedian: 12.0\nVariance: 225.3"
        }
    },
    {
        "task_id": 20153,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/electronics/television-video/streaming-media-players.html",
        "intent_template": "Calculate the following statistics for the number of reviews in the {{category}} category, considering only products that have at least one review: maximum, mean (rounded to 1 decimal place), median (rounded to 1 decimal place), and variance (unbiased variance, rounded to 1 decimal place). Format your answer exactly as follows on separate lines:\nMaximum: [value]\nMean: [value]\nMedian: [value]\nVariance: [value]",
        "intent": "Calculate the following statistics for the number of reviews in the Streaming Media Players category, considering only products that have at least one review: maximum, mean (rounded to 1 decimal place), median (rounded to 1 decimal place), and variance (unbiased variance, rounded to 1 decimal place). Format your answer exactly as follows on separate lines:\nMaximum: [value]\nMean: [value]\nMedian: [value]\nVariance: [value]",
        "required_obs": "text",
        "type_main": "calc",
        "type_sub": "massive_memory",
        "description": "Check the number of reviews for all products in the category and calculate the maximum, mean, median, and unbiased variance.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/electronics/television-video/streaming-media-players.html",
            "category": "Streaming Media Players",
            "review_stats": "Maximum: 12\nMean: 7.4\nMedian: 7.5\nVariance: 21.0",
            "max_product_name": "TiVo TCDA94000 Stream for TiVo Premiere and Roamio DVRs",
            "review_max": 12,
            "review_mean": 7.4,
            "review_median": 7.5,
            "review_variance": 21.0,
            "num_products_with_reviews": 22
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Maximum: 12\nMean: 7.4\nMedian: 7.5\nVariance: 21.0"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Maximum: 12\nMean: 7.4\nMedian: 7.5\nVariance: 21.0"
        }
    },
    {
        "task_id": 20154,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/home-kitchen/storage-organization/clothing-closet-storage.html",
        "intent_template": "Calculate the following statistics for the number of reviews in the {{category}} category, considering only products that have at least one review: maximum, mean (rounded to 1 decimal place), median (rounded to 1 decimal place), and variance (unbiased variance, rounded to 1 decimal place). Format your answer exactly as follows on separate lines:\nMaximum: [value]\nMean: [value]\nMedian: [value]\nVariance: [value]",
        "intent": "Calculate the following statistics for the number of reviews in the Clothing & Closet Storage category, considering only products that have at least one review: maximum, mean (rounded to 1 decimal place), median (rounded to 1 decimal place), and variance (unbiased variance, rounded to 1 decimal place). Format your answer exactly as follows on separate lines:\nMaximum: [value]\nMean: [value]\nMedian: [value]\nVariance: [value]",
        "required_obs": "text",
        "type_main": "calc",
        "type_sub": "massive_memory",
        "description": "Check the number of reviews for all products in the category and calculate the maximum, mean, median, and unbiased variance.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/home-kitchen/storage-organization/clothing-closet-storage.html",
            "category": "Clothing & Closet Storage",
            "review_stats": "Maximum: 122\nMean: 12.4\nMedian: 12.0\nVariance: 242.3",
            "max_product_name": "storageLAB Under Bed Shoe Storage Organizer, Adjustable Dividers - Fits Up to 12 Pairs - Underbed Storage Solution (Grey)",
            "review_max": 122,
            "review_mean": 12.4,
            "review_median": 12.0,
            "review_variance": 242.3,
            "num_products_with_reviews": 69
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "must_include": [
                    "Maximum: 122\nMean: 12.4\nMedian: 12.0\nVariance: 242.3"
                ]
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Maximum: 122\nMean: 12.4\nMedian: 12.0\nVariance: 242.3"
        }
    },
    {
        "task_id": 20160,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "What is the combined price of the most affordable {{item1_description}} and the cheapest {{item2_description}}{{condition}}? Please only output the number.",
        "intent": "What is the combined price of the most affordable item between highest-spec Galaxy cell phones (excluding the Note series) in Cell phone category products and the cheapest Flip Case with Card Holder that is compatible with highest-spec Galaxy (excluding the Note series)? Please only output the number.",
        "required_obs": "text",
        "type_main": "long-term",
        "description": "Extract all items from the lists of {product1} and {product2} that meet the {condition} → Among them, find the cheapest item from each list and add their prices together.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "item1_description": "item between highest-spec Galaxy cell phones (excluding the Note series) in Cell phone category products",
            "item2_description": "Flip Case with Card Holder that is compatible with highest-spec Galaxy (excluding the Note series)",
            "condition": "",
            "item1_price": 534.0,
            "item2_price": 13.99,
            "expected_answer": "547.99",
            "item1_product": "(Renewed) Samsung Galaxy S21 5G, US Version, 128GB, Phantom Gray - Unlocked",
            "item2_product": "Asuwish Compatible with Samsung Galaxy S21 Plus Glaxay S21+ 5G Wallet Case and Tempered Glass Screen Protector Flip Cover Card Holder Cell Phone Cases for Gaxaly S21+5G S21plus 21S + S 21 21+ G5 Blue"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "547.99"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "547.99",
            "price_calculation": "534.0 + 13.99"
        }
    },
    {
        "task_id": 20161,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "What is the combined price of the most affordable {{item1_description}} and the cheapest {{item2_description}}{{condition}}? Please only output the number.",
        "intent": "What is the combined price of the most affordable items between Latest iPhone in Cell phone category products and the cheapest Flip Case with Card Holder that is compatible with latest iPhone? Please only output the number.",
        "required_obs": "text",
        "type_main": "long-term",
        "description": "Extract all items from the lists of {product1} and {product2} that meet the {condition} → Among them, find the cheapest item from each list and add their prices together.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "item1_description": "items between Latest iPhone in Cell phone category products",
            "item2_description": "Flip Case with Card Holder that is compatible with latest iPhone",
            "condition": "",
            "item1_price": 448.2,
            "item2_price": 22.99,
            "expected_answer": "471.19",
            "item1_product": "Apple iPhone 12 Mini, 64GB, Black - Verizon (Renewed)",
            "item2_product": "ZVE Wallet Case Compatible with iPhone 12 Mini(2020), Zipper Purse Case with Card Holder Slot Crossbody Chain Wrist Strap Compatible with iPhone 12 Mini,5.4 inch-Rose Gold"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "471.19"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "471.19",
            "price_calculation": "448.2 + 22.99"
        }
    },
    {
        "task_id": 20162,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "What is the combined price of the most affordable {{item1_description}} and the cheapest {{item2_description}}{{condition}}? Please only output the number.",
        "intent": "What is the combined price of the most affordable smartphone at least 3.5 rating (rating>=70) and 5 review manufactured by Samsung or Apple and the cheapest privacy screen protector compatible with the phone each? Please only output the number.",
        "required_obs": "text",
        "type_main": "long-term",
        "description": "Extract all items from the lists of {product1} and {product2} that meet the {condition} → Among them, find the cheapest item from each list and add their prices together.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "item1_description": "smartphone at least 3.5 rating (rating>=70) and 5 review manufactured by Samsung or Apple",
            "item2_description": "privacy screen protector compatible with the phone each",
            "condition": "",
            "item1_price": 159.0,
            "item2_price": 5.98,
            "expected_answer": "164.98",
            "item1_product": "Samsung Galaxy A50 US Version Factory Unlocked Cell Phone with 64GB Memory, 6.4\" Screen, Black, [SM-A505UZKNXAA] (Renewed)",
            "item2_product": "[3-Pack] Pokolan Tempered Glass for Samsung Galaxy A50, A30, A30s, A50s, A40, M30, M30s, M31, M21 Screen Protector, Anti Scratch, HD Clear, 9H Hardness, Bubble Free, Easy to Install"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "164.98"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "164.98",
            "price_calculation": "159.0 + 5.98"
        }
    },
    {
        "task_id": 20170,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/home-kitchen/home-decor-products/rugs-pads-protectors.html",
        "intent_template": "Please tell me the full name of the product with the most reviews among the highest-rated products in the highest 20% price range of {{category}}. Answer with only the full product name, nothing else.",
        "intent": "Please tell me the full name of the product with the most reviews among the highest-rated products in the highest 20% price range of Rugs, Pads & Protectors. Answer with only the full product name, nothing else.",
        "required_obs": "text",
        "type_main": "massive_memory",
        "description": "Go to the specified category and determine the highest 20% price range of the products in that category. Sort the products in descending order by price, then, within that top 20%, find the product with the most reviews among those with the highest rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/home-kitchen/home-decor-products/rugs-pads-protectors.html",
            "category": "Rugs, Pads & Protectors",
            "product_name": "Rugs America Area Rug, 8'0\"X10'0\", Carnation",
            "price": 188.84,
            "rating": 5.0,
            "review_count": 2
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "Rugs America Area Rug, 8'0\"X10'0\", Carnation"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Rugs America Area Rug, 8'0\"X10'0\", Carnation"
        }
    },
    {
        "task_id": 20171,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/electronics/camera-photo/bags-cases.html",
        "intent_template": "Please tell me the full name of the product with the most reviews among the highest-rated products in the highest 20% price range of {{category}}. Answer with only the full product name, nothing else.",
        "intent": "Please tell me the full name of the product with the most reviews among the highest-rated products in the highest 20% price range of Camera & Photo/Bags & Cases. Answer with only the full product name, nothing else.",
        "required_obs": "text",
        "type_main": "massive_memory",
        "description": "Go to the specified category and determine the highest 20% price range of the products in that category. Sort the products in descending order by price, then, within that top 20%, find the product with the most reviews among those with the highest rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/electronics/camera-photo/bags-cases.html",
            "category": "Camera & Photo/Bags & Cases",
            "product_name": "SKB iSeries 3i-1309 Military-Grade Waterproof Hard Case for BlackMagic Design Pocket Cinema Camera 4K & Accessories",
            "price": 139.99,
            "rating": 5.0,
            "review_count": 5
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "SKB iSeries 3i-1309 Military-Grade Waterproof Hard Case for BlackMagic Design Pocket Cinema Camera 4K & Accessories"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "SKB iSeries 3i-1309 Military-Grade Waterproof Hard Case for BlackMagic Design Pocket Cinema Camera 4K & Accessories"
        }
    },
    {
        "task_id": 20172,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/grocery-gourmet-food/frozen/ice-cream-novelties.html",
        "intent_template": "Please tell me the full name of the product with the most reviews among the highest-rated products in the highest 20% price range of {{category}}. Answer with only the full product name, nothing else.",
        "intent": "Please tell me the full name of the product with the most reviews among the highest-rated products in the highest 20% price range of Ice Cream & Novelties. Answer with only the full product name, nothing else.",
        "required_obs": "text",
        "type_main": "massive_memory",
        "description": "Go to the specified category and determine the highest 20% price range of the products in that category. Sort the products in descending order by price, then, within that top 20%, find the product with the most reviews among those with the highest rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/grocery-gourmet-food/frozen/ice-cream-novelties.html",
            "category": "Ice Cream & Novelties",
            "product_name": "Juve Hydrating Electrolyte Pops (25-35 Calories, 5-7g Sugar, Vitamin B5, B6, B3) [Variety Pack: Triple Berry, Grape, and Orange] 6 Boxes of 12 x 1.79 FL Oz Pops, 72 Count | Buy Now-Freeze Later",
            "price": 60.0,
            "rating": 5.0,
            "review_count": 1
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "Juve Hydrating Electrolyte Pops (25-35 Calories, 5-7g Sugar, Vitamin B5, B6, B3) [Variety Pack: Triple Berry, Grape, and Orange] 6 Boxes of 12 x 1.79 FL Oz Pops, 72 Count | Buy Now-Freeze Later"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Juve Hydrating Electrolyte Pops (25-35 Calories, 5-7g Sugar, Vitamin B5, B6, B3) [Variety Pack: Triple Berry, Grape, and Orange] 6 Boxes of 12 x 1.79 FL Oz Pops, 72 Count | Buy Now-Freeze Later"
        }
    },
    {
        "task_id": 20173,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/electronics/security-surveillance/surveillance-video-equipment.html",
        "intent_template": "Please tell me the full name of the product with the most reviews among the highest-rated products in the highest 20% price range of {{category}}. Answer with only the full product name, nothing else.",
        "intent": "Please tell me the full name of the product with the most reviews among the highest-rated products in the highest 20% price range of Surveillance Video Equipment. Answer with only the full product name, nothing else.",
        "required_obs": "text",
        "type_main": "massive_memory",
        "description": "Go to the specified category and determine the highest 20% price range of the products in that category. Sort the products in descending order by price, then, within that top 20%, find the product with the most reviews among those with the highest rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/electronics/security-surveillance/surveillance-video-equipment.html",
            "category": "Surveillance Video Equipment",
            "product_name": "PANOOB 2-Way Audio 8 Channel 4K PoE Security Camera Systems, 4K/8MP NVR w/2TB HDD - 6Pcs 5MP 2.8mm HD Human Detection Wired Surveillance IP Cameras for 24/7 Business Video Audio Recording",
            "price": 569.99,
            "rating": 5.0,
            "review_count": 2
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "PANOOB 2-Way Audio 8 Channel 4K PoE Security Camera Systems, 4K/8MP NVR w/2TB HDD - 6Pcs 5MP 2.8mm HD Human Detection Wired Surveillance IP Cameras for 24/7 Business Video Audio Recording"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "PANOOB 2-Way Audio 8 Channel 4K PoE Security Camera Systems, 4K/8MP NVR w/2TB HDD - 6Pcs 5MP 2.8mm HD Human Detection Wired Surveillance IP Cameras for 24/7 Business Video Audio Recording"
        }
    },
    {
        "task_id": 20174,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/beauty-personal-care/makeup/makeup-remover.html",
        "intent_template": "Please tell me the full name of the product with the most reviews among the highest-rated products in the highest 20% price range of {{category}}. Answer with only the full product name, nothing else.",
        "intent": "Please tell me the full name of the product with the most reviews among the highest-rated products in the highest 20% price range of Makeup Remover. Answer with only the full product name, nothing else.",
        "required_obs": "text",
        "type_main": "massive_memory",
        "description": "Go to the specified category and determine the highest 20% price range of the products in that category. Sort the products in descending order by price, then, within that top 20%, find the product with the most reviews among those with the highest rating.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/beauty-personal-care/makeup/makeup-remover.html",
            "category": "Makeup Remover",
            "product_name": "Diamond Wipes Gentle Makeup Remover Cleansing Face Wipes Case of 500ct Wipes Made with Vitamin E Perfect for Waterproof Makeup",
            "price": 57.95,
            "rating": 4.5,
            "review_count": 12
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "Diamond Wipes Gentle Makeup Remover Cleansing Face Wipes Case of 500ct Wipes Made with Vitamin E Perfect for Waterproof Makeup"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Diamond Wipes Gentle Makeup Remover Cleansing Face Wipes Case of 500ct Wipes Made with Vitamin E Perfect for Waterproof Makeup"
        }
    },
    {
        "task_id": 20180,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "What month had the highest spending for {{category}} products in my past orders during the period from {{start_date}} to {{end_date}}? Answer with only the month and year in this exact format: '[Month] [Year]' (for example: 'January 2023').",
        "intent": "What month had the highest spending for Home & Kitchen products in my past orders during the period from August 2022 to November 2022? Answer with only the month and year in this exact format: '[Month] [Year]' (for example: 'January 2023').",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "calc",
        "description": "Aggregate the prices of items that fall under a specific category from my orders within the specified time period on a monthly basis → Identify the month with the highest total spending.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "category": "Home & Kitchen",
            "start_date": "August 2022",
            "end_date": "November 2022",
            "highest_month": "October 2022",
            "second_highest_month": "September 2022",
            "highest_amount": "370.87",
            "second_highest_amount": "338.99",
            "percentage_diff": "8.60%",
            "monthly_spending": "• October 2022: $370.87\n• September 2022: $338.99",
            "exact_answer": "October 2022"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "October 2022"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "October 2022"
        }
    },
    {
        "task_id": 20181,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "What month had the highest spending for {{category}} products in my past orders during the period from {{start_date}} to {{end_date}}? Answer with only the month and year in this exact format: '[Month] [Year]' (for example: 'January 2023').",
        "intent": "What month had the highest spending for Electronics products in my past orders during the period from December 2022 to May 2023? Answer with only the month and year in this exact format: '[Month] [Year]' (for example: 'January 2023').",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "calc",
        "description": "Aggregate the prices of items that fall under a specific category from my orders within the specified time period on a monthly basis → Identify the month with the highest total spending.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "category": "Electronics",
            "start_date": "December 2022",
            "end_date": "May 2023",
            "highest_month": "February 2023",
            "second_highest_month": "January 2023",
            "highest_amount": "229.63",
            "second_highest_amount": "169.95",
            "percentage_diff": "25.99%",
            "monthly_spending": "• February 2023: $229.63\n• January 2023: $169.95\n• December 2022: $15.49\n• March 2023: $6.28",
            "exact_answer": "February 2023"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "February 2023"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "February 2023"
        }
    },
    {
        "task_id": 20182,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "What month had the highest spending for {{category}} products in my past orders during the period from {{start_date}} to {{end_date}}? Answer with only the month and year in this exact format: '[Month] [Year]' (for example: 'January 2023').",
        "intent": "What month had the highest spending for Home & Kitchen products in my past orders during the period from September 2022 to December 2022? Answer with only the month and year in this exact format: '[Month] [Year]' (for example: 'January 2023').",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "calc",
        "description": "Aggregate the prices of items that fall under a specific category from my orders within the specified time period on a monthly basis → Identify the month with the highest total spending.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "category": "Home & Kitchen",
            "start_date": "September 2022",
            "end_date": "December 2022",
            "highest_month": "October 2022",
            "second_highest_month": "September 2022",
            "highest_amount": "370.87",
            "second_highest_amount": "338.99",
            "percentage_diff": "8.60%",
            "monthly_spending": "• October 2022: $370.87\n• September 2022: $338.99\n• December 2022: $59.17",
            "exact_answer": "October 2022"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "October 2022"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "October 2022"
        }
    },
    {
        "task_id": 20183,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "What month had the highest spending for {{category}} products in my past orders during the period from {{start_date}} to {{end_date}}? Answer with only the month and year in this exact format: '[Month] [Year]' (for example: 'January 2023').",
        "intent": "What month had the highest spending for Grocery & Gourmet Food products in my past orders during the period from November 2022 to February 2023? Answer with only the month and year in this exact format: '[Month] [Year]' (for example: 'January 2023').",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "calc",
        "description": "Aggregate the prices of items that fall under a specific category from my orders within the specified time period on a monthly basis → Identify the month with the highest total spending.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "category": "Grocery & Gourmet Food",
            "start_date": "November 2022",
            "end_date": "February 2023",
            "highest_month": "December 2022",
            "second_highest_month": "November 2022",
            "highest_amount": "66.27",
            "second_highest_amount": "41.94",
            "percentage_diff": "36.71%",
            "monthly_spending": "• December 2022: $66.27\n• November 2022: $41.94",
            "exact_answer": "December 2022"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "December 2022"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "December 2022"
        }
    },
    {
        "task_id": 20184,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "What month had the highest spending for {{category}} products in my past orders during the period from {{start_date}} to {{end_date}}? Answer with only the month and year in this exact format: '[Month] [Year]' (for example: 'January 2023').",
        "intent": "What month had the highest spending for Electronics products in my past orders during the period from November 2022 to February 2023? Answer with only the month and year in this exact format: '[Month] [Year]' (for example: 'January 2023').",
        "required_obs": "any",
        "type_main": "massive_memory",
        "type_sub": "calc",
        "description": "Aggregate the prices of items that fall under a specific category from my orders within the specified time period on a monthly basis → Identify the month with the highest total spending.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "category": "Electronics",
            "start_date": "November 2022",
            "end_date": "February 2023",
            "highest_month": "February 2023",
            "second_highest_month": "January 2023",
            "highest_amount": "229.63",
            "second_highest_amount": "169.95",
            "percentage_diff": "25.99%",
            "monthly_spending": "• February 2023: $229.63\n• January 2023: $169.95\n• December 2022: $15.49",
            "exact_answer": "February 2023"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "February 2023"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "February 2023"
        }
    },
    {
        "task_id": 20190,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "What is the full name of the {{product_description}} in the same subcategory {{category}} as the product {{product_name}} from my order number {{order_number}}?  The product must have at least 5 reviews, a rating of 3.5 (rating>=70) or higher, and cost between 50%-90% of the original product's price. Answer with only the product's full name, nothing else.",
        "intent": "What is the full name of the lightest selfie stick product in the same subcategory Photo & Video Accessories as the product Tziarp Multifunction 3 in 1 Selfie Stick Wireless Bluetooth Control Extendable Tripod Foldable Rotatable Self-Timer Monopod for Phone from my order number 179?  The product must have at least 5 reviews, a rating of 3.5 (rating>=70) or higher, and cost between 50%-90% of the original product's price. Answer with only the product's full name, nothing else.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Retrieve the price of the specified item from the order → Navigate to its category and identify the items that meet the latter condition within that category, making a note of each item and its location → Review all the noted items and search for the one that satisfies the {{product_description}} condition.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "order_number": "179",
            "product_name": "Tziarp Multifunction 3 in 1 Selfie Stick Wireless Bluetooth Control Extendable Tripod Foldable Rotatable Self-Timer Monopod for Phone",
            "category": "Photo & Video Accessories",
            "product_description": "lightest selfie stick product",
            "expected_answer": "2022 Newest Selfie Stick for iPhone with LED Light Wireless Bluetooth Tripod Selfie Stick with Detachable Remote Compatible with iPhone 13/13 Pro/12/11/XR/X/Pro Max/Mini, Android Smartphone -White"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "2022 Newest Selfie Stick for iPhone with LED Light Wireless Bluetooth Tripod Selfie Stick with Detachable Remote Compatible with iPhone 13/13 Pro/12/11/XR/X/Pro Max/Mini, Android Smartphone -White"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "2022 Newest Selfie Stick for iPhone with LED Light Wireless Bluetooth Tripod Selfie Stick with Detachable Remote Compatible with iPhone 13/13 Pro/12/11/XR/X/Pro Max/Mini, Android Smartphone -White"
        }
    },
    {
        "task_id": 20191,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "What is the full name of the {{product_description}} in the same subcategory {{category}} as the product {{product_name}} from my order number {{order_number}}?  The product must have at least 5 reviews, a rating of 3.5 (rating>=70) or higher, and cost between 50%-90% of the original product's price. Answer with only the product's full name, nothing else.",
        "intent": "What is the full name of the bread whose country of origin is USA in the same subcategory Breads as the product Whole Foods Market, Bread Batard Olive, 19 Ounce from my order number 162?  The product must have at least 5 reviews, a rating of 3.5 (rating>=70) or higher, and cost between 50%-90% of the original product's price. Answer with only the product's full name, nothing else.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Retrieve the price of the specified item from the order → Navigate to its category and identify the items that meet the latter condition within that category, making a note of each item and its location → Review all the noted items and search for the one that satisfies the {{product_description}} condition.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "order_number": "162",
            "product_name": "Whole Foods Market, Bread Batard Olive, 19 Ounce",
            "category": "Breads",
            "product_description": "bread whose country of origin is USA",
            "expected_answer": "B&M Brown Bread, Original Flavor, 16 Ounce (Pack of 12)"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "B&M Brown Bread, Original Flavor, 16 Ounce (Pack of 12)"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "B&M Brown Bread, Original Flavor, 16 Ounce (Pack of 12)"
        }
    },
    {
        "task_id": 20192,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "What is the full name of the {{product_description}} in the same subcategory {{category}} as the product {{product_name}} from my order number {{order_number}}?  The product must have at least 5 reviews, a rating of 3.5 (rating>=70) or higher, and cost between 50%-90% of the original product's price. Answer with only the product's full name, nothing else.",
        "intent": "What is the full name of the product that includes only film (no camera) in the same subcategory Film Photography as the product RDPIII 120 Provia 100F from my order number 179?  The product must have at least 5 reviews, a rating of 3.5 (rating>=70) or higher, and cost between 50%-90% of the original product's price. Answer with only the product's full name, nothing else.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Retrieve the price of the specified item from the order → Navigate to its category and identify the items that meet the latter condition within that category, making a note of each item and its location → Review all the noted items and search for the one that satisfies the {{product_description}} condition.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "order_number": "179",
            "product_name": "RDPIII 120 Provia 100F",
            "category": "Film Photography",
            "product_description": "product that includes only film (no camera)",
            "expected_answer": "Lomography Color Negative 400 ISO 35mm 3 Pack"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "Lomography Color Negative 400 ISO 35mm 3 Pack"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Lomography Color Negative 400 ISO 35mm 3 Pack"
        }
    },
    {
        "task_id": 20193,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "What is the full name of the {{product_description}} in the same subcategory {{category}} as the product {{product_name}} from my order number {{order_number}}?  The product must have at least 5 reviews, a rating of 3.5 (rating>=70) or higher, and cost between 50%-90% of the original product's price. Answer with only the product's full name, nothing else.",
        "intent": "What is the full name of the cheapest adapter in the same subcategory AC Adapters as the product SupplySource AC-DC Adapter for Sony Playstation VR Virtual Reality Headset PSVR PSVR2 from my order number 160?  The product must have at least 5 reviews, a rating of 3.5 (rating>=70) or higher, and cost between 50%-90% of the original product's price. Answer with only the product's full name, nothing else.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Retrieve the price of the specified item from the order → Navigate to its category and identify the items that meet the latter condition within that category, making a note of each item and its location → Review all the noted items and search for the one that satisfies the {{product_description}} condition.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "order_number": "160",
            "product_name": "SupplySource AC-DC Adapter for Sony Playstation VR Virtual Reality Headset PSVR PSVR2",
            "category": "AC Adapters",
            "product_description": "cheapest adapter",
            "expected_answer": "POWSEED 5V 3A Switching AC Adapter 15W Replacement Power Cord Charger for Android Tablets Webcam Digital Cameras Routers Toys Recorder Bluetooth Speaker TV Box GPS and More 5V Electronics"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "POWSEED 5V 3A Switching AC Adapter 15W Replacement Power Cord Charger for Android Tablets Webcam Digital Cameras Routers Toys Recorder Bluetooth Speaker TV Box GPS and More 5V Electronics"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "POWSEED 5V 3A Switching AC Adapter 15W Replacement Power Cord Charger for Android Tablets Webcam Digital Cameras Routers Toys Recorder Bluetooth Speaker TV Box GPS and More 5V Electronics"
        }
    },
    {
        "task_id": 20194,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "What is the full name of the {{product_description}} in the same subcategory {{category}} as the product {{product_name}} from my order number {{order_number}}?  The product must have at least 5 reviews, a rating of 3.5 (rating>=70) or higher, and cost between 50%-90% of the original product's price. Answer with only the product's full name, nothing else.",
        "intent": "What is the full name of the Adapter manufactured by FIT-POWER in the same subcategory AC Adapters as the product SupplySource AC-DC Adapter for Sony Playstation VR Virtual Reality Headset PSVR PSVR2 from my order number 160?  The product must have at least 5 reviews, a rating of 3.5 (rating>=70) or higher, and cost between 50%-90% of the original product's price. Answer with only the product's full name, nothing else.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Retrieve the price of the specified item from the order → Navigate to its category and identify the items that meet the latter condition within that category, making a note of each item and its location → Review all the noted items and search for the one that satisfies the {{product_description}} condition.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "order_number": "160",
            "product_name": "SupplySource AC-DC Adapter for Sony Playstation VR Virtual Reality Headset PSVR PSVR2",
            "category": "AC Adapters",
            "product_description": "Adapter manufactured by FIT-POWER",
            "expected_answer": "HY1C 24V 1A Power Supply Adapter, 100V-240V AC to DC 24 Volt 1Amp Charger for 24vdc 100mA 500mA 1000mA Christmas LED Strip Light, CCTV Camera, DC Pump, Humidifier, Massage Gun, Mini Printer, Cleaner"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "fuzzy_match": "HY1C 24V 1A Power Supply Adapter, 100V-240V AC to DC 24 Volt 1Amp Charger for 24vdc 100mA 500mA 1000mA Christmas LED Strip Light, CCTV Camera, DC Pump, Humidifier, Massage Gun, Mini Printer, Cleaner"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "HY1C 24V 1A Power Supply Adapter, 100V-240V AC to DC 24 Volt 1Amp Charger for 24vdc 100mA 500mA 1000mA Christmas LED Strip Light, CCTV Camera, DC Pump, Humidifier, Massage Gun, Mini Printer, Cleaner"
        }
    },
    {
        "task_id": 20200,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/cell-phones-accessories/accessories/automobile-accessories.html",
        "intent_template": "Take me to the page of the product with the most reviews among the highest-rated products priced within 90-120% of the average price for {{category}}.",
        "intent": "Take me to the page of the product with the most reviews among the highest-rated products priced within 90-120% of the average price for Automobile Accessories.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "First, go to the specified category. Then, check the prices of all products in the category and calculate the average price. Finally, find the product with the highest rating and most reviews within 90-120% of the average price.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/cell-phones-accessories/accessories/automobile-accessories.html",
            "category": "Automobile Accessories",
            "product_name": "andobil Original Magnetic Car Vent Mount Compatible with MagSafe [Case Friendly] Car Mount for iPhone 13 Pro Max 12 Mini [Powerful Magnet] Air Vent Car Phone Holder, Cell Phone Holder for Most of Car",
            "product_url": "http://172.16.2.4:7770/andobil-original-magnetic-car-vent-mount-compatible-with-magsafe-case-friendly-car-mount-for-iphone-13-pro-max-12-mini-powerful-magnet-air-vent-car-phone-holder-cell-phone-holder-for-most-of-car.html",
            "avg_price": 26.69,
            "price": 29.98,
            "price_ratio": 1.12,
            "rating": 4.9,
            "review_count": 10
        },
        "eval": {
            "eval_types": [
                "url_match"
            ],
            "reference_answers": "",
            "reference_url": "http://172.16.2.4:7770/andobil-original-magnetic-car-vent-mount-compatible-with-magsafe-case-friendly-car-mount-for-iphone-13-pro-max-12-mini-powerful-magnet-air-vent-car-phone-holder-cell-phone-holder-for-most-of-car.html",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "http://172.16.2.4:7770/andobil-original-magnetic-car-vent-mount-compatible-with-magsafe-case-friendly-car-mount-for-iphone-13-pro-max-12-mini-powerful-magnet-air-vent-car-phone-holder-cell-phone-holder-for-most-of-car.html"
        }
    },
    {
        "task_id": 20201,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/grocery-gourmet-food/meat-seafood/seafood.html",
        "intent_template": "Take me to the page of the product with the most reviews among the highest-rated products priced within 90-120% of the average price for {{category}}.",
        "intent": "Take me to the page of the product with the most reviews among the highest-rated products priced within 90-120% of the average price for Meat and Seafood, Seafood.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "First, go to the specified category. Then, check the prices of all products in the category and calculate the average price. Finally, find the product with the highest rating and most reviews within 90-120% of the average price.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/grocery-gourmet-food/meat-seafood/seafood.html",
            "category": "Meat and Seafood, Seafood",
            "product_name": "Ahi Tuna Loin 4 lbs, Tristan ULT Fresh, #1 Grade, Skinless, Boneless, and Bloodline Removed",
            "product_url": "http://172.16.2.4:7770/ahi-tuna-loin-4-lbs-tristan-ult-fresh-1-grade-skinless-boneless-and-bloodline-removed.html",
            "avg_price": 105.34,
            "price": 124.99,
            "price_ratio": 1.19,
            "rating": 5.0,
            "review_count": 1
        },
        "eval": {
            "eval_types": [
                "url_match"
            ],
            "reference_answers": "",
            "reference_url": "http://172.16.2.4:7770/ahi-tuna-loin-4-lbs-tristan-ult-fresh-1-grade-skinless-boneless-and-bloodline-removed.html",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "http://172.16.2.4:7770/ahi-tuna-loin-4-lbs-tristan-ult-fresh-1-grade-skinless-boneless-and-bloodline-removed.html"
        }
    },
    {
        "task_id": 20202,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/grocery-gourmet-food/deli-prepared-foods.html",
        "intent_template": "Take me to the page of the product with the most reviews among the highest-rated products priced within 90-120% of the average price for {{category}}.",
        "intent": "Take me to the page of the product with the most reviews among the highest-rated products priced within 90-120% of the average price for Deli & Prepared Foods.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "First, go to the specified category. Then, check the prices of all products in the category and calculate the average price. Finally, find the product with the highest rating and most reviews within 90-120% of the average price.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/grocery-gourmet-food/deli-prepared-foods.html",
            "category": "Deli & Prepared Foods",
            "product_name": "Amazon Kitchen, Beef & Broccoli Stir-Fry, No-Prep Kit for 2, 27 oz",
            "product_url": "http://172.16.2.4:7770/amazon-kitchen-beef-broccoli-stir-fry-no-prep-kit-for-2-27-oz.html",
            "avg_price": 42.19,
            "price": 44.81,
            "price_ratio": 1.06,
            "rating": 5.0,
            "review_count": 1
        },
        "eval": {
            "eval_types": [
                "url_match"
            ],
            "reference_answers": "",
            "reference_url": "http://172.16.2.4:7770/amazon-kitchen-beef-broccoli-stir-fry-no-prep-kit-for-2-27-oz.html",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "http://172.16.2.4:7770/amazon-kitchen-beef-broccoli-stir-fry-no-prep-kit-for-2-27-oz.html"
        }
    },
    {
        "task_id": 20203,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/video-games/legacy-systems/playstation-systems.html",
        "intent_template": "Take me to the page of the product with the most reviews among the highest-rated products priced within 90-120% of the average price for {{category}}.",
        "intent": "Take me to the page of the product with the most reviews among the highest-rated products priced within 90-120% of the average price for PlayStation Systems.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "First, go to the specified category. Then, check the prices of all products in the category and calculate the average price. Finally, find the product with the highest rating and most reviews within 90-120% of the average price.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/video-games/legacy-systems/playstation-systems.html",
            "category": "PlayStation Systems",
            "product_name": "KontrolFreek Spaceland Zombies Edition for Playstation 4 (PS4)",
            "product_url": "http://172.16.2.4:7770/kontrolfreek-spaceland-zombies-edition-for-playstation-4-ps4.html",
            "avg_price": 30.52,
            "price": 36.6,
            "price_ratio": 1.2,
            "rating": 4.9,
            "review_count": 9
        },
        "eval": {
            "eval_types": [
                "url_match"
            ],
            "reference_answers": "",
            "reference_url": "http://172.16.2.4:7770/kontrolfreek-spaceland-zombies-edition-for-playstation-4-ps4.html",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "http://172.16.2.4:7770/kontrolfreek-spaceland-zombies-edition-for-playstation-4-ps4.html"
        }
    },
    {
        "task_id": 20204,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/electronics/computers-accessories/tablet-accessories.html",
        "intent_template": "Take me to the page of the product with the most reviews among the highest-rated products priced within 90-120% of the average price for {{category}}.",
        "intent": "Take me to the page of the product with the most reviews among the highest-rated products priced within 90-120% of the average price for Tablet Accessories.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "First, go to the specified category. Then, check the prices of all products in the category and calculate the average price. Finally, find the product with the highest rating and most reviews within 90-120% of the average price.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/electronics/computers-accessories/tablet-accessories.html",
            "category": "Tablet Accessories",
            "product_name": "Screen Protector for Humminbird Helix 7 G3N (Screen Protector by BoxWave) - ClearTouch Anti-Glare (2-Pack), Anti-Fingerprint Matte Film Skin",
            "product_url": "http://172.16.2.4:7770/screen-protector-for-humminbird-helix-7-g3n-screen-protector-by-boxwave-cleartouch-anti-glare-2-pack-anti-fingerprint-matte-film-skin.html",
            "avg_price": 18.21,
            "price": 16.95,
            "price_ratio": 0.93,
            "rating": 5.0,
            "review_count": 4
        },
        "eval": {
            "eval_types": [
                "url_match"
            ],
            "reference_answers": "",
            "reference_url": "http://172.16.2.4:7770/screen-protector-for-humminbird-helix-7-g3n-screen-protector-by-boxwave-cleartouch-anti-glare-2-pack-anti-fingerprint-matte-film-skin.html",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "http://172.16.2.4:7770/screen-protector-for-humminbird-helix-7-g3n-screen-protector-by-boxwave-cleartouch-anti-glare-2-pack-anti-fingerprint-matte-film-skin.html"
        }
    },
    {
        "task_id": 20210,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "List only the SKUs (without any additional text) of {{product_description}} in the same subcategory {{category}} as the product {{product_name}} from my order number {{order_number}}. The products must have at least 5 reviews, a rating of 3.5 (rating >= 70) or higher, and cost between 50%-90% of the original product's price. Answer with only the SKUs, nothing else. Format your answer with one SKU per line, sorted alphabetically.",
        "intent": "List only the SKUs (without any additional text) of The breads manufactured by Gourmet Market in the same subcategory Breads as the product Whole Foods Market, Bread Batard Olive, 19 Ounce from my order number 162. The products must have at least 5 reviews, a rating of 3.5 (rating >= 70) or higher, and cost between 50%-90% of the original product's price. Answer with only the SKUs, nothing else. Format your answer with one SKU per line, sorted alphabetically.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Retrieve the price of the specified item from the order → Navigate to its category and identify the items that meet the latter condition within that category, making a note of each item and its location → Review all the noted items and search for the one that satisfies the {{product_description}} condition.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "order_number": "162",
            "product_name": "Whole Foods Market, Bread Batard Olive, 19 Ounce",
            "category": "Breads",
            "product_description": "The breads manufactured by Gourmet Market",
            "expected_products": "Authentic German Klosterbrot Bread Pack of 2, Jewish Rye Bread Pack of 4",
            "expected_answer": "B088DLW8V1\nB08922FBD7"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "B088DLW8V1\nB08922FBD7"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "B088DLW8V1\nB08922FBD7"
        }
    },
    {
        "task_id": 20211,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "List only the SKUs (without any additional text) of {{product_description}} in the same subcategory {{category}} as the product {{product_name}} from my order number {{order_number}}. The products must have at least 5 reviews, a rating of 3.5 (rating >= 70) or higher, and cost between 50%-90% of the original product's price. Answer with only the full product names, nothing else. Format your answer with one SKU per line, sorted alphabetically.",
        "intent": "List only the SKUs (without any additional text) of Adapters with exchange periods longer than one year in the same subcategory AC Adapters as the product SupplySource AC-DC Adapter for Sony Playstation VR Virtual Reality Headset PSVR PSVR2 from my order number 160. The products must have at least 5 reviews, a rating of 3.5 (rating >= 70) or higher, and cost between 50%-90% of the original product's price. Answer with only the full product names, nothing else. Format your answer with one SKU per line, sorted alphabetically.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Retrieve the price of the specified item from the order → Navigate to its category and identify the items that meet the latter condition within that category, making a note of each item and its location → Review all the noted items and search for the one that satisfies the {{product_description}} condition.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "order_number": "160",
            "product_name": "SupplySource AC-DC Adapter for Sony Playstation VR Virtual Reality Headset PSVR PSVR2",
            "category": "AC Adapters",
            "product_description": "Adapters with exchange periods longer than one year",
            "expected_products": "Pwr+ Charger for Booster PAC ES5000 ES2500 J900 ESA218: ES6000 ES1224 ESA217 ESA214 ESA22 Power Supply AC Adapter Extra Long 6.7 Ft Cord UL Listed Replacement with Small Jack\n9V AC Power Cord for Schwinn A10 A20 430 420 460 101 130 150 170 202 220 230 240 270 Bike Exercise Elliptical Recumbent Upright Trainer Power Supply",
            "expected_answer": "B076CTVS7L\nB08Z71243H"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "B076CTVS7L\nB08Z71243H"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "B076CTVS7L\nB08Z71243H"
        }
    },
    {
        "task_id": 20220,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "List the top 3 categories with the highest spending from my past orders during the period from {{start_date}} to {{end_date}}. For each category, show the name and the percentage of the total amount spent (excluding shipping costs) during this period, rounded to the nearest 5% (e.g., 30%, 35%, 40%). Format each line as 'Category: X%'. Categories should be from (Beauty & Personal Care, Cell Phones & Accessories, Clothing, Shoes & Jewelry, Electronics, Grocery & Gourmet Food, Health & Household, Home & Kitchen, Office Products, Patio, Lawn & Garden, Sports & Outdoors, Tools & Home Improvement, Video Games).",
        "intent": "List the top 3 categories with the highest spending from my past orders during the period from March 2022 to August 2022. For each category, show the name and the percentage of the total amount spent (excluding shipping costs) during this period, rounded to the nearest 5% (e.g., 30%, 35%, 40%). Format each line as 'Category: X%'. Categories should be from (Beauty & Personal Care, Cell Phones & Accessories, Clothing, Shoes & Jewelry, Electronics, Grocery & Gourmet Food, Health & Household, Home & Kitchen, Office Products, Patio, Lawn & Garden, Sports & Outdoors, Tools & Home Improvement, Video Games).",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Classify the items from my orders within a specified time period into major categories and calculate the total spending for each. Then, compute the percentage share of each category and list the top 3 categories by spending.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "period": "First half 2022",
            "start_date": "March 2022",
            "end_date": "August 2022",
            "total_spending": "494.96",
            "top_spending_categories": "Home & Kitchen: 55%\nGrocery & Gourmet Food: 30%\nBeauty & Personal Care: 10%"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "Home & Kitchen: 55%\nGrocery & Gourmet Food: 30%\nBeauty & Personal Care: 10%"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Home & Kitchen: 55%\nGrocery & Gourmet Food: 30%\nBeauty & Personal Care: 10%"
        }
    },
    {
        "task_id": 20221,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "List the top 3 categories with the highest spending from my past orders during the period from {{start_date}} to {{end_date}}. For each category, show the name and the percentage of the total amount spent (excluding shipping costs) during this period, rounded to the nearest 5% (e.g., 30%, 35%, 40%). Format each line as 'Category: X%'. Categories should be from (Beauty & Personal Care, Cell Phones & Accessories, Clothing, Shoes & Jewelry, Electronics, Grocery & Gourmet Food, Health & Household, Home & Kitchen, Office Products, Patio, Lawn & Garden, Sports & Outdoors, Tools & Home Improvement, Video Games).",
        "intent": "List the top 3 categories with the highest spending from my past orders during the period from July 2022 to September 2022. For each category, show the name and the percentage of the total amount spent (excluding shipping costs) during this period, rounded to the nearest 5% (e.g., 30%, 35%, 40%). Format each line as 'Category: X%'. Categories should be from (Beauty & Personal Care, Cell Phones & Accessories, Clothing, Shoes & Jewelry, Electronics, Grocery & Gourmet Food, Health & Household, Home & Kitchen, Office Products, Patio, Lawn & Garden, Sports & Outdoors, Tools & Home Improvement, Video Games).",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Classify the items from my orders within a specified time period into major categories and calculate the total spending for each. Then, compute the percentage share of each category and list the top 3 categories by spending.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "period": "Summer 2022",
            "start_date": "July 2022",
            "end_date": "September 2022",
            "total_spending": "3060.52",
            "top_spending_categories": "Office Products: 80%\nHome & Kitchen: 10%\nElectronics: 5%"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "Office Products: 80%\nHome & Kitchen: 10%\nElectronics: 5%"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Office Products: 80%\nHome & Kitchen: 10%\nElectronics: 5%"
        }
    },
    {
        "task_id": 20222,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "List the top 3 categories with the highest spending from my past orders during the period from {{start_date}} to {{end_date}}. For each category, show the name and the percentage of the total amount spent (excluding shipping costs) during this period, rounded to the nearest 5% (e.g., 30%, 35%, 40%). Format each line as 'Category: X%'. Categories should be from (Beauty & Personal Care, Cell Phones & Accessories, Clothing, Shoes & Jewelry, Electronics, Grocery & Gourmet Food, Health & Household, Home & Kitchen, Office Products, Patio, Lawn & Garden, Sports & Outdoors, Tools & Home Improvement, Video Games).",
        "intent": "List the top 3 categories with the highest spending from my past orders during the period from March 2022 to June 2022. For each category, show the name and the percentage of the total amount spent (excluding shipping costs) during this period, rounded to the nearest 5% (e.g., 30%, 35%, 40%). Format each line as 'Category: X%'. Categories should be from (Beauty & Personal Care, Cell Phones & Accessories, Clothing, Shoes & Jewelry, Electronics, Grocery & Gourmet Food, Health & Household, Home & Kitchen, Office Products, Patio, Lawn & Garden, Sports & Outdoors, Tools & Home Improvement, Video Games).",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Classify the items from my orders within a specified time period into major categories and calculate the total spending for each. Then, compute the percentage share of each category and list the top 3 categories by spending.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "period": "Spring 2022",
            "start_date": "March 2022",
            "end_date": "June 2022",
            "total_spending": "413.82",
            "top_spending_categories": "Home & Kitchen: 65%\nGrocery & Gourmet Food: 30%\nClothing, Shoes & Jewelry: 5%"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "Home & Kitchen: 65%\nGrocery & Gourmet Food: 30%\nClothing, Shoes & Jewelry: 5%"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Home & Kitchen: 65%\nGrocery & Gourmet Food: 30%\nClothing, Shoes & Jewelry: 5%"
        }
    },
    {
        "task_id": 20223,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "List the top 3 categories with the highest spending from my past orders during the period from {{start_date}} to {{end_date}}. For each category, show the name and the percentage of the total amount spent (excluding shipping costs) during this period, rounded to the nearest 5% (e.g., 30%, 35%, 40%). Format each line as 'Category: X%'. Categories should be from (Beauty & Personal Care, Cell Phones & Accessories, Clothing, Shoes & Jewelry, Electronics, Grocery & Gourmet Food, Health & Household, Home & Kitchen, Office Products, Patio, Lawn & Garden, Sports & Outdoors, Tools & Home Improvement, Video Games).",
        "intent": "List the top 3 categories with the highest spending from my past orders during the period from January 2023 to March 2023. For each category, show the name and the percentage of the total amount spent (excluding shipping costs) during this period, rounded to the nearest 5% (e.g., 30%, 35%, 40%). Format each line as 'Category: X%'. Categories should be from (Beauty & Personal Care, Cell Phones & Accessories, Clothing, Shoes & Jewelry, Electronics, Grocery & Gourmet Food, Health & Household, Home & Kitchen, Office Products, Patio, Lawn & Garden, Sports & Outdoors, Tools & Home Improvement, Video Games).",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Classify the items from my orders within a specified time period into major categories and calculate the total spending for each. Then, compute the percentage share of each category and list the top 3 categories by spending.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "period": "Winter 2023",
            "start_date": "January 2023",
            "end_date": "March 2023",
            "total_spending": "1508.69",
            "top_spending_categories": "Home & Kitchen: 60%\nElectronics: 25%\nBeauty & Personal Care: 5%"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "Home & Kitchen: 60%\nElectronics: 25%\nBeauty & Personal Care: 5%"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Home & Kitchen: 60%\nElectronics: 25%\nBeauty & Personal Care: 5%"
        }
    },
    {
        "task_id": 20224,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "List the top 3 categories with the highest spending from my past orders during the period from {{start_date}} to {{end_date}}. For each category, show the name and the percentage of the total amount spent (excluding shipping costs) during this period, rounded to the nearest 5% (e.g., 30%, 35%, 40%). Format each line as 'Category: X%'. Categories should be from (Beauty & Personal Care, Cell Phones & Accessories, Clothing, Shoes & Jewelry, Electronics, Grocery & Gourmet Food, Health & Household, Home & Kitchen, Office Products, Patio, Lawn & Garden, Sports & Outdoors, Tools & Home Improvement, Video Games).",
        "intent": "List the top 3 categories with the highest spending from my past orders during the period from October 2022 to December 2022. For each category, show the name and the percentage of the total amount spent (excluding shipping costs) during this period, rounded to the nearest 5% (e.g., 30%, 35%, 40%). Format each line as 'Category: X%'. Categories should be from (Beauty & Personal Care, Cell Phones & Accessories, Clothing, Shoes & Jewelry, Electronics, Grocery & Gourmet Food, Health & Household, Home & Kitchen, Office Products, Patio, Lawn & Garden, Sports & Outdoors, Tools & Home Improvement, Video Games).",
        "required_obs": "any",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Classify the items from my orders within a specified time period into major categories and calculate the total spending for each. Then, compute the percentage share of each category and list the top 3 categories by spending.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "period": "Fall 2022",
            "start_date": "October 2022",
            "end_date": "December 2022",
            "total_spending": "1662.49",
            "top_spending_categories": "Home & Kitchen: 25%\nBeauty & Personal Care: 20%\nElectronics: 20%"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "Home & Kitchen: 25%\nBeauty & Personal Care: 20%\nElectronics: 20%"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "Home & Kitchen: 25%\nBeauty & Personal Care: 20%\nElectronics: 20%"
        }
    },
    {
        "task_id": 20230,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "List only the SKUs (without any additional text) of products in the {{category}} category that are priced within 60-80% of the original product {{product}} from my Order {{order_number}} and have a higher rating than the original product. Format your answer with one SKU per line, sorted alphabetically.",
        "intent": "List only the SKUs (without any additional text) of products in the Home Storage Hooks category that are priced within 60-80% of the original product NOZE Rustic Coat Rack Wall Mounted Shelf with 4 Hooks, Hanging Entryway Organizer for Mug Coffee Cup, Holding Solid Wooden Shelf with 2 Baskets for Kitchen Living Room, Bathroom and Bedroom from my Order 170 and have a higher rating than the original product. Format your answer with one SKU per line, sorted alphabetically.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Retrieve the price of the corresponding product from the Order. Next, navigate to the Category, and save the SKUs of all products within the category that meet the price constraint.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "order_number": "170",
            "product": "NOZE Rustic Coat Rack Wall Mounted Shelf with 4 Hooks, Hanging Entryway Organizer for Mug Coffee Cup, Holding Solid Wooden Shelf with 2 Baskets for Kitchen Living Room, Bathroom and Bedroom",
            "category": "Home Storage Hooks",
            "original_price": "40.99",
            "original_rating": "3.9",
            "cheaper_better_products": "LIANTRAL Wall Mounted Coat Rack, 15-Inch, Set of 2 Wood Rustic Coat Hooks with 4 Hooks for Entryway, Mudroom, Bathroom, Kitchen(Brown), Coat Rack Wall Mounted, 5 Tri Hooks, 2 Pack Wall Hook Rack, Wooden Base, Metal Coat Hook Rail for Coat Hat Key, Rustic Coat Rack for Entryway Bathroom Bedroom Kitchen, Coat Hooks for Wall, Walnut Wood Wall Hooks with 5 Swivel Foldable Arms, 12'' Length Wall Coat Rack Hat Hooks for Bathroom Entryway Bedroom Office Kitchen, Heavy Duty, MaxGear Coat Rack Wall Mount, 2 Pack Coat Hanger Hat Rack for Wall, Coat Hooks Wall Hook Rack Heavy Duty Hook Rail Wooden Wall Rack with Hooks Peg Rack Purse Rack Backpack Hanger (Bamboo), Halcent Entryway Mail Organizer Coat Rack Key Holder Hooks, Newspaper Magazine Holder with Memo Board Wall Decor, EXCELLO GLOBAL PRODUCTS Rustic Barn Wood Wall Mounted 38 by 10 in Hanging Entryway Coat Rack with 7 Hooks, BirdRock Home Dual Hook Coat and Hat Rack - 4 Dual Hooks - 17 Inches - Wall Mount - Decorative Home Storage - Entryway Foyer Hallway Bathroom Bedroom Rail - Satin Nickel Hooks - White Pine Bathroom Ra, Cast Iron Deer Head Double Hook Wall Key Rack Holder Hooks Coat Hook Home Decor, EMERIT Mail Holder Organzier Wall Mount Key Holder Rack Hanging Entryway Decorative Shelf (White 1)",
            "cheaper_better_skus": "B004WSVYP6\nB01K5YOTOI\nB07FD1YMZH\nB07HQB5L3Z\nB07ZKGFT2V\nB088QYFHCP\nB08H4XQLV5\nB08YRG3L3T\nB09NVS2G2J"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "B004WSVYP6\nB01K5YOTOI\nB07FD1YMZH\nB07HQB5L3Z\nB07ZKGFT2V\nB088QYFHCP\nB08H4XQLV5\nB08YRG3L3T\nB09NVS2G2J"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "B004WSVYP6\nB01K5YOTOI\nB07FD1YMZH\nB07HQB5L3Z\nB07ZKGFT2V\nB088QYFHCP\nB08H4XQLV5\nB08YRG3L3T\nB09NVS2G2J"
        }
    },
    {
        "task_id": 20231,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "List only the SKUs (without any additional text) of products in the {{category}} category that are priced within 60-80% of the original product {{product}} from my Order {{order_number}} and have a higher rating than the original product. Format your answer with one SKU per line, sorted alphabetically.",
        "intent": "List only the SKUs (without any additional text) of products in the PlayStation Systems category that are priced within 60-80% of the original product Designer Skin for Sony PlayStation PS3 SLIM System & Remote Controllers -Big Ballin from my Order 180 and have a higher rating than the original product. Format your answer with one SKU per line, sorted alphabetically.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Retrieve the price of the corresponding product from the Order. Next, navigate to the Category, and save the SKUs of all products within the category that meet the price constraint.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "order_number": "180",
            "product": "Designer Skin for Sony PlayStation PS3 SLIM System & Remote Controllers -Big Ballin",
            "category": "PlayStation Systems",
            "original_price": "1.63",
            "original_rating": "4.2",
            "cheaper_better_products": "Vivi Audio For Slim PS3 Playstation 3 Power Eject Ribbon Cable CECH-3001A 3001B, Sony Playstaton Portable System - Ceramic White PSP (Japan)",
            "cheaper_better_skus": "B000U696NG\nB014HDAUAA"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "B000U696NG\nB014HDAUAA"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "B000U696NG\nB014HDAUAA"
        }
    },
    {
        "task_id": 20232,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "List only the SKUs (without any additional text) of products in the {{category}} category that are priced within 60-80% of the original product {{product}} from my Order {{order_number}} and have a higher rating than the original product. Format your answer with one SKU per line, sorted alphabetically.",
        "intent": "List only the SKUs (without any additional text) of products in the Sets & Kits category that are priced within 60-80% of the original product Your Selfie-Ready Skincare 5-Piece Hydrating, Radiance-Boosting Super Set - Your Skin But Better Primer, Je Ne Sais Quoi Lip Treatment, Bye Bye Under Eye, Secret Sauce, and Miracle Water from my Order 164 and have a higher rating than the original product. Format your answer with one SKU per line, sorted alphabetically.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Retrieve the price of the corresponding product from the Order. Next, navigate to the Category, and save the SKUs of all products within the category that meet the price constraint.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "order_number": "164",
            "product": "Your Selfie-Ready Skincare 5-Piece Hydrating, Radiance-Boosting Super Set - Your Skin But Better Primer, Je Ne Sais Quoi Lip Treatment, Bye Bye Under Eye, Secret Sauce, and Miracle Water",
            "category": "Sets & Kits",
            "original_price": "99.99",
            "original_rating": "5.0",
            "cheaper_better_products": "Clinique 3-Step Skin Care System For Skin Type 2 Dry to Dry Combination Skin",
            "cheaper_better_skus": "B00FBBJOW8"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "B00FBBJOW8"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "B00FBBJOW8"
        }
    },
    {
        "task_id": 20233,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "List only the SKUs (without any additional text) of products in the {{category}} category that are priced within 60-80% of the original product {{product}} from my Order {{order_number}} and have a higher rating than the original product. Format your answer with one SKU per line, sorted alphabetically.",
        "intent": "List only the SKUs (without any additional text) of products in the Condiments & Salad Dressings category that are priced within 60-80% of the original product Pure Horseradish - 8oz Jar - Homestyle from my Order 165 and have a higher rating than the original product. Format your answer with one SKU per line, sorted alphabetically.",
        "required_obs": "text",
        "type_main": "long-term",
        "type_sub": "calc",
        "description": "Retrieve the price of the corresponding product from the Order. Next, navigate to the Category, and save the SKUs of all products within the category that meet the price constraint.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "order_number": "165",
            "product": "Pure Horseradish - 8oz Jar - Homestyle",
            "category": "Condiments & Salad Dressings",
            "original_price": "8.00",
            "original_rating": "3.9",
            "cheaper_better_products": "Kraft Classic Catalina Fat Free Salad Dressing (16 fl oz Bottle), Grey Poupon Dijon Mustard (6 ct Casepack, 48 oz Jars)",
            "cheaper_better_skus": "B00I8GJLOO\nB00OKFXVAC"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "B00I8GJLOO\nB00OKFXVAC"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "B00I8GJLOO\nB00OKFXVAC"
        }
    },
    {
        "task_id": 20240,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "What is the total price of the cheapest {{item1_description}} combined with the cheapest {{item2_description}}{{condition}}? Please only output the number.",
        "intent": "What is the total price of the cheapest item among Nintendo Switch controller combined with the cheapest Switch kaset, each with 10 or more reviews? Please only output the number.",
        "required_obs": "text",
        "type_main": "long-term",
        "description": "Extract all items from the lists of {product1} and {product2} that meet the {condition} → Among them, find the cheapest item from each list and add their prices together.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "item1_description": "item among Nintendo Switch controller",
            "item2_description": "Switch kaset",
            "condition": ", each with 10 or more reviews",
            "item1_price": 3.66,
            "item2_price": 16.74,
            "expected_answer": "20.40 |OR| 20.4",
            "item1_product": "",
            "item2_product": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "20.40 |OR| 20.4"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "20.40 |OR| 20.4",
            "price_calculation": "3.66 + 16.74"
        }
    },
    {
        "task_id": 20241,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "What is the total price of the cheapest {{item1_description}} combined with the cheapest {{item2_description}}{{condition}}? Please only output the number.",
        "intent": "What is the total price of the cheapest item among Nintendo Switch controller combined with the cheapest Switch kaset, each with a review rating of 3 (rating=60) or higher? Please only output the number.",
        "required_obs": "text",
        "type_main": "long-term",
        "description": "Extract all items from the lists of {product1} and {product2} that meet the {condition} → Among them, find the cheapest item from each list and add their prices together.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "item1_description": "item among Nintendo Switch controller",
            "item2_description": "Switch kaset",
            "condition": ", each with a review rating of 3 (rating=60) or higher",
            "item1_price": 86.0,
            "item2_price": 16.74,
            "expected_answer": "102.74",
            "item1_product": "",
            "item2_product": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "102.74"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "102.74",
            "price_calculation": "86.0 + 16.74"
        }
    },
    {
        "task_id": 20242,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "What is the total price of the cheapest {{item1_description}} combined with the cheapest {{item2_description}}{{condition}}? Please only output the number.",
        "intent": "What is the total price of the cheapest plant seeds combined with the cheapest product from the \"Planter, Pots\" category, each with 10 or more reviews? Please only output the number.",
        "required_obs": "text",
        "type_main": "long-term",
        "description": "Extract all items from the lists of {product1} and {product2} that meet the {condition} → Among them, find the cheapest item from each list and add their prices together.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "item1_description": "plant seeds",
            "item2_description": "product from the \"Planter, Pots\" category",
            "condition": ", each with 10 or more reviews",
            "item1_price": 6.49,
            "item2_price": 13.99,
            "expected_answer": "20.48",
            "item1_product": "",
            "item2_product": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "20.48"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "20.48",
            "price_calculation": "6.49 + 13.99"
        }
    },
    {
        "task_id": 20243,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "What is the total price of the cheapest {{item1_description}} combined with the cheapest {{item2_description}}{{condition}}? Please only output the number.",
        "intent": "What is the total price of the cheapest plant seeds combined with the cheapest product from the \"Planter, Pots\" category, each with a review rating of 4 (rating=80) or higher? Please only output the number.",
        "required_obs": "text",
        "type_main": "long-term",
        "description": "Extract all items from the lists of {product1} and {product2} that meet the {condition} → Among them, find the cheapest item from each list and add their prices together.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "item1_description": "plant seeds",
            "item2_description": "product from the \"Planter, Pots\" category",
            "condition": ", each with a review rating of 4 (rating=80) or higher",
            "item1_price": 14.95,
            "item2_price": 13.99,
            "expected_answer": "28.94",
            "item1_product": "",
            "item2_product": ""
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "28.94"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "28.94",
            "price_calculation": "14.95 + 13.99"
        }
    },
    {
        "task_id": 20244,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "What is the total price of the cheapest {{item1_description}} combined with the cheapest {{item2_description}}{{condition}}? Please only output the number.",
        "intent": "What is the total price of the cheapest spa bed or table product in the Spa Beds & Tables category that costs at least $50 combined with the cheapest spa trolley or cart product in the Spa Storage Systems category that costs at least $20? Please only output the number.",
        "required_obs": "text",
        "type_main": "long-term",
        "description": "Extract all items from the lists of {product1} and {product2} that meet the {condition} → Among them, find the cheapest item from each list and add their prices together.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "item1_description": "spa bed or table product in the Spa Beds & Tables category that costs at least $50",
            "item2_description": "spa trolley or cart product in the Spa Storage Systems category that costs at least $20",
            "condition": "",
            "item1_price": 64.99,
            "item2_price": 28.07,
            "expected_answer": "93.06",
            "item1_product": "Sentiment 73 Inches Long 28 Inches Wide Folding Portable Massage Table with Carrying case, Black",
            "item2_product": "50kg Load Rolling Beauty Salon Trolley Bracket, Beauty Salon Storage Equipment, SPA Tool Trolley, ABS Salon Trolley, Beauty Equipment Machine,Mobile Mental Stand Trolley Rolling Cart Shelf"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "93.06"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "93.06",
            "price_calculation": "64.99 + 28.07"
        }
    },
    {
        "task_id": 20245,
        "sites": [
            "shopping"
        ],
        "start_url": "http://172.16.2.4:7770/",
        "start_url_lite": "http://172.16.2.4:7770/",
        "storage_state": "./.auth/shopping_state.json",
        "intent_template": "What is the total price of the cheapest {{item1_description}} combined with the cheapest {{item2_description}}{{condition}}? Please only output the number.",
        "intent": "What is the total price of the cheapest spa bed or table product in the Spa Beds & Tables category combined with the cheapest spa trolley or cart product in the Spa Storage Systems category, each with 5 or more reviews and a rating of 3.5 or higher (rating>=70)? Please only output the number.",
        "required_obs": "text",
        "type_main": "long-term",
        "description": "Extract all items from the lists of {product1} and {product2} that meet the {condition} → Among them, find the cheapest item from each list and add their prices together.",
        "instantiation_dict": {
            "start_url": "http://172.16.2.4:7770/",
            "start_url_lite": "http://172.16.2.4:7770/",
            "item1_description": "spa bed or table product in the Spa Beds & Tables category",
            "item2_description": "spa trolley or cart product in the Spa Storage Systems category",
            "condition": ", each with 5 or more reviews and a rating of 3.5 or higher (rating>=70)",
            "item1_price": 78.99,
            "item2_price": 89.88,
            "expected_answer": "168.87",
            "item1_product": "Yaheetech Portable Massage Table 84inch Massage Bed Aluminium Height Adjustable Facial Salon Tattoo Bed Black",
            "item2_product": "Beauty Salon Black Hair Color Tray Portable Utility Cart"
        },
        "eval": {
            "eval_types": [
                "string_match"
            ],
            "reference_answers": {
                "exact_match": "168.87"
            },
            "reference_url": "",
            "program_html": [],
            "string_note": "",
            "reference_answer_raw_annotation": "168.87",
            "price_calculation": "78.99 + 89.88"
        }
    }
]