{
    "model_name": "",
    "solutions": [
        {
            "problem_id": 1,
            "answer": "LEFT: sit_on toilet, RIGHT: NOT sit_on toilet",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 2,
            "answer": "LEFT: clean toilet, RIGHT: NOT clean toilet",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 3,
            "answer": "LEFT: hold carrot, RIGHT: NOT hold carrot",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 4,
            "answer": "LEFT: hold wine_glass, RIGHT: NOT hold wine_glass",
            "explanation": "",
            "evaluations": []
        },
        {
            "problem_id": 5,
            "answer": "LEFT: sip wine_glass, RIGHT: NOT sip wine_glass",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 6,
            "answer": "LEFT: drive car, RIGHT: NOT drive car",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 7,
            "answer": "LEFT: wash car, RIGHT: NOT wash car",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 8,
            "answer": "LEFT: pet cat, RIGHT: NOT pet cat",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 9,
            "answer": "LEFT: hug cat, RIGHT: NOT hug cat",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 10,
            "answer": "LEFT: drive train, RIGHT: NOT drive train",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 11,
            "answer": "LEFT: ride boat, RIGHT: NOT ride boat",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 12,
            "answer": "LEFT: sail boat, RIGHT: NOT sail boat",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 13,
            "answer": "LEFT: drive boat, RIGHT: NOT drive boat",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 14,
            "answer": "LEFT: sit_on airplane, RIGHT: NOT sit_on airplane",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 15,
            "answer": "LEFT: control tv, RIGHT: NOT control tv",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 16,
            "answer": "LEFT: watch tv, RIGHT: NOT watch tv",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 17,
            "answer": "LEFT: hold person, RIGHT: NOT hold person",
            "explanation": "",
            "evaluations": [
                {
                    "value": "OK\n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 18,
            "answer": "LEFT: kiss person, RIGHT: NOT kiss person",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 19,
            "answer": "LEFT: hug person, RIGHT: NOT hug person",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG\n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 20,
            "answer": "LEFT: cut_with knife, RIGHT: NOT cut_with knife",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG\n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 21,
            "answer": "LEFT: clean refrigerator, RIGHT: NOT clean refrigerator",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 22,
            "answer": "LEFT: open refrigerator, RIGHT: NOT open refrigerator",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 23,
            "answer": "LEFT: fly kite, RIGHT: NOT fly kite",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 24,
            "answer": "LEFT: hold kite, RIGHT: NOT hold kite",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 25,
            "answer": "LEFT: type_on laptop, RIGHT: NOT type_on laptop",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 26,
            "answer": "LEFT: hold laptop, RIGHT: NOT hold laptop",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 27,
            "answer": "LEFT: repair laptop, RIGHT: NOT repair laptop",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 28,
            "answer": "LEFT: walk cow, RIGHT: NOT walk cow",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 29,
            "answer": "LEFT: cut cake, RIGHT: NOT cut cake",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 30,
            "answer": "LEFT: hold cake, RIGHT: NOT hold cake",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 31,
            "answer": "LEFT: blow cake, RIGHT: NOT blow cake",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 32,
            "answer": "LEFT: hold donut, RIGHT: NOT hold donut",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 33,
            "answer": "LEFT: hold tennis_racket, RIGHT: NOT hold tennis_racket",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 34,
            "answer": "LEFT: catch frisbee, RIGHT: NOT catch frisbee",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 35,
            "answer": "LEFT: hold frisbee, RIGHT: NOT hold frisbee",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG\n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 36,
            "answer": "LEFT: hold toothbrush, RIGHT: NOT hold toothbrush",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 37,
            "answer": "LEFT: hold remote, RIGHT: NOT hold remote",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 38,
            "answer": "LEFT: hold fork, RIGHT: NOT hold fork",
            "explanation": "",
            "evaluations": []
        },
        {
            "problem_id": 39,
            "answer": "LEFT: hold mouse, RIGHT: NOT hold mouse",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 40,
            "answer": "LEFT: sit_on couch, RIGHT: NOT sit_on couch",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 41,
            "answer": "LEFT: lie_on couch, RIGHT: NOT lie_on couch",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 42,
            "answer": "LEFT: feed sheep, RIGHT: NOT feed sheep",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG\n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 43,
            "answer": "LEFT: pet sheep, RIGHT: NOT pet sheep",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 44,
            "answer": "LEFT: hold sheep, RIGHT: NOT hold sheep",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 45,
            "answer": "LEFT: carry handbag, RIGHT: NOT carry handbag",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 46,
            "answer": "LEFT: hold handbag, RIGHT: NOT hold handbag",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 47,
            "answer": "LEFT: ride surfboard, RIGHT: NOT ride surfboard",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 48,
            "answer": "LEFT: lie_on surfboard, RIGHT: NOT lie_on surfboard",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 49,
            "answer": "LEFT: carry surfboard, RIGHT: NOT carry surfboard",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 50,
            "answer": "LEFT: inspect surfboard, RIGHT: NOT inspect surfboard",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 51,
            "answer": "LEFT: jump surfboard, RIGHT: NOT jump surfboard",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 52,
            "answer": "LEFT: hold scissors, RIGHT: NOT hold scissors",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 53,
            "answer": "LEFT: cut_with scissors, RIGHT: NOT cut_with scissors",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 54,
            "answer": "LEFT: eat apple, RIGHT: NOT eat apple",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 55,
            "answer": "LEFT: hold apple, RIGHT: NOT hold apple",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 56,
            "answer": "LEFT: wash apple, RIGHT: NOT wash apple",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 57,
            "answer": "LEFT: sit_on bench, RIGHT: NOT sit_on bench",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 58,
            "answer": "LEFT: lie_on bench, RIGHT: NOT lie_on bench",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 59,
            "answer": "LEFT: sit_on chair, RIGHT: NOT sit_on chair",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 60,
            "answer": "LEFT: toast wine_glass, RIGHT: NOT toast wine_glass",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 61,
            "answer": "LEFT: board train, RIGHT: NOT board train",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 62,
            "answer": "LEFT: stand_on boat, RIGHT: NOT stand_on boat",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 63,
            "answer": "LEFT: row boat, RIGHT: NOT row boat",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 64,
            "answer": "LEFT: direct airplane, RIGHT: NOT direct airplane",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 65,
            "answer": "LEFT: greet person, RIGHT: NOT greet person",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 66,
            "answer": "LEFT: lick knife, RIGHT: NOT lick knife",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 67,
            "answer": "LEFT: wield knife, RIGHT: NOT wield knife",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 68,
            "answer": "LEFT: stick knife, RIGHT: NOT stick knife",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 69,
            "answer": "LEFT: read laptop, RIGHT: NOT read laptop",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 70,
            "answer": "LEFT: milk cow, RIGHT: NOT milk cow",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 71,
            "answer": "LEFT: grind snowboard, RIGHT: NOT grind snowboard",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 72,
            "answer": "LEFT: release bird, RIGHT: NOT release bird",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 73,
            "answer": "LEFT: grind skateboard, RIGHT: NOT grind skateboard",
            "explanation": "",
            "evaluations": []
        },
        {
            "problem_id": 74,
            "answer": "LEFT: hold_and_about_to_eat banana, RIGHT: NOT hold_and_about_to_eat banana",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 75,
            "answer": "LEFT: train dog, RIGHT: NOT train dog",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 76,
            "answer": "LEFT: sit_inside bus, RIGHT: NOT sit_inside bus",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 77,
            "answer": "LEFT: block sports_ball, RIGHT: NOT block sports_ball",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 78,
            "answer": "LEFT: serve sports_ball, RIGHT: NOT serve sports_ball",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 79,
            "answer": "LEFT: turn motorcycle, RIGHT: NOT turn motorcycle",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 80,
            "answer": "LEFT: push motorcycle, RIGHT: NOT push motorcycle",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 81,
            "answer": "LEFT: swing tennis_racket, RIGHT: NOT swing tennis_racket",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 82,
            "answer": "LEFT: throw frisbee, RIGHT: NOT throw frisbee",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 83,
            "answer": "LEFT: brush_with toothbrush, RIGHT: NOT brush_with toothbrush",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 84,
            "answer": "LEFT: point_at_camera remote, RIGHT: NOT point_at_camera remote",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 85,
            "answer": "LEFT: use mouse, RIGHT: NOT use mouse",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 86,
            "answer": "LEFT: shear sheep, RIGHT: NOT shear sheep",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 87,
            "answer": "LEFT: wear tie, RIGHT: NOT wear tie",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG\n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 88,
            "answer": "LEFT: adjust_or_tie tie, RIGHT: NOT adjust_or_tie tie",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 89,
            "answer": "LEFT: help_adjust_or_tie tie, RIGHT: NOT help_adjust_or_tie tie",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 90,
            "answer": "LEFT: smell apple, RIGHT: NOT smell apple",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 91,
            "answer": "LEFT: hold_and_about_to_eat apple, RIGHT: NOT hold_and_about_to_eat apple",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 92,
            "answer": "LEFT: pick apple, RIGHT: NOT pick apple",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 93,
            "answer": "LEFT: peel_or_cut apple, RIGHT: NOT peel_or_cut apple",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 94,
            "answer": "LEFT: sit_on_with_multiple_person bench, RIGHT: NOT sit_on_with_multiple_person bench",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 95,
            "answer": "LEFT: stand_on chair, RIGHT: NOT stand_on chair",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 96,
            "answer": "LEFT: lie_on_like chair, RIGHT: NOT lie_on_like chair",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 97,
            "answer": "LEFT: eat orange, RIGHT: NOT eat orange",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 98,
            "answer": "LEFT: squeeze orange, RIGHT: NOT squeeze orange",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 99,
            "answer": "LEFT: peel orange, RIGHT: NOT peel orange",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        },
        {
            "problem_id": 100,
            "answer": "LEFT: hold orange, RIGHT: NOT hold orange",
            "explanation": "",
            "evaluations": [
                {
                    "value": "WRONG \n",
                    "author": "gemini-1.5-pro"
                }
            ]
        }
    ]
}