[{"id": 1, "image": "COCO_val2014_000000144305.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: keyboard, laptop, dining table, generate a short caption for this image."}]}, {"id": 7, "image": "COCO_val2014_000000164091.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: person, bed, teddy bear, generate a short caption for this image."}]}, {"id": 13, "image": "COCO_val2014_000000308003.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: chair, bench, dining table, generate a short caption for this image."}]}, {"id": 19, "image": "COCO_val2014_000000054870.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: knife, sandwich, fork, generate a short caption for this image."}]}, {"id": 25, "image": "COCO_val2014_000000095908.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: mouse, laptop, dining table, keyboard, cup, generate a short caption for this image."}]}, {"id": 31, "image": "COCO_val2014_000000329614.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: person, car, bus, generate a short caption for this image."}]}, {"id": 37, "image": "COCO_val2014_000000324008.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: banana, person, train, dining table, airplane, orange, cup, generate a short caption for this image."}]}, {"id": 43, "image": "COCO_val2014_000000052700.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: person, spoon, handbag, fork, dining table, cake, generate a short caption for this image."}]}, {"id": 49, "image": "COCO_val2014_000000501311.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: baseball glove, person, sports ball, generate a short caption for this image."}]}, {"id": 55, "image": "COCO_val2014_000000247179.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: truck, person, elephant, clock, generate a short caption for this image."}]}, {"id": 61, "image": "COCO_val2014_000000577912.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: sink, toothbrush, toilet, bottle, generate a short caption for this image."}]}, {"id": 67, "image": "COCO_val2014_000000196341.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: clock, handbag, person, generate a short caption for this image."}]}, {"id": 73, "image": "COCO_val2014_000000234433.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: traffic light, person, bicycle, car, generate a short caption for this image."}]}, {"id": 79, "image": "COCO_val2014_000000455610.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: traffic light, person, motorcycle, car, generate a short caption for this image."}]}, {"id": 85, "image": "COCO_val2014_000000039053.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: spoon, bowl, dining table, apple, cake, cell phone, cup, generate a short caption for this image."}]}, {"id": 91, "image": "COCO_val2014_000000493862.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: dog, person, tennis racket, generate a short caption for this image."}]}, {"id": 97, "image": "COCO_val2014_000000269873.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: traffic light, person, car, bus, generate a short caption for this image."}]}, {"id": 103, "image": "COCO_val2014_000000524656.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: bowl, spoon, knife, sink, refrigerator, toaster, wine glass, bottle, microwave, oven, cup, generate a short caption for this image."}]}, {"id": 109, "image": "COCO_val2014_000000369547.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: dog, cat, bird, generate a short caption for this image."}]}, {"id": 115, "image": "COCO_val2014_000000181446.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: bowl, person, bottle, sink, refrigerator, generate a short caption for this image."}]}, {"id": 121, "image": "COCO_val2014_000000128119.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: backpack, person, handbag, bicycle, cell phone, bus, generate a short caption for this image."}]}, {"id": 127, "image": "COCO_val2014_000000401027.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: sandwich, fork, dining table, knife, cup, generate a short caption for this image."}]}, {"id": 133, "image": "COCO_val2014_000000253630.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: tv, chair, potted plant, dining table, generate a short caption for this image."}]}, {"id": 139, "image": "COCO_val2014_000000326021.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: person, teddy bear, dining table, generate a short caption for this image."}]}, {"id": 145, "image": "COCO_val2014_000000317320.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: motorcycle, person, bicycle, generate a short caption for this image."}]}, {"id": 151, "image": "COCO_val2014_000000507171.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: person, surfboard, cat, generate a short caption for this image."}]}, {"id": 157, "image": "COCO_val2014_000000161227.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: couch, laptop, remote, chair, keyboard, cell phone, generate a short caption for this image."}]}, {"id": 163, "image": "COCO_val2014_000000025812.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: snowboard, person, skis, generate a short caption for this image."}]}, {"id": 169, "image": "COCO_val2014_000000267537.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: tie, person, bench, umbrella, generate a short caption for this image."}]}, {"id": 175, "image": "COCO_val2014_000000195645.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: person, wine glass, handbag, bottle, dining table, bench, generate a short caption for this image."}]}, {"id": 181, "image": "COCO_val2014_000000155671.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: traffic light, truck, car, generate a short caption for this image."}]}, {"id": 187, "image": "COCO_val2014_000000412894.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: traffic light, person, backpack, handbag, generate a short caption for this image."}]}, {"id": 193, "image": "COCO_val2014_000000032625.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: bowl, fork, dining table, book, cup, generate a short caption for this image."}]}, {"id": 199, "image": "COCO_val2014_000000018688.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: baseball glove, person, sports ball, baseball bat, generate a short caption for this image."}]}, {"id": 205, "image": "COCO_val2014_000000343534.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: dog, potted plant, chair, book, teddy bear, generate a short caption for this image."}]}, {"id": 211, "image": "COCO_val2014_000000370657.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: person, motorcycle, handbag, generate a short caption for this image."}]}, {"id": 217, "image": "COCO_val2014_000000481596.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: chair, person, hot dog, generate a short caption for this image."}]}, {"id": 223, "image": "COCO_val2014_000000460347.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: person, car, bus, generate a short caption for this image."}]}, {"id": 229, "image": "COCO_val2014_000000398753.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: baseball glove, person, sports ball, baseball bat, generate a short caption for this image."}]}, {"id": 235, "image": "COCO_val2014_000000067833.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: person, bowl, dining table, knife, wine glass, bottle, fork, pizza, cup, generate a short caption for this image."}]}, {"id": 241, "image": "COCO_val2014_000000272880.jpg", "conversations": [{"from": "human", "value": "Generate a short caption of the image."}, {"from": "gpt", "value": "Focusing on the visible elements in this image: person, fork, dining table, knife, pizza, cup, generate a short caption for this image."}]}]