{
  "attribute": "text",
  "desc": "long_description",
  "jsonl_file": "text_eval_prompts.jsonl",
  "line_idx": 4,
  "questions": [
    "Is the text 'CARD' in the image?",
    "Is the text 'CLIP' in the image?",
    "Is the text 'with' in the image?",
    "Is the text 'Finger' in the image?",
    "Is the text 'Blister' in the image?"
  ],
  "gt_answers": [
    "Yes",
    "Yes",
    "Yes",
    "Yes",
    "Yes"
  ],
  "model_pred": [
    "yes",
    "no",
    "yes",
    "no",
    "yes"
  ],
  "model_output": "yes because 'CARD' is in the image.\nno because 'CLIP' is not in the image.\nyes because 'with' is in the image.\nno because 'Finger' is not in the image.\nyes because 'Blister' is in the image."
}