{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import openai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "api_key = \"xxxx\"\n",
    "os.environ['OPENAI_API_KEY'] = api_key\n",
    "openai.api_key = api_key"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json \n",
    "\n",
    "with open(\"testdev_balanced_questions.json\", \"r\") as file:\n",
    "    data = json.load(file)\n",
    "\n",
    "data_list = list(data.values())\n",
    "qusetion2structure = {}\n",
    "structure2cnt = {\n",
    "    \"query\": 44,\n",
    "    \"compare\": 6,\n",
    "    \"verify\": 22,\n",
    "    \"choose\": 11,\n",
    "    \"logical\": 17,\n",
    "}\n",
    "\n",
    "for idx, item in enumerate(data_list):\n",
    "    structure = item[\"types\"][\"structural\"]\n",
    "    cnt = structure2cnt[structure]\n",
    "    if cnt > 0:\n",
    "        structure2cnt[structure] = cnt - 1\n",
    "    else:\n",
    "        continue\n",
    "    question = item[\"question\"]\n",
    "    qusetion2structure[question] = structure\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1/100. Is it overcast?, verify\n",
      "2/100. Who is wearing the dress?, query\n",
      "3/100. Does the utensil on top of the table look clean and black?, logical\n",
      "4/100. Is the surfer that looks wet wearing a wetsuit?, verify\n",
      "5/100. How tall is the chair in the bottom of the photo?, query\n",
      "6/100. What kind of device is on top of the desk?, query\n",
      "7/100. What is the airplane flying above?, query\n",
      "8/100. What color are the pants?, query\n",
      "9/100. Is the ground blue or brown?, choose\n",
      "10/100. What is around the open window?, query\n",
      "11/100. What's around the window?, query\n",
      "12/100. Who is standing at the table?, query\n",
      "13/100. Are there drapes to the right of the bed?, verify\n",
      "14/100. What is hanging above the chalkboard?, query\n",
      "15/100. Is the cake on a platter?, verify\n",
      "16/100. Is the person to the right of the cup wearing jeans?, verify\n",
      "17/100. What device is sitting next to the mouse pad?, query\n",
      "18/100. Does the sweater look open and blue?, logical\n",
      "19/100. Is the jacket long sleeved and black?, logical\n",
      "20/100. Are there beds next to the small outlet?, verify\n",
      "21/100. On which side of the picture is the leather bag?, query\n",
      "22/100. Is the blue pillow square and large?, logical\n",
      "23/100. Which color is the cake?, query\n",
      "24/100. What is the name of the cooking utensil that is hang from the hook?, query\n",
      "25/100. Where is the skinny person standing?, query\n",
      "26/100. Are the plates on top of an ottoman?, verify\n",
      "27/100. Is the freezer near the wall small or large?, choose\n",
      "28/100. What type of food is to the left of the baby that is sitting atop the woman?, query\n",
      "29/100. Is the fence made of cement or aluminum?, choose\n",
      "30/100. Which side are the white houses on?, query\n",
      "31/100. Are both the phone and the coffee cup the same color?, compare\n",
      "32/100. Are there either any small refrigerators or microwaves in the picture?, logical\n",
      "33/100. How does that car look like, orange or maybe white?, choose\n",
      "34/100. What color is the book?, query\n",
      "35/100. What color is the dirt?, query\n",
      "36/100. Is the bag made of leather lying on top of a sofa?, verify\n",
      "37/100. What are the drapes around of?, query\n",
      "38/100. On which side is the picture?, query\n",
      "39/100. What material is the crosswalk in front of the stores?, query\n",
      "40/100. How large are the sprinkles that are sprinkled on the cupcakes?, query\n",
      "41/100. What type of material is the crosswalk near the street lamp made of?, query\n",
      "42/100. Which kind of clothing is pink?, query\n",
      "43/100. How is the clothing item that is pink called?, query\n",
      "44/100. Which kind of clothing is not pink?, query\n",
      "45/100. Is this helicopter on or off?, choose\n",
      "46/100. Do you see any cats?, verify\n",
      "47/100. Is that shoe behind a dog?, verify\n",
      "48/100. What kind of clothing is sleeveless?, query\n",
      "49/100. Is the field soft and snowy?, logical\n",
      "50/100. What fruits are beneath the microwave?, query\n",
      "51/100. Which color is the shirt?, query\n",
      "52/100. What is beneath the microwave?, query\n",
      "53/100. On which side of the picture is the chair?, query\n",
      "54/100. Is the happy man to the left or to the right of the woman in the center?, choose\n",
      "55/100. Who is wearing a wristband?, query\n",
      "56/100. Is there a pear beneath the appliance that looks silver and black?, verify\n",
      "57/100. Is this a bed or a cabinet?, choose\n",
      "58/100. On which side is the router?, query\n",
      "59/100. What is the color of the pants?, query\n",
      "60/100. Who is wearing the shirt?, query\n",
      "61/100. Who is wearing a shirt?, query\n",
      "62/100. What's the man doing?, query\n",
      "63/100. Are there rivers or oceans that are not calm?, logical\n",
      "64/100. What is the picture frame hanging from?, query\n",
      "65/100. How do the cars look like, dense or sparse?, choose\n",
      "66/100. What food isn't baked?, query\n",
      "67/100. Which kind of vehicle is in front of the flag?, query\n",
      "68/100. What is the vehicle that is in front of the flag?, query\n",
      "69/100. What is hanging from the wall?, query\n",
      "70/100. What's the skateboarder jumping off of?, query\n",
      "71/100. Is the van in front of a balloon?, verify\n",
      "72/100. What is the color of this bench?, query\n",
      "73/100. Are the cabinets below the stove wooden and open?, logical\n",
      "74/100. Are the boxes to the right of the man full and square?, logical\n",
      "75/100. Is the horse next to the other horse both baby and brown?, logical\n",
      "76/100. Is the river wide or is it narrow?, choose\n",
      "77/100. Is the umbrella in the bottom part of the picture?, verify\n",
      "78/100. Does the blanket look soft and white?, logical\n",
      "79/100. Are there refrigerators to the left of the stove?, verify\n",
      "80/100. Is there an elephant near the person that is wearing a coat?, verify\n",
      "81/100. Is there a blender to the right of the yellow drink?, verify\n",
      "82/100. Is the gift sitting on the floor?, verify\n",
      "83/100. Which material makes up the round glasses, glass or wire?, choose\n",
      "84/100. Is the knife to the right of a man?, verify\n",
      "85/100. Are there any red fire trucks?, verify\n",
      "86/100. Are there both toothbrushes and mats in this picture?, logical\n",
      "87/100. Is the soap dish to the right of the soap dispenser?, verify\n",
      "88/100. Are the shorts large and blue?, logical\n",
      "89/100. Does the device under the picture frame look black?, verify\n",
      "90/100. Which material are the trousers made of, cloth or leather?, choose\n",
      "91/100. Do you see any skis?, verify\n",
      "92/100. Does the utensil beside the pan have black color and small size?, logical\n",
      "93/100. What is common to the door and the kitten?, compare\n",
      "94/100. Do you see either any containers or dream catchers?, logical\n",
      "95/100. Are there both girls and soccer balls in this image?, logical\n",
      "96/100. Are there either any skateboarders or snowboarders that are jumping?, logical\n",
      "97/100. Do the soap bottle and the clock have the same color?, compare\n",
      "98/100. Is the plate different in color than the purse?, compare\n",
      "99/100. What is common to the soccer ball and the backpack?, compare\n",
      "100/100. What is common to the door and the room?, compare\n",
      "model selection results:\n",
      "verify\n",
      "dandelin/vilt-b32-finetuned-vqa 22\n",
      "========================================\n",
      "query\n",
      "dandelin/vilt-b32-finetuned-vqa 44\n",
      "========================================\n",
      "logical\n",
      "dandelin/vilt-b32-finetuned-vqa 17\n",
      "========================================\n",
      "choose\n",
      "dandelin/vilt-b32-finetuned-vqa 11\n",
      "========================================\n",
      "compare\n",
      "dandelin/vilt-b32-finetuned-vqa 6\n",
      "========================================\n"
     ]
    }
   ],
   "source": [
    "template = \"\"\"\n",
    "Please choose the most suitable model from\n",
    "[\n",
    "{\"model id\": \"dandelin/vilt-b32-finetuned-vqa\", \"downloads\": 65092, \"likes\": 289, \"published time\": \"2021-02\", \"description\": \"Vision-and-Language Transformer (ViLT), fine-tuned on VQAv2. Vision-and-Language Transformer (ViLT) model fine-tuned on VQAv2. It was introduced in the paper ViLT: Vision-and-Language Transformer Without Convolution or Region Supervision by Kim et al.\"},\n",
    "{\"model id\": \"microsoft/git-base-textvqa\",  \"downloads\": 1173, \"likes\": 6, \"published time\": \"2022-05\", \"description\": \"GIT (GenerativeImage2Text), base-sized, fine-tuned on TextVQA. GIT (short for GenerativeImage2Text) model, base-sized version, fine-tuned on TextVQA. It was introduced in the paper GIT: A Generative Image-to-text Transformer for Vision and Language by Wang et al\"},\n",
    "{\"model id\": \"Salesforce/blip-vqa-base\", \"downloads\": 81312, \"likes\": 55, \"published time\": \"2022-01\", \"description\": \"BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. Model card for BLIP trained on visual question answering- base architecture (with ViT base backbone).\"}\n",
    "{\"model id\": \"Salesforce/blip2-opt-2.7b\", \"downloads\": 180666, \"likes\": 165, \"published time\": \"2023-01\", \"description\": \"BLIP-2, OPT-2.7b, pre-trained only. BLIP-2 model, leveraging OPT-2.7b (a large language model with 2.7 billion parameters). It was introduced in the paper BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models by Li et al.\"}\n",
    "{\"model id\": \"Salesforce/blip2-flan-t5-xl\", \"downloads\": 1, \"likes\": 2, \"published time\": \"2023-01\", \"description\": \"BLIP-2, Flan T5-xl, pre-trained only. BLIP-2 model, leveraging Flan T5-xl (a large language model). It was introduced in the paper BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models by Li et al.\"}\n",
    "{\"model id\": \"instructblip-vicuna-7b\", \"downloads\": 83320, \"likes\": 44, \"published time\": \"2023-05\", \"description\": \"InstructBLIP model. InstructBLIP model using Vicuna-7b as language model. InstructBLIP was introduced in the paper InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning by Dai et al.\"}\n",
    "{\"model id\": \"instructblip-flan-t5-xl\", \"downloads\": 10364, \"likes\": 21, \"published time\": \"2023-05\", \"description\": \"InstructBLIP model. InstructBLIP model using Flan-T5-xl as language model. InstructBLIP was introduced in the paper InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning by Dai et al.\"}\n",
    "] for the task {\"task\": \"visual question answering\", \"args\": {\"image\": IMAGE, \"question\": QUESTION}. The output must be in a strict JSON format: {\"id\": \"id\", \"reason\": \"your detail reasons for the choice.\n",
    "\"\"\"\n",
    "\n",
    "model_list = [\n",
    "    \"dandelin/vilt-b32-finetuned-vqa\",\n",
    "    \"microsoft/git-base-textvqa\",\n",
    "    \"Salesforce/blip-vqa-base\",\n",
    "    \"Salesforce/blip2-opt-2.7b\",\n",
    "    \"Salesforce/blip2-flan-t5-xl\",\n",
    "    \"instructblip-vicuna-7b\",\n",
    "    \"instructblip-flan-t5-xl\"\n",
    "]\n",
    "\n",
    "import time\n",
    "from collections import defaultdict\n",
    "reasons = {}\n",
    "structure2model2cnt = {}\n",
    "l = len(qusetion2structure)\n",
    "for idx, (question, structure) in enumerate(qusetion2structure.items()):\n",
    "    flag = True\n",
    "    if structure not in structure2model2cnt:\n",
    "        structure2model2cnt[structure] = defaultdict(int)\n",
    "    while flag:\n",
    "        try:\n",
    "            content = template + f\"QUESTION here is: {question}\"\n",
    "            messages = [{\"role\": \"system\", \"content\": \"Model Selection Stage: Given the user request and the parsed tasks, the AI assistant helps the user to select a suitable model from a list of models to process the user request. The assistant should focus more on the description of the model and find the model that has the most potential to solve requests and tasks.\"}, {\"role\": \"user\", \"content\": content}]\n",
    "            response = openai.ChatCompletion.create(\n",
    "                model = \"gpt-3.5-turbo\", # gpt-3.5-turbo-0613\n",
    "                messages = messages,\n",
    "                temperature = 0.0,\n",
    "            )\n",
    "            res_reason = response.get(\"choices\")[0][\"message\"][\"content\"].lstrip('\\n').rstrip('\\n')\n",
    "            reasons[question] = res_reason\n",
    "            for model in model_list:\n",
    "                if model in res_reason:\n",
    "                    structure2model2cnt[structure][model] += 1\n",
    "            flag = False\n",
    "        except:\n",
    "            time.sleep(20)\n",
    "    idx += 1\n",
    "    print(f\"{idx}/{l}. {question}, {structure}\")\n",
    "    \n",
    "print(\"model selection results:\")\n",
    "for structure, model2cnt in structure2model2cnt.items():\n",
    "    print(structure)\n",
    "    for model, cnt in model2cnt.items():\n",
    "        print(model, cnt)\n",
    "    print(\"====\"*10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "recsys",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
