{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "49b13a92-92c1-4529-8c43-d294d882ffa1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# conda create --name internvl --clone qwen\n",
    "# conda activate internvl\n",
    "# module load cuda/12.2\n",
    "# pip install \"lmdeploy>=0.7.3\"  # quoted so the shell does not treat '>' as a redirect"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "94b2614b-d64a-4c06-b362-31d5cf5c2d8e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "import csv\n",
    "import glob\n",
    "import json\n",
    "\n",
    "from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig\n",
    "from lmdeploy.vl import load_image\n",
    "from tqdm import tqdm\n",
    "\n",
    "\n",
    "# Model identifier handed to the lmdeploy pipeline.\n",
    "model = 'OpenGVLab/InternVL3-38B-AWQ'\n",
    "\n",
    "# Build the engine and chat-template settings separately so each one can be\n",
    "# read (or tweaked) without parsing a single long call.\n",
    "engine_config = TurbomindEngineConfig(session_len=16384, tp=1)\n",
    "template_config = ChatTemplateConfig(model_name='internvl2_5')\n",
    "\n",
    "pipe = pipeline(\n",
    "    model,\n",
    "    backend_config=engine_config,\n",
    "    chat_template_config=template_config,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "65e26fed-2df7-4f02-887d-0a06aa300388",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prompt paired with every image: it lists the allowed label values for each\n",
    "# attribute and shows the expected JSON reply. Both JSON snippets are written\n",
    "# without trailing commas so the example the model imitates is valid JSON.\n",
    "text = \"\"\"You are an image analysis tool specialized in facial attribute classification. \n",
    "          For the provided face image, output a JSON object with the following attributes:\n",
    "        \n",
    "            {\n",
    "              \"gender\": [\"male\", \"female\"],\n",
    "              \"age\": [\"young\", \"middle-aged\", \"senior\"],\n",
    "              \"skin_color\": [\"light\", \"medium\", \"dark\"],\n",
    "              \"ancestry\": [\"asian\", \"south_asian\", \"black\", \"latino/hispanic\", \"middle_eastern\", \"white\", \"indigenous\"],\n",
    "              \"hair_color\": [\"black\", \"brown\", \"red\", \"blonde\", \"gray\", \"other\"],\n",
    "              \"bangs\": [\"yes\", \"no\"],\n",
    "              \"bald\": [\"yes\", \"no\"],\n",
    "              \"beard\": [\"no\", \"mustache\", \"stubble\", \"full\"],\n",
    "              \"glasses\": [\"no\", \"regular\", \"sun\"],\n",
    "              \"headwear\": [\"no\", \"beanie\", \"cap\", \"hat\", \"headband\", \"hijab\", \"helmet\", \"turban\"]\n",
    "            }\n",
    "\n",
    "          Ensure the labeling is based on visible evidence only. If an attribute is unclear, return \"unknown\".\n",
    "          \n",
    "          Only output the JSON without any additional explanation or text.\n",
    "          \n",
    "          Example JSON output:\n",
    "          \n",
    "          {\n",
    "            \"gender\": \"female\",\n",
    "            \"age\": \"middle-aged\",\n",
    "            \"skin_color\": \"light\",\n",
    "            \"ancestry\": \"asian\",\n",
    "            \"hair_color\": \"black\",\n",
    "            \"bangs\": \"no\",\n",
    "            \"bald\": \"no\",\n",
    "            \"beard\": \"no\",\n",
    "            \"glasses\": \"sun\",\n",
    "            \"headwear\": \"beanie\"\n",
    "          }\n",
    "          \"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a0145ccd-ad74-4c08-92c6-c1e981ae3753",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attribute names requested in the prompt; also the column order of each result row.\n",
    "default_keys = [\"gender\", \"age\", \"skin_color\", \"ancestry\", \"hair_color\", \"bangs\", \n",
    "                \"bald\", \"beard\", \"glasses\", \"headwear\"]\n",
    "\n",
    "\n",
    "def _parse_attributes(reply_text):\n",
    "    \"\"\"Return the JSON object found in a model reply, or {} if none can be parsed.\"\"\"\n",
    "    match = re.search(r\"\\{.*\\}\", reply_text or \"\", re.DOTALL)\n",
    "    if match is None:\n",
    "        return {}\n",
    "    # Strict JSON rejects a comma right before a closing brace/bracket; strip it.\n",
    "    candidate = re.sub(r\",\\s*([}\\]])\", r\"\\1\", match.group())\n",
    "    try:\n",
    "        parsed = json.loads(candidate)\n",
    "    except json.JSONDecodeError:\n",
    "        return {}\n",
    "    # Anything other than an object (e.g. a list) has no attribute keys to read.\n",
    "    return parsed if isinstance(parsed, dict) else {}\n",
    "\n",
    "\n",
    "def process_images(batch_images, data_root='../RFW/data/'):\n",
    "    \"\"\"Label a batch of images and return one CSV-ready row per image.\n",
    "\n",
    "    Args:\n",
    "        batch_images: Paths of the images to label; sent to `pipe` in a single call.\n",
    "        data_root: Path fragment removed from each image path in the output row.\n",
    "\n",
    "    Returns:\n",
    "        A list with one row per input image: the shortened path followed by the\n",
    "        value of every key in `default_keys`. A key missing from the reply, or a\n",
    "        reply with no parseable JSON object, yields \"unknown\" instead of raising,\n",
    "        so one bad reply cannot abort the whole run.\n",
    "    \"\"\"\n",
    "    if not batch_images:\n",
    "        return []\n",
    "    prompts = [(text, load_image(img)) for img in batch_images]\n",
    "    responses = pipe(prompts)\n",
    "    rows = []\n",
    "    for image_path, response in zip(batch_images, responses):\n",
    "        attributes = _parse_attributes(response.text)\n",
    "        labels = [attributes.get(key, \"unknown\") for key in default_keys]\n",
    "        rows.append([image_path.replace(data_root, '')] + labels)\n",
    "    return rows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f6c880b3-cc8e-4c02-aa95-c308dab33fb8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Index of the first image to process; set > 0 to resume an interrupted run.\n",
    "init = 0\n",
    "# glob returns paths in arbitrary order; sorting keeps `init` pointing at the\n",
    "# same image on every run.\n",
    "files = sorted(glob.glob('../RFW/data/*/*/*'))\n",
    "\n",
    "output_file = \"internvl8b_labels.csv\"\n",
    "\n",
    "# Column names\n",
    "columns = [\"File Name\", \"Gender\", \"Age\", \"Skin Color\", \"Ancestry\", \"Hair Color\", \n",
    "           \"Bangs\", \"Bald\", \"Beard\", \"Glasses\", \"Headwear\"]\n",
    "\n",
    "# Chunk settings\n",
    "chunk_size = 32  # Number of images processed per batch\n",
    "\n",
    "# A fresh run (init == 0) truncates the CSV; a resumed run appends so rows\n",
    "# written earlier are kept. The header is written only when the file is empty.\n",
    "with open(output_file, \"w\" if init == 0 else \"a\", newline=\"\") as csvfile:\n",
    "    if csvfile.tell() == 0:\n",
    "        csv.writer(csvfile).writerow(columns)  # CSV Header\n",
    "\n",
    "# Process in chunks\n",
    "for start_idx in tqdm(range(init, len(files), chunk_size), desc=\"Processing Images\"):\n",
    "    # Slicing clamps at the end of the list, so the last chunk may be shorter.\n",
    "    chunk = files[start_idx:start_idx + chunk_size]\n",
    "    results = process_images(chunk)\n",
    "    # Append after every chunk so finished work survives an interruption.\n",
    "    with open(output_file, \"a\", newline=\"\") as csvfile:\n",
    "        csv.writer(csvfile).writerows(results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6956ff6e-b82e-4ac2-be5f-7aebf9785530",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
