{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "a8075aeb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['any-to-any', 'audio-classification', 'audio-to-audio', 'audio-text-to-text', 'automatic-speech-recognition', 'depth-estimation', 'document-question-answering', 'visual-document-retrieval', 'feature-extraction', 'fill-mask', 'image-classification', 'image-feature-extraction', 'image-segmentation', 'image-to-image', 'image-text-to-text', 'image-to-text', 'image-to-video', 'keypoint-detection', 'mask-generation', 'object-detection', 'video-classification', 'question-answering', 'reinforcement-learning', 'sentence-similarity', 'summarization', 'table-question-answering', 'tabular-classification', 'tabular-regression', 'text-classification', 'text-generation', 'text-ranking', 'text-to-image', 'text-to-speech', 'text-to-video', 'token-classification', 'translation', 'unconditional-image-generation', 'video-text-to-text', 'video-to-video', 'visual-question-answering', 'zero-shot-classification', 'zero-shot-image-classification', 'zero-shot-object-detection', 'text-to-3d', 'image-to-3d']\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'domain': 'Multimodal Text-to-Image',\n",
       " 'framework': 'Hugging Face',\n",
       " 'functionality': 'Text-to-Image generation',\n",
       " 'api_name': 'stabilityai/stable-diffusion-2-base',\n",
       " 'api_call': \"StableDiffusionPipeline.from_pretrained('stabilityai/stable-diffusion-2-base', scheduler=EulerDiscreteScheduler.from_pretrained('stabilityai/stable-diffusion-2-base', subfolder=scheduler), torch_dtype=torch.float16)\",\n",
       " 'performance': {'dataset': 'COCO2017 validation set',\n",
       "  'accuracy': 'Not optimized for FID scores'},\n",
       " 'description': 'Stable Diffusion v2-base is a diffusion-based text-to-image generation model trained on a subset of LAION-5B dataset. It can be used to generate and modify images based on text prompts. The model uses a fixed, pretrained text encoder (OpenCLIP-ViT/H) and is intended for research purposes only.',\n",
       " 'model_name': 'stabilityai/stable-diffusion-2-base'}"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import sys\n",
    "import os\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "# Load .env file\n",
    "env_path = os.path.join(os.path.dirname(os.getcwd()), \"..\", \".env\")\n",
    "load_dotenv(env_path)\n",
    "\n",
    "# Add the project root to the path\n",
    "project_root = os.getenv(\"PARENT_ROOT\")\n",
    "if project_root not in sys.path:\n",
    "    sys.path.insert(0, project_root)\n",
    "\n",
    "from huggingface_hub import ModelCard, list_models\n",
    "from data.utils.utility import clean_markdown\n",
    "from time import sleep\n",
    "import datetime\n",
    "import json\n",
    "\n",
    "f = os.getenv(\"TASKS_FILE\")\n",
    "t = json.load(open(f, \"r\"))\n",
    "tasks = list(t.keys())\n",
    "print(tasks)\n",
    "corpus_path = os.path.join(os.getenv(\"DATA_PATH\"), \"model_indices/e1_e2.json\")\n",
    "with open(corpus_path, \"r\") as f:\n",
    "    # read iteratively line by line\n",
    "    corpus = [json.loads(line) for line in f]\n",
    "\n",
    "corpus[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "41628e6e",
   "metadata": {},
   "outputs": [],
   "source": [
    "limit = 50\n",
    "min_year = 2025\n",
    "collected_models = []\n",
    "seen_ids = set()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "ffafb580",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Search: task='any-to-any'\n",
      "Found 0 models for tag 'any-to-any'\n",
      "\n",
      "Search: task='audio-classification'\n",
      "Found 2 models for tag 'audio-classification'\n",
      "\n",
      "Search: task='audio-to-audio'\n",
      "Found 1 models for tag 'audio-to-audio'\n",
      "\n",
      "Search: task='audio-text-to-text'\n",
      "Found 0 models for tag 'audio-text-to-text'\n",
      "\n",
      "Search: task='automatic-speech-recognition'\n",
      "Found 9 models for tag 'automatic-speech-recognition'\n",
      "\n",
      "Search: task='depth-estimation'\n",
      "Found 2 models for tag 'depth-estimation'\n",
      "\n",
      "Search: task='document-question-answering'\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Repo card metadata block was not found. Setting CardData to empty.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found 0 models for tag 'document-question-answering'\n",
      "\n",
      "Search: task='visual-document-retrieval'\n",
      "Found 0 models for tag 'visual-document-retrieval'\n",
      "\n",
      "Search: task='feature-extraction'\n",
      "Found 5 models for tag 'feature-extraction'\n",
      "\n",
      "Search: task='fill-mask'\n",
      "Found 3 models for tag 'fill-mask'\n",
      "\n",
      "Search: task='image-classification'\n",
      "Found 2 models for tag 'image-classification'\n",
      "\n",
      "Search: task='image-feature-extraction'\n",
      "Found 1 models for tag 'image-feature-extraction'\n",
      "\n",
      "Search: task='image-segmentation'\n",
      "Found 1 models for tag 'image-segmentation'\n",
      "\n",
      "Search: task='image-to-image'\n",
      "Found 5 models for tag 'image-to-image'\n",
      "\n",
      "Search: task='image-text-to-text'\n",
      "Found 1 models for tag 'image-text-to-text'\n",
      "\n",
      "Search: task='image-to-text'\n",
      "Found 2 models for tag 'image-to-text'\n",
      "\n",
      "Search: task='image-to-video'\n",
      "Found 0 models for tag 'image-to-video'\n",
      "\n",
      "Search: task='keypoint-detection'\n",
      "Found 0 models for tag 'keypoint-detection'\n",
      "\n",
      "Search: task='mask-generation'\n",
      "Found 1 models for tag 'mask-generation'\n",
      "\n",
      "Search: task='object-detection'\n",
      "Found 5 models for tag 'object-detection'\n",
      "\n",
      "Search: task='video-classification'\n",
      "Found 0 models for tag 'video-classification'\n",
      "\n",
      "Search: task='question-answering'\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Repo card metadata block was not found. Setting CardData to empty.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found 1 models for tag 'question-answering'\n",
      "\n",
      "Search: task='reinforcement-learning'\n",
      "Found 0 models for tag 'reinforcement-learning'\n",
      "\n",
      "Search: task='sentence-similarity'\n",
      "Found 2 models for tag 'sentence-similarity'\n",
      "\n",
      "Search: task='summarization'\n",
      "Found 3 models for tag 'summarization'\n",
      "\n",
      "Search: task='table-question-answering'\n",
      "Found 13 models for tag 'table-question-answering'\n",
      "\n",
      "Search: task='tabular-classification'\n",
      "Found 0 models for tag 'tabular-classification'\n",
      "\n",
      "Search: task='tabular-regression'\n",
      "Found 1 models for tag 'tabular-regression'\n",
      "\n",
      "Search: task='text-classification'\n",
      "Found 3 models for tag 'text-classification'\n",
      "\n",
      "Search: task='text-generation'\n",
      "Found 1 models for tag 'text-generation'\n",
      "\n",
      "Search: task='text-ranking'\n",
      "Found 0 models for tag 'text-ranking'\n",
      "\n",
      "Search: task='text-to-image'\n",
      "Found 1 models for tag 'text-to-image'\n",
      "\n",
      "Search: task='text-to-speech'\n",
      "Found 1 models for tag 'text-to-speech'\n",
      "\n",
      "Search: task='text-to-video'\n",
      "Found 1 models for tag 'text-to-video'\n",
      "\n",
      "Search: task='token-classification'\n",
      "Found 2 models for tag 'token-classification'\n",
      "\n",
      "Search: task='translation'\n",
      "Found 2 models for tag 'translation'\n",
      "\n",
      "Search: task='unconditional-image-generation'\n",
      "Found 0 models for tag 'unconditional-image-generation'\n",
      "\n",
      "Search: task='video-text-to-text'\n",
      "Found 0 models for tag 'video-text-to-text'\n",
      "\n",
      "Search: task='video-to-video'\n",
      "Found 0 models for tag 'video-to-video'\n",
      "\n",
      "Search: task='visual-question-answering'\n",
      "Found 5 models for tag 'visual-question-answering'\n",
      "\n",
      "Search: task='zero-shot-classification'\n",
      "Found 5 models for tag 'zero-shot-classification'\n",
      "\n",
      "Search: task='zero-shot-image-classification'\n",
      "Found 10 models for tag 'zero-shot-image-classification'\n",
      "\n",
      "Search: task='zero-shot-object-detection'\n",
      "Found 3 models for tag 'zero-shot-object-detection'\n",
      "\n",
      "Search: task='text-to-3d'\n",
      "Found 1 models for tag 'text-to-3d'\n",
      "\n",
      "Search: task='image-to-3d'\n",
      "Found 0 models for tag 'image-to-3d'\n"
     ]
    }
   ],
   "source": [
    "def parse_created_at(model):\n",
    "    created_at = model.created_at or model.cardData.get(\"createdAt\")\n",
    "    if isinstance(created_at, str):\n",
    "        return datetime.fromisoformat(created_at.replace(\"Z\", \"+00:00\"))\n",
    "    return created_at\n",
    "\n",
    "\n",
    "def is_old(model, max_year=2025, max_month=8):\n",
    "    try:\n",
    "        created_at = parse_created_at(model)\n",
    "        if not created_at:\n",
    "            return False\n",
    "        return created_at.year < max_year or (\n",
    "            created_at.year == max_year and created_at.month < max_month\n",
    "        )\n",
    "    except Exception:\n",
    "        return False\n",
    "\n",
    "\n",
    "def load_modelcard(model_id):\n",
    "    try:\n",
    "        card = ModelCard.load(model_id)\n",
    "        return card.text\n",
    "    except Exception:\n",
    "        return None\n",
    "\n",
    "\n",
    "count = 0\n",
    "for task in tasks:\n",
    "    task_count = 0\n",
    "    print(f\"\\nSearch: task='{task}'\")\n",
    "\n",
    "    try:\n",
    "        models = list_models(filter=task, sort=\"downloads\", direction=-1, limit=limit)\n",
    "\n",
    "        for model in models:\n",
    "            if model.id in seen_ids:\n",
    "                continue\n",
    "\n",
    "            created_at = parse_created_at(model)\n",
    "            if not created_at:\n",
    "                continue\n",
    "\n",
    "            # TODO: change to is_old to get older models\n",
    "            if not is_old(model, min_year):\n",
    "                continue\n",
    "\n",
    "            # drop if model is not in corpus\n",
    "            if not any(entry[\"model_name\"] == model.id for entry in corpus):\n",
    "                continue\n",
    "\n",
    "            modelcard = load_modelcard(model.id)\n",
    "            if not modelcard:\n",
    "                continue\n",
    "\n",
    "            modelcard = clean_markdown(modelcard)\n",
    "            if len(modelcard.split()) <= 500:\n",
    "                continue\n",
    "\n",
    "            collected_models.append(\n",
    "                {\n",
    "                    \"model_id\": model.id,\n",
    "                    \"created_at\": created_at.isoformat(),\n",
    "                    \"downloads\": model.downloads,\n",
    "                    \"likes\": model.likes,\n",
    "                    \"author\": model.author,\n",
    "                    \"tags\": model.tags,\n",
    "                    \"modelcard\": modelcard,\n",
    "                    \"domain\": task,\n",
    "                }\n",
    "            )\n",
    "\n",
    "            seen_ids.add(model.id)\n",
    "            task_count += 1\n",
    "\n",
    "    except Exception as e:\n",
    "        print(f\"Error for task={task}: {e}\")\n",
    "\n",
    "    sleep(4)\n",
    "    print(f\"Found {len(collected_models) - count} models for tag '{task}'\")\n",
    "    count = len(collected_models)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "505cb17a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Before filtering, 95 entries found.\n",
      "After filtering, 95 entries remain.\n"
     ]
    }
   ],
   "source": [
    "import re\n",
    "import unicodedata\n",
    "\n",
    "\n",
    "def normalize_text(text: str) -> str:\n",
    "    \"\"\"\n",
    "    Normalize text for robust substring matching.\n",
    "    \"\"\"\n",
    "    # Unicode normalization\n",
    "    text = unicodedata.normalize(\"NFKC\", text)\n",
    "\n",
    "    # Case folding (stronger than lower())\n",
    "    text = text.casefold()\n",
    "\n",
    "    # Collapse whitespace\n",
    "    text = re.sub(r\"\\s+\", \" \", text).strip()\n",
    "\n",
    "    return text\n",
    "\n",
    "\n",
    "def is_autogenerated_modelcard(modelcard: str) -> bool:\n",
    "    \"\"\"\n",
    "    Returns True if the normalized modelcard contains\n",
    "    'This model card has been automatically generated.'.\n",
    "    \"\"\"\n",
    "    if not modelcard:\n",
    "        return False\n",
    "\n",
    "    target = normalize_text(\"This model card has been automatically generated.\")\n",
    "    content = normalize_text(modelcard)\n",
    "\n",
    "    return target in content\n",
    "\n",
    "\n",
    "print(f\"Before filtering, {len(collected_models)} entries found.\")\n",
    "# Filter out autogenerated model cards\n",
    "filtered_models = [\n",
    "    entry\n",
    "    for entry in collected_models\n",
    "    if not is_autogenerated_modelcard(entry.get(\"modelcard\", \"\"))\n",
    "]\n",
    "print(f\"After filtering, {len(filtered_models)} entries remain.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "96cd1c2d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>model_id</th>\n",
       "      <th>created_at</th>\n",
       "      <th>downloads</th>\n",
       "      <th>likes</th>\n",
       "      <th>author</th>\n",
       "      <th>tags</th>\n",
       "      <th>modelcard</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>domain</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>table-question-answering</th>\n",
       "      <td>13</td>\n",
       "      <td>13</td>\n",
       "      <td>13</td>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zero-shot-image-classification</th>\n",
       "      <td>10</td>\n",
       "      <td>10</td>\n",
       "      <td>10</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>automatic-speech-recognition</th>\n",
       "      <td>9</td>\n",
       "      <td>9</td>\n",
       "      <td>9</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>object-detection</th>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>visual-question-answering</th>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>feature-extraction</th>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>image-to-image</th>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zero-shot-classification</th>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>text-classification</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>summarization</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zero-shot-object-detection</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fill-mask</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>image-to-text</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>depth-estimation</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>audio-classification</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>token-classification</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>translation</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>image-classification</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sentence-similarity</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>image-feature-extraction</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>audio-to-audio</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>image-segmentation</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>image-text-to-text</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>question-answering</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mask-generation</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>text-generation</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>tabular-regression</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>text-to-image</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>text-to-3d</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>text-to-speech</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>text-to-video</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                model_id  created_at  downloads  likes  \\\n",
       "domain                                                                   \n",
       "table-question-answering              13          13         13     13   \n",
       "zero-shot-image-classification        10          10         10     10   \n",
       "automatic-speech-recognition           9           9          9      9   \n",
       "object-detection                       5           5          5      5   \n",
       "visual-question-answering              5           5          5      5   \n",
       "feature-extraction                     5           5          5      5   \n",
       "image-to-image                         5           5          5      5   \n",
       "zero-shot-classification               5           5          5      5   \n",
       "text-classification                    3           3          3      3   \n",
       "summarization                          3           3          3      3   \n",
       "zero-shot-object-detection             3           3          3      3   \n",
       "fill-mask                              3           3          3      3   \n",
       "image-to-text                          2           2          2      2   \n",
       "depth-estimation                       2           2          2      2   \n",
       "audio-classification                   2           2          2      2   \n",
       "token-classification                   2           2          2      2   \n",
       "translation                            2           2          2      2   \n",
       "image-classification                   2           2          2      2   \n",
       "sentence-similarity                    2           2          2      2   \n",
       "image-feature-extraction               1           1          1      1   \n",
       "audio-to-audio                         1           1          1      1   \n",
       "image-segmentation                     1           1          1      1   \n",
       "image-text-to-text                     1           1          1      1   \n",
       "question-answering                     1           1          1      1   \n",
       "mask-generation                        1           1          1      1   \n",
       "text-generation                        1           1          1      1   \n",
       "tabular-regression                     1           1          1      1   \n",
       "text-to-image                          1           1          1      1   \n",
       "text-to-3d                             1           1          1      1   \n",
       "text-to-speech                         1           1          1      1   \n",
       "text-to-video                          1           1          1      1   \n",
       "\n",
       "                                author  tags  modelcard  \n",
       "domain                                                   \n",
       "table-question-answering             0    13         13  \n",
       "zero-shot-image-classification       0    10         10  \n",
       "automatic-speech-recognition         0     9          9  \n",
       "object-detection                     0     5          5  \n",
       "visual-question-answering            0     5          5  \n",
       "feature-extraction                   0     5          5  \n",
       "image-to-image                       0     5          5  \n",
       "zero-shot-classification             0     5          5  \n",
       "text-classification                  0     3          3  \n",
       "summarization                        0     3          3  \n",
       "zero-shot-object-detection           0     3          3  \n",
       "fill-mask                            0     3          3  \n",
       "image-to-text                        0     2          2  \n",
       "depth-estimation                     0     2          2  \n",
       "audio-classification                 0     2          2  \n",
       "token-classification                 0     2          2  \n",
       "translation                          0     2          2  \n",
       "image-classification                 0     2          2  \n",
       "sentence-similarity                  0     2          2  \n",
       "image-feature-extraction             0     1          1  \n",
       "audio-to-audio                       0     1          1  \n",
       "image-segmentation                   0     1          1  \n",
       "image-text-to-text                   0     1          1  \n",
       "question-answering                   0     1          1  \n",
       "mask-generation                      0     1          1  \n",
       "text-generation                      0     1          1  \n",
       "tabular-regression                   0     1          1  \n",
       "text-to-image                        0     1          1  \n",
       "text-to-3d                           0     1          1  \n",
       "text-to-speech                       0     1          1  \n",
       "text-to-video                        0     1          1  "
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.DataFrame(collected_models)\n",
    "df.groupby(\"domain\").count().sort_values(by=\"model_id\", ascending=False)\n",
    "# nunique domains"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "d4126b92",
   "metadata": {},
   "outputs": [],
   "source": [
    "# # search for Qwen/Qwen3-Omni-30B-A3B-Instruct\n",
    "# for item in collected_models:\n",
    "#     if \"Qwen3-Omni-30B-A3B\" in item[\"model_id\"]:\n",
    "#         print(item)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "d789847a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "After removing duplicates, 95 entries remain.\n"
     ]
    }
   ],
   "source": [
    "def remove_duplicates_by_modelcard(models, key=\"modelcard\", priority_key=\"downloads\"):\n",
    "    \"\"\"\n",
    "    Remove duplicate models based on a key (e.g., 'modelcard').\n",
    "    When duplicates are found, keep the one with the highest value for priority_key.\n",
    "\n",
    "    Entries whose ``key`` is missing or None are dropped from the result.\n",
    "    A missing or None ``priority_key`` counts as 0, so comparing such an entry\n",
    "    against another one cannot raise a TypeError. On a tie the entry seen\n",
    "    first is kept.\n",
    "\n",
    "    Args:\n",
    "        models: List of model dictionaries\n",
    "        key: Field to check for duplicates (default: 'modelcard')\n",
    "        priority_key: Field to use for selecting which duplicate to keep (default: 'downloads')\n",
    "\n",
    "    Returns:\n",
    "        List of unique models, ordered by the first appearance of each key value\n",
    "    \"\"\"\n",
    "\n",
    "    def priority_of(entry):\n",
    "        # .get(priority_key, 0) alone is not enough: a key that is present but\n",
    "        # holds None would return None and break the \">\" comparison below.\n",
    "        value = entry.get(priority_key)\n",
    "        return 0 if value is None else value\n",
    "\n",
    "    unique_items = {}\n",
    "\n",
    "    for item in models:\n",
    "        key_value = item.get(key)\n",
    "\n",
    "        if key_value is None:\n",
    "            # Nothing to group this entry by, so it is left out of the result\n",
    "            continue\n",
    "\n",
    "        is_new = key_value not in unique_items\n",
    "        if is_new or priority_of(item) > priority_of(unique_items[key_value]):\n",
    "            # Reassigning an existing dict key keeps its original position,\n",
    "            # so the output order follows first appearance.\n",
    "            unique_items[key_value] = item\n",
    "\n",
    "    return list(unique_items.values())\n",
    "\n",
    "\n",
    "# Deduplicate the filtered entries by model card, keeping the entry with the\n",
    "# highest download count in each group, and report how many entries are left.\n",
    "cleaned_models = remove_duplicates_by_modelcard(\n",
    "    models=filtered_models,\n",
    "    key=\"modelcard\",\n",
    "    priority_key=\"downloads\",\n",
    ")\n",
    "print(f\"After removing duplicates, {len(cleaned_models)} entries remain.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "6373f22a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/leonardo/home/userexternal/ggramagl/prjects/CCO/self-instruct/data\n",
      "legacy_model_cards_from_step_2.jsonl\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "9406"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6464"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "4305"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "8529"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "10213"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "10303"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "10537"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "17765"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "17736"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "17773"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "17751"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "7513"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6665"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6683"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6957"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "7851"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "8286"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6938"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "10261"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "10815"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6235"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "8681"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5166"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5364"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5261"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5130"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "4630"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "4487"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "4607"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "4346"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "4374"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "7398"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6212"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6050"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5937"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5307"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "4155"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5342"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "4239"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "4324"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "4868"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5451"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5356"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "4286"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5589"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5392"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5972"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6540"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6529"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5991"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5973"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5408"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6516"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5953"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5961"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5412"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5411"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6571"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6537"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "7012"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "4539"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "9415"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "7159"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6979"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "13922"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "8475"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5225"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5427"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "4552"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5970"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "4520"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5217"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6469"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5266"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6590"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6536"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6518"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6241"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "4851"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "9017"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5575"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "7933"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "7954"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "7894"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6054"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "6702"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "8822"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "10796"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "11472"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "7367"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "8101"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5146"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5132"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5129"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5587"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Save cleaned_models as JSON Lines (one JSON object per line)\n",
    "data_root = os.getenv(\"SELF_INSTRUCT_ROOT_DATA\")\n",
    "file_name = os.getenv(\"FILE_NAME_LEGACY_MODEL_CARDS_STEP_2\")\n",
    "print(data_root)\n",
    "print(file_name)\n",
    "if data_root is None or file_name is None:\n",
    "    # os.path.join would otherwise fail with an opaque TypeError on None\n",
    "    raise RuntimeError(\n",
    "        \"SELF_INSTRUCT_ROOT_DATA and FILE_NAME_LEGACY_MODEL_CARDS_STEP_2 must both be set\"\n",
    "    )\n",
    "output_file = os.path.join(data_root, file_name)\n",
    "# Explicit encoding so the file is written and read back with the same codec\n",
    "with open(output_file, \"w\", encoding=\"utf-8\") as f:\n",
    "    # writelines returns None, whereas each f.write call returns a character\n",
    "    # count that the notebook may echo as a separate output\n",
    "    f.writelines(json.dumps(item) + \"\\n\" for item in cleaned_models)\n",
    "# Read the file back, replacing cleaned_models with the parsed records\n",
    "with open(output_file, \"r\", encoding=\"utf-8\") as f:\n",
    "    cleaned_models = [json.loads(line) for line in f]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
