"""
registry.py

Exhaustive list of pretrained VLMs (with full descriptions / links to corresponding names and sections of paper).
"""

# === Pretrained Model Registry ===
# fmt: off
MODEL_REGISTRY = {
    # === LLaVa v1.5 Reproductions ===
    "reproduction-llava-v15+7b": {
        "model_id": "reproduction-llava-v15+7b",
        "names": ["LLaVa v1.5 7B (Reproduction)"],
        "description": {
            "name": "LLaVa v1.5 7B (Reproduction)",
            "optimization_procedure": "multi-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "reproduction-llava-v15+13b": {
        "model_id": "reproduction-llava-v15+13b",
        "names": ["LLaVa v1.5 13B (Reproduction)"],
        "description": {
            "name": "LLaVa v1.5 13B (Reproduction)",
            "optimization_procedure": "multi-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 13B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },

    # === Section 4.1 :: Optimization Procedure ===
    "one-stage+7b": {
        "model_id": "one-stage+7b",
        "names": [
            "One-Stage 7B",
            "Single-Stage 7B",
            "Frozen ViT (Single-Stage)",
            "CLIP ViT-L 336px (Letterbox)",
            "CLIP ViT-L 336px",
            "Vicuña v1.5 7B",
            "1 Epoch",
            "Base",
        ],
        "description": {
            "name": "Single-Stage 7B",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "one-stage+13b": {
        "model_id": "one-stage+13b",
        "names": [
            "One-Stage 13B",
            "Single-Stage 13B",
            "Vicuña v1.5 13B",
        ],
        "description": {
            "name": "Single-Stage 13B",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 13B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },

    "full-ft-multi-stage+7b": {
        "model_id": "full-ft-multi-stage+7b",
        "names": ["Finetune ViT (Multi-Stage)"],
        "description": {
            "name": "Finetune ViT (Multi-Stage)",
            "optimization_procedure": "multi-stage-full-finetune",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "full-ft-one-stage+7b": {
        "model_id": "full-ft-one-stage+7b",
        "names": ["Finetune ViT (Single-Stage)"],
        "description": {
            "name": "Finetune ViT (Single-Stage)",
            "optimization_procedure": "single-stage-full-finetune",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },

    # === Section 4.2 :: Image Processing and Visual Representations ===
    "in1k-224px+7b": {
        "model_id": "in1k-224px+7b",
        "names": ["IN1K ViT-L 224px"],
        "description": {
            "name": "IN1K ViT-L 224px",
            "optimization_procedure": "single-stage",
            "visual_representation": "ImageNet-21K+1K ViT-L/16 @ 224px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        },
    },
    "dinov2-224px+7b": {
        "model_id": "dinov2-224px+7b",
        "names": ["DINOv2 ViT-L 224px"],
        "description": {
            "name": "DINOv2 ViT-L 224px",
            "optimization_procedure": "single-stage",
            "visual_representation": "DINOv2 ViT-L/14 @ 224px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        },
    },
    "clip-224px+7b": {
        "model_id": "clip-224px+7b",
        "names": ["CLIP ViT-L 224px"],
        "description": {
            "name": "CLIP ViT-L 224px",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 224px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        },
    },
    "siglip-224px+7b": {
        "model_id": "siglip-224px+7b",
        "names": ["SigLIP ViT-SO 224px"],
        "description": {
            "name": "SigLIP ViT-SO 224px",
            "optimization_procedure": "single-stage",
            "visual_representation": "SigLIP ViT-SO/14 @ 224px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        },
    },

    "clip-336px-resize-crop+7b": {
        "model_id": "clip-336px-resize-crop+7b",
        "names": ["CLIP ViT-L 336px (Resize Crop)"],
        "description": {
            "name": "CLIP ViT-L 336px (Resize Crop)",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Resize Crop",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "clip-336px-resize-naive+7b": {
        "model_id": "clip-336px-resize-naive+7b",
        "names": ["CLIP ViT-L 336px (Naive Resize)", "CLIP 336px (Naive Resize)"],
        "description": {
            "name": "CLIP ViT-L 336px (Naive Resize)",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Naive Resize",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "siglip-384px-letterbox+7b": {
        "model_id": "siglip-384px-letterbox+7b",
        "names": ["SigLIP ViT-SO 384px (Letterbox)", "SigLIP ViT-SO 384px"],
        "description": {
            "name": "SigLIP ViT-SO 384px (Letterbox)",
            "optimization_procedure": "single-stage",
            "visual_representation": "SigLIP ViT-SO/14 @ 384px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "siglip-384px-resize-crop+7b": {
        "model_id": "siglip-384px-resize-crop+7b",
        "names": ["SigLIP ViT-SO 384px (Resize Crop)"],
        "description": {
            "name": "SigLIP ViT-SO 384px (Resize Crop)",
            "optimization_procedure": "single-stage",
            "visual_representation": "SigLIP ViT-SO/14 @ 384px",
            "image_processing": "Resize Crop",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "siglip-384px-resize-naive+7b": {
        "model_id": "siglip-384px-resize-naive+7b",
        "names": ["SigLIP ViT-SO 384px (Naive Resize)", "SigLIP 384px (Naive Resize)"],
        "description": {
            "name": "SigLIP ViT-SO 384px (Naive Resize)",
            "optimization_procedure": "single-stage",
            "visual_representation": "SigLIP ViT-SO/14 @ 384px",
            "image_processing": "Naive Resize",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },

    "dinoclip-336px-letterbox+7b": {
        "model_id": "dinoclip-336px-letterbox+7b",
        "names": ["DINOv2 + CLIP 336px (Letterbox)"],
        "description": {
            "name": "DINOv2 + CLIP 336px (Letterbox)",
            "optimization_procedure": "single-stage",
            "visual_representation": "DINOv2 ViT-L/14 + CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "dinoclip-336px-resize-naive+7b": {
        "model_id": "dinoclip-336px-resize-naive+7b",
        "names": ["DINOv2 + CLIP 336px (Naive Resize)"],
        "description": {
            "name": "DINOv2 + CLIP 336px (Naive Resize)",
            "optimization_procedure": "single-stage",
            "visual_representation": "DINOv2 ViT-L/14 + CLIP ViT-L/14 @ 336px",
            "image_processing": "Naive Resize",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "dinosiglip-384px-letterbox+7b": {
        "model_id": "dinosiglip-384px-letterbox+7b",
        "names": ["DINOv2 + SigLIP 384px (Letterbox)"],
        "description": {
            "name": "DINOv2 + SigLIP 384px (Letterbox)",
            "optimization_procedure": "single-stage",
            "visual_representation": "DINOv2 ViT-L/14 + SigLIP ViT-L/14 @ 384px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "dinosiglip-384px-resize-naive+7b": {
        "model_id": "dinosiglip-384px-resize-naive+7b",
        "names": ["DINOv2 + SigLIP 384px (Naive Resize)"],
        "description": {
            "name": "DINOv2 + SigLIP 384px (Naive Resize)",
            "optimization_procedure": "single-stage",
            "visual_representation": "DINOv2 ViT-L/14 + SigLIP ViT-L/14 @ 384px",
            "image_processing": "Naive Resize",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },

    # === Section 4.3 :: Language Models ===
    "llama2+7b": {
        "model_id": "llama2+7b",
        "names": ["Llama-2 7B"],
        "description": {
            "name": "Llama-2 7B",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Llama-2 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        },
    },
    "llama2+13b": {
        "model_id": "llama2+13b",
        "names": ["Llama-2 13B"],
        "description": {
            "name": "Llama-2 13B",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Llama-2 13B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        },
    },

    "vicuna-no-cotraining+7b": {
        "model_id": "vicuna-no-cotraining+7b",
        "names": ["Vicuña v1.5 7B (No Co-training)"],
        "description": {
            "name": "Vicuña v1.5 7B (No Co-training)",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Multimodal-Only"],
            "train_epochs": 1,
        },
    },
    "llama2-no-cotraining+7b": {
        "model_id": "llama2-no-cotraining+7b",
        "names": ["Llama-2 7B (No Co-training)"],
        "description": {
            "name": "Llama-2 7B (No Co-training)",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Llama-2 7B",
            "datasets": ["LLaVa v1.5 Multimodal-Only"],
            "train_epochs": 1,
        },
    },

    # === Section 4.4 :: Scaling Properties ===
    "train-1.25-epochs+7b": {
        "model_id": "train-1.25-epochs+7b",
        "names": ["1.25 Epochs"],
        "description": {
            "name": "1.25 Epochs",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1.25,
        }
    },
    "train-1.5-epochs+7b": {
        "model_id": "train-1.5-epochs+7b",
        "names": ["1.5 Epochs"],
        "description": {
            "name": "1.5 Epochs",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1.5,
        }
    },
    "train-2-epochs+7b": {
        "model_id": "train-2-epochs+7b",
        "names": ["2 Epochs"],
        "description": {
            "name": "2 Epochs",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 2,
        }
    },
    "train-3-epochs+7b": {
        "model_id": "train-3-epochs+7b",
        "names": ["3 Epochs"],
        "description": {
            "name": "3 Epochs",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 3,
        }
    },

    "llava-lvis4v+7b": {
        "model_id": "llava-lvis4v+7b",
        "names": ["Base + LVIS-4V"],
        "description": {
            "name": "Base + LVIS-4V",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct", "LVIS-Instruct-4V"],
            "train_epochs": 1,
        }
    },
    "llava-lrv+7b": {
        "model_id": "llava-lrv+7b",
        "names": ["Base + LRV"],
        "description": {
            "name": "Base + LRV",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct", "LRV-Instruct"],
            "train_epochs": 1,
        }
    },
    "llava-lvis4v-lrv+7b": {
        "model_id": "llava-lvis4v-lrv+7b",
        "names": ["Base + LVIS-4V + LRV"],
        "description": {
            "name": "Base + LVIS-4V + LRV",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Vicuña v1.5 7B",
            "datasets": ["LLaVa v1.5 Instruct", "LVIS-Instruct-4V", "LRV-Instruct"],
            "train_epochs": 1,
        }
    },

    # ===

    # === CLIP Prism Models ===
    "prism-clip-controlled+7b": {
        "model_id": "prism-clip-controlled+7b",
        "names": ["Prism-CLIP 7B (Controlled)"],
        "description": {
            "name": "CLIP Prism 7B (Controlled)",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Naive Resize",
            "language_model": "Llama-2 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "prism-clip-controlled+13b": {
        "model_id": "prism-clip-controlled+13b",
        "names": ["Prism-CLIP 13B (Controlled)"],
        "description": {
            "name": "CLIP Prism 13B (Controlled)",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Naive Resize",
            "language_model": "Llama-2 13B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "prism-clip+7b": {
        "model_id": "prism-clip+7b",
        "names": ["Prism-CLIP 7B"],
        "description": {
            "name": "CLIP Prism 7B",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Naive Resize",
            "language_model": "Llama-2 7B",
            "datasets": ["LLaVa v1.5 Instruct", "LVIS-Instruct-4V", "LRV-Instruct"],
            "train_epochs": 2,
        },
    },
    "prism-clip+13b": {
        "model_id": "prism-clip+13b",
        "names": ["Prism-CLIP 13B"],
        "description": {
            "name": "CLIP Prism 13B",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Naive Resize",
            "language_model": "Llama-2 13B",
            "datasets": ["LLaVa v1.5 Instruct", "LVIS-Instruct-4V", "LRV-Instruct"],
            "train_epochs": 2,
        },
    },

    # === SigLIP Prism Models ==
    "prism-siglip-controlled+7b": {
        "model_id": "prism-siglip-controlled+7b",
        "names": ["Prism-SigLIP 7B (Controlled)"],
        "description": {
            "name": "SigLIP Prism 7B (Controlled)",
            "optimization_procedure": "single-stage",
            "visual_representation": "SigLIP ViT-SO/14 @ 384px",
            "image_processing": "Naive Resize",
            "language_model": "Llama-2 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "prism-siglip-controlled+13b": {
        "model_id": "prism-siglip-controlled+7b",
        "names": ["Prism-SigLIP 13B (Controlled)"],
        "description": {
            "name": "SigLIP Prism 13B (Controlled)",
            "optimization_procedure": "single-stage",
            "visual_representation": "SigLIP ViT-SO/14 @ 384px",
            "image_processing": "Naive Resize",
            "language_model": "Llama-2 13B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "prism-siglip+7b": {
        "model_id": "prism-siglip+7b",
        "names": ["Prism-SigLIP 7B"],
        "description": {
            "name": "SigLIP Prism 7B",
            "optimization_procedure": "single-stage",
            "visual_representation": "SigLIP ViT-SO/14 @ 384px",
            "image_processing": "Naive Resize",
            "language_model": "Llama-2 7B",
            "datasets": ["LLaVa v1.5 Instruct", "LVIS-Instruct-4V", "LRV-Instruct"],
            "train_epochs": 2,
        }
    },
    "prism-siglip+13b": {
        "model_id": "prism-siglip+13b",
        "names": ["Prism-SigLIP 13B"],
        "description": {
            "name": "SigLIP Prism 13B",
            "optimization_procedure": "single-stage",
            "visual_representation": "SigLIP ViT-SO/14 @ 384px",
            "image_processing": "Naive Resize",
            "language_model": "Llama-2 13B",
            "datasets": ["LLaVa v1.5 Instruct", "LVIS-Instruct-4V", "LRV-Instruct"],
            "train_epochs": 2,
        }
    },

    # === DINOSigLIP Prism Models ===
    "prism-dinosiglip-controlled+7b": {
        "model_id": "prism-dinosiglip-controlled+7b",
        "names": ["Prism-DINOSigLIP 7B (Controlled)", "Prism 7B (Controlled)"],
        "description": {
            "name": "DINOSigLIP Prism 7B (Controlled)",
            "optimization_procedure": "single-stage",
            "visual_representation": "DINOv2 ViT-L/14 + SigLIP ViT-SO/14 @ 384px",
            "image_processing": "Naive Resize",
            "language_model": "Llama-2 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "prism-dinosiglip-controlled+13b": {
        "model_id": "prism-dinosiglip-controlled+13b",
        "names": ["Prism-DINOSigLIP 13B (Controlled)", "Prism 13B (Controlled)"],
        "description": {
            "name": "DINOSigLIP Prism 13B (Controlled)",
            "optimization_procedure": "single-stage",
            "visual_representation": "DINOv2 ViT-L/14 + SigLIP ViT-SO/14 @ 384px",
            "image_processing": "Naive Resize",
            "language_model": "Llama-2 13B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "prism-dinosiglip+7b": {
        "model_id": "prism-dinosiglip+7b",
        "names": ["Prism-DINOSigLIP 7B"],
        "description": {
            "name": "DINOSigLIP Prism 7B",
            "optimization_procedure": "single-stage",
            "visual_representation": "DINOv2 ViT-L/14 + SigLIP ViT-SO/14 @ 384px",
            "image_processing": "Naive Resize",
            "language_model": "Llama-2 7B",
            "datasets": ["LLaVa v1.5 Instruct", "LVIS-Instruct-4V", "LRV-Instruct"],
            "train_epochs": 2,
        },
    },
    "prism-dinosiglip+13b": {
        "model_id": "prism-dinosiglip+13b",
        "names": ["Prism-DINOSigLIP 13B"],
        "description": {
            "name": "DINOSigLIP Prism 13B",
            "optimization_procedure": "single-stage",
            "visual_representation": "DINOv2 ViT-L/14 + SigLIP ViT-SO/14 @ 384px",
            "image_processing": "Naive Resize",
            "language_model": "Llama-2 13B",
            "datasets": ["LLaVa v1.5 Instruct", "LVIS-Instruct-4V", "LRV-Instruct"],
            "train_epochs": 2,
        },
    },

    # === DINOSigLIP 224px Prism Models ===
    "prism-dinosiglip-224px-controlled+7b": {
        "model_id": "prism-dinosiglip-224px-controlled+7b",
        "names": ["Prism-DINOSigLIP 224px 7B (Controlled)"],
        "description": {
            "name": "DINOSigLIP 224px 7B (Controlled)",
            "optimization_procedure": "single-stage",
            "visual_representation": "DINOv2 ViT-L/14 + SigLIP ViT-SO 14 @ 224px",
            "image_processing": "Naive Resize",
            "language_model": "Llama-2 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "prism-dinosiglip-224px+7b": {
        "model_id": "prism-dinosiglip-224px+7b",
        "names": ["Prism-DINOSigLIP 224px 7B"],
        "description": {
            "name": "DINOSigLIP 224px 7B",
            "optimization_procedure": "single-stage",
            "visual_representation": "DINOv2 ViT-L/14 + SigLIP ViT-SO 14 @ 224px",
            "image_processing": "Naive Resize",
            "language_model": "Llama-2 7B",
            "datasets": ["LLaVa v1.5 Instruct", "LVIS-Instruct-4V", "LRV-Instruct"],
            "train_epochs": 2,
        }
    },

    # === Additional LLM Backbones ===
    "llama2-chat+7b": {
        "model_id": "llama2-chat+7b",
        "names": ["Llama-2 Chat 7B"],
        "description": {
            "name": "Llama-2 Chat 7B",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Llama-2 Chat 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "llama2-chat+13b": {
        "model_id": "llama2-chat+13b",
        "names": ["Llama-2 Chat 13B"],
        "description": {
            "name": "Llama-2 Chat 13B",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Llama-2 Chat 13B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "mistral-v0.1+7b": {
        "model_id": "mistral-v0.1+7b",
        "names": ["Mistral v0.1 7B"],
        "description": {
            "name": "Mistral v0.1 7B",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Mistral v0.1 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "mistral-instruct-v0.1+7b": {
        "model_id": "mistral-instruct-v0.1+7b",
        "names": ["Mistral Instruct v0.1 7B"],
        "description": {
            "name": "Mistral Instruct v0.1 7B",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Mistral Instruct v0.1 7B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
    "phi-2+3b": {
        "model_id": "phi-2+3b",
        "names": ["Phi-2 3B"],
        "description": {
            "name": "Phi-2 3B",
            "optimization_procedure": "single-stage",
            "visual_representation": "CLIP ViT-L/14 @ 336px",
            "image_processing": "Letterbox",
            "language_model": "Phi-2 3B",
            "datasets": ["LLaVa v1.5 Instruct"],
            "train_epochs": 1,
        }
    },
}

# Build Global Registry (Model ID, Name) -> Metadata
GLOBAL_REGISTRY = {name: v for k, v in MODEL_REGISTRY.items() for name in [k] + v["names"]}

# fmt: on
