{
    "embed_dim": 512,
    "vision_cfg": {
        "image_size": 224,
        "layers": 12,
        "width": 768,
        "patch_size": 32,
        "output_tokens": true
    },
    "text_cfg": {
        "hf_model_name": "roberta-base",
        "hf_tokenizer_name": "roberta-base",
        "proj": "linear",
        "width": 768,
        "output_tokens": true
    },
    "multimodal_cfg": {
        "context_length": 76,
        "width": 768,
        "heads": 8,
        "layers": 12
    },
    "custom_text": true
}
