{
    "embed_dim": 512,
    "vision_cfg": {
        "image_size": 224,
        "layers": 12,
        "width": 768,
        "patch_size": 32
    },
    "text_cfg": {
        "hf_model_name": "xlm-roberta-base",
        "hf_tokenizer_name": "xlm-roberta-base",
        "hf_pooler_type": "mean_pooler"
    }
}
