model,image_size,image_width,text_width,embed_dim,mparams,image_mparams,text_mparams,gflops,image_gflops,text_gflops
ViT-S-32-alt,224,384,256,256,43.22,22.59,20.63,3.56,2.29,1.27
ViT-S-32,224,384,384,384,63.09,22.64,40.44,5.66,2.29,3.38
ViT-M-32-alt,224,512,384,384,80.07,39.63,40.44,7.37,3.99,3.38
ViT-M-32,224,512,512,512,103.12,39.69,63.43,9.95,3.99,5.96
ViT-S-16-alt,224,384,256,256,42.4,21.76,20.63,10.47,9.2,1.27
ViT-S-16,224,384,384,384,62.26,21.81,40.44,12.58,9.2,3.38
ViT-B-32,224,768,512,512,151.28,87.85,63.43,14.78,8.82,5.96
ViT-B-32-quickgelu,224,768,512,512,151.28,87.85,63.43,14.78,8.82,5.96
convnext_tiny,224,768,512,1024,92.3,28.61,63.69,14.87,8.91,5.96
ViT-B-32-256,256,768,512,512,151.29,87.86,63.43,17.46,11.5,5.96
RN50,224,64,512,1024,102.01,38.32,63.69,18.18,12.22,5.96
RN50-quickgelu,224,64,512,1024,102.01,38.32,63.69,18.18,12.22,5.96
ViT-M-16-alt,224,512,384,384,78.98,38.53,40.44,19.36,15.98,3.38
ViT-M-16,224,512,512,512,102.02,38.59,63.43,21.94,15.98,5.96
vit_relpos_medium_patch16_cls_224,224,768,512,512,101.94,38.51,63.43,21.99,16.03,5.96
mt5-base-ViT-B-32,224,768,512,512,365.71,87.85,277.86,22.12,8.82,13.3
convnext_small,224,768,512,512,113.28,49.85,63.43,23.33,17.37,5.96
ViT-B-32-plus-256,256,896,640,640,210.3,119.13,91.16,24.83,15.56,9.27
RN101,224,64,512,512,119.69,56.26,63.43,25.5,19.54,5.96
RN101-quickgelu,224,64,512,512,119.69,56.26,63.43,25.5,19.54,5.96
vit_medium_patch16_gap_256,256,768,512,512,102.04,38.61,63.43,27.1,21.14,5.96
coca_ViT-B-32,224,768,512,512,253.56,89.16,63.43,33.34,9.19,5.96
convnext_base,224,768,512,512,151.52,88.09,63.43,36.67,30.71,5.96
swin_base_patch4_window7_224,224,768,640,640,178.56,87.4,91.16,40.13,30.86,9.27
ViT-B-16,224,768,512,512,149.62,86.19,63.43,41.09,35.13,5.96
ViT-B-16-quickgelu,224,768,512,512,149.62,86.19,63.43,41.09,35.13,5.96
EVA02-B-16,224,768,512,512,149.69,86.26,63.43,41.09,35.13,5.96
ViT-B-16-SigLIP,224,768,768,768,203.16,92.88,110.27,46.44,35.42,11.02
convnext_base_w,256,768,640,640,179.39,88.22,91.16,49.38,40.11,9.27
RN50x4,288,80,640,640,178.3,87.14,91.16,51.82,42.56,9.27
coca_roberta-ViT-B-32,224,768,768,512,420.37,87.85,124.45,53.12,8.82,13.12
ViT-B-16-plus,224,896,640,640,208.35,117.19,91.16,56.75,47.49,9.27
ViT-B-16-SigLIP-256,256,768,768,768,203.2,92.93,110.27,57.84,46.82,11.02
ViT-B-16-SigLIP-i18n-256,256,768,768,768,370.63,92.93,277.7,57.84,46.82,11.02
ViT-B-16-plus-240,240,896,640,640,208.38,117.21,91.16,64.03,54.76,9.27
convnext_base_w_320,320,768,640,640,179.39,88.22,91.16,71.94,62.67,9.27
convnext_large,224,768,768,768,321.06,197.41,123.65,82.02,68.72,13.3
coca_base,288,768,768,512,440.34,86.4,134.66,99.09,46.47,13.3
roberta-ViT-B-32,224,768,512,512,212.72,87.85,124.87,105.87,8.82,97.05
xlm-roberta-base-ViT-B-32,224,768,512,512,366.12,87.85,278.27,105.87,8.82,97.05
convnext_large_d,256,768,768,768,351.77,199.77,152.0,107.5,89.76,17.73
ViT-B-16-SigLIP-384,384,768,768,768,203.45,93.18,110.27,123.15,112.13,11.02
ViT-L-16,224,1024,768,768,427.74,304.09,123.65,136.41,123.11,13.3
convnext_large_d_320,320,768,768,768,351.77,199.77,152.0,157.98,140.25,17.73
RN50x16,384,96,768,768,290.98,167.33,123.65,162.69,149.39,13.3
ViT-L-14-CLIPA,224,1024,768,768,414.21,303.96,110.25,167.5,162.03,5.47
EVA02-L-14,224,768,768,768,427.76,304.11,123.65,175.3,162.0,13.3
ViT-L-14,224,1024,768,768,427.62,303.97,123.65,175.33,162.03,13.3
ViT-L-14-quickgelu,224,1024,768,768,427.62,303.97,123.65,175.33,162.03,13.3
convnext_xlarge,256,768,1024,1024,653.89,350.25,303.65,198.38,159.14,39.24
ViT-L-16-SigLIP-256,256,768,1024,1024,652.15,315.96,336.19,201.62,162.56,39.06
coca_ViT-L-14,224,1024,768,768,638.45,306.72,123.65,214.52,163.64,13.3
ViT-B-16-SigLIP-512,512,768,768,768,203.79,93.52,110.27,227.26,216.24,11.02
ViT-SO400M-14-SigLIP,224,768,1152,1152,877.36,427.68,449.68,233.54,220.35,13.19
ViT-L-14-280,280,1024,768,768,427.76,304.11,123.65,271.79,258.49,13.3
ViT-L-16-320,320,1024,768,768,427.95,304.3,123.65,271.93,258.63,13.3
ViT-H-16,224,1280,1024,1024,986.26,632.23,354.03,301.72,254.63,47.09
ViT-H-14-CLIPA,224,1280,1024,1024,968.24,632.07,336.16,354.02,334.59,19.43
nllb-clip-base,224,768,512,512,501.89,87.85,414.04,369.6,8.82,360.78
ViT-H-14,224,1280,1024,1024,986.11,632.08,354.03,381.68,334.59,47.09
ViT-H-14-quickgelu,224,1280,1024,1024,986.11,632.08,354.03,381.68,334.59,47.09
ViT-L-14-CLIPA-336,336,1024,768,768,414.54,304.29,110.25,387.39,381.92,5.47
EVA02-L-14-336,336,768,768,768,428.08,304.43,123.65,395.16,381.86,13.3
ViT-L-14-336,336,1024,768,768,427.94,304.29,123.65,395.22,381.92,13.3
ViT-L-16-SigLIP-384,384,768,1024,1024,652.48,316.28,336.19,422.91,383.85,39.06
convnext_xxlarge,256,768,1024,1024,1200.58,846.54,354.03,443.03,395.94,47.09
mt5-xl-ViT-H-14,224,1280,512,1024,2306.75,632.08,1674.68,514.04,334.59,179.45
EVA01-g-14,224,768,768,1024,1136.44,1012.59,123.85,547.36,534.06,13.3
RN50x64,448,128,1024,1024,623.26,420.38,202.88,552.65,529.11,23.55
EVA01-g-14-plus,224,768,1024,1024,1366.62,1012.59,354.03,581.15,534.06,47.09
ViT-g-14,224,1408,1024,1024,1366.68,1012.65,354.03,581.15,534.06,47.09
convnext_xxlarge_320,320,768,1024,1024,1200.58,846.54,354.03,665.74,618.65,47.09
xlm-roberta-large-ViT-H-14,224,1280,512,1024,1193.01,632.08,560.94,671.01,334.59,336.42
ViT-SO400M-14-SigLIP-384,384,768,1152,1152,877.96,428.23,449.73,723.48,670.35,53.13
ViT-H-14-CLIPA-336,336,1280,1024,1024,968.64,632.48,336.16,800.88,781.45,19.43
ViT-bigG-14-CLIPA,224,1664,1280,1280,2517.22,1844.9,672.32,1007.93,967.5,40.44
ViT-H-14-378-quickgelu,378,1280,1024,1024,986.71,632.68,354.03,1054.05,1006.96,47.09
ViT-bigG-14,224,1664,1280,1280,2539.57,1844.91,694.66,1065.36,967.5,97.86
nllb-clip-large,224,1280,512,1024,1399.22,632.08,767.14,1468.46,334.59,1133.87
ViT-e-14,224,1792,1280,1280,4581.09,3807.72,773.37,2091.45,1981.35,110.1
ViT-bigG-14-CLIPA-336,336,1664,1280,1280,2517.76,1845.44,672.32,2271.58,2231.15,40.44
EVA02-E-14,224,768,1024,1024,4704.59,4350.56,354.03,2311.42,2264.33,47.09
EVA02-E-14-plus,224,768,1280,1024,5044.89,4350.56,694.33,2362.19,2264.33,97.86
