{
    "model_id": {
        "value": "DINO-MM",
        "confidence": 0.9522
    },
    "model_name": {
        "value": "DINO-MM",
        "confidence": 1.0
    },
    "version": {
        "value": null,
        "confidence": 0.0
    },
    "release_date": {
        "value": "2022-06-14",
        "confidence": 1.0
    },
    "last_updated": {
        "value": "2022-06-14",
        "confidence": 0.9888
    },
    "short_description": {
        "value": "DINO-MM is a self-supervised vision transformer model for joint SAR-optical representation learning in remote sensing. It extends the DINO SSL algorithm with a RandomSensorDrop augmentation to enable flexible unimodal and multimodal learning.",
        "confidence": 0.8181
    },
    "paper_link": {
        "value": "https://arxiv.org/abs/2204.05381",
        "confidence": 1.0
    },
    "citations": {
        "value": null,
        "confidence": 0.0
    },
    "repository": {
        "value": "https://github.com/zhu-xlab/DINO-MM",
        "confidence": 1.0
    },
    "weights": {
        "value": null,
        "confidence": 0.0
    },
    "backbone": {
        "value": "ViT-S/8",
        "confidence": 0.9997
    },
    "num_layers": {
        "value": null,
        "confidence": 0.0
    },
    "num_parameters": {
        "value": null,
        "confidence": 0.0
    },
    "pretext_training_type": {
        "value": "Contrastive self-supervised learning with knowledge distillation (DINO)",
        "confidence": 0.7506
    },
    "masking_strategy": {
        "value": "RandomSensorDrop: randomly masks out SAR or optical channels of the concatenated input image",
        "confidence": 0.8787
    },
    "pretraining": {
        "value": "Self-supervised pretraining using DINO with vision transformer backbone and RandomSensorDrop for multimodal SAR-optical data.",
        "confidence": 0.6821
    },
    "domain_knowledge": {
        "value": [],
        "confidence": 0.0
    },
    "backbone_modifications": {
        "value": [
            {
                "value": "Added RandomSensorDrop augmentation module",
                "confidence": 0.5653
            }
        ]
    },
    "supported_sensors": {
        "value": [
            {
                "value": "Sentinel-1",
                "confidence": 1.0
            },
            {
                "value": "Sentinel-2",
                "confidence": 1.0
            }
        ]
    },
    "modality_integration_type": {
        "value": "Homogeneous Multimodal",
        "confidence": 0.9981
    },
    "modalities": {
        "value": [
            {
                "value": "SAR",
                "confidence": 0.9311
            },
            {
                "value": "Multispectral",
                "confidence": 0.9874
            }
        ]
    },
    "spectral_alignment": {
        "value": "none",
        "confidence": 0.9469
    },
    "temporal_alignment": {
        "value": "none",
        "confidence": 1.0
    },
    "spatial_resolution": {
        "value": "10m",
        "confidence": 1.0
    },
    "temporal_resolution": {
        "value": null,
        "confidence": 0.0
    },
    "bands": {
        "value": [
            {
                "value": "SAR",
                "confidence": 0.5627
            },
            {
                "value": "Optical",
                "confidence": 0.4646
            }
        ]
    },
    "pretraining_phases": [
        {
            "dataset": {
                "value": "BigEarthNet-MM",
                "confidence": 1.0
            },
            "regions_coverage": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "time_range": {
                "value": null,
                "confidence": 0.0
            },
            "num_images": {
                "value": 311667,
                "confidence": 0.9998
            },
            "token_size": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": "120x120",
                "confidence": 0.9999
            },
            "epochs": {
                "value": 100,
                "confidence": 1.0
            },
            "batch_size": {
                "value": 256,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": "linearly ramped up to 0.0005 over first 10 epochs, then cosine decay",
                "confidence": 0.3442
            },
            "augmentations": {
                "value": [
                    {
                        "value": "RandomResizedCrop",
                        "confidence": 0.8848
                    },
                    {
                        "value": "RandomHorizontalFlip",
                        "confidence": 1.0
                    },
                    {
                        "value": "RandomColorJitter",
                        "confidence": 1.0
                    },
                    {
                        "value": "RandomGrayscale",
                        "confidence": 1.0
                    },
                    {
                        "value": "RandomGaussianBlur",
                        "confidence": 1.0
                    },
                    {
                        "value": "RandomSolarize",
                        "confidence": 1.0
                    },
                    {
                        "value": "RandomSensorDrop",
                        "confidence": 0.9952
                    }
                ]
            },
            "processing": {
                "value": [
                    {
                        "value": "Resampled to 10m spatial resolution",
                        "confidence": 0.4242
                    },
                    {
                        "value": "Cloud and snow artifact filtering",
                        "confidence": 0.3248
                    }
                ]
            },
            "sampling": {
                "value": null,
                "confidence": 0.0
            },
            "processing_level": {
                "value": null,
                "confidence": 0.0
            },
            "cloud_cover": {
                "value": "Cloud and snow artifact filtering",
                "confidence": 0.3341
            },
            "missing_data": {
                "value": null,
                "confidence": 0.0
            },
            "masking_ratio": {
                "value": null,
                "confidence": 0.0
            }
        }
    ],
    "benchmarks": [
        {
            "task": {
                "value": "Scene classification",
                "confidence": 0.6007
            },
            "application": {
                "value": "Remote sensing multimodal (SAR-optical) image classification",
                "confidence": 0.3068
            },
            "dataset": {
                "value": "BigEarthNet-MM",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "mean average precision",
                        "confidence": 0.8893
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 79.5,
                        "confidence": 0.8439
                    },
                    {
                        "value": 87.1,
                        "confidence": 1.0
                    },
                    {
                        "value": 87.1,
                        "confidence": 1.0
                    },
                    {
                        "value": 75.3,
                        "confidence": 1.0
                    },
                    {
                        "value": 82.9,
                        "confidence": 1.0
                    },
                    {
                        "value": 82.8,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": "Sentinel-1",
                        "confidence": 1.0
                    },
                    {
                        "value": "Sentinel-2",
                        "confidence": 1.0
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": 590326,
                "confidence": 0.9912
            },
            "num_samples": {
                "value": 118065,
                "confidence": 0.8762
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": "120x120",
                "confidence": 1.0
            },
            "spatial_resolution": {
                "value": "10m",
                "confidence": 1.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "SAR",
                        "confidence": 0.9997
                    },
                    {
                        "value": "Multispectral",
                        "confidence": 0.9958
                    }
                ]
            },
            "augmentations": {
                "value": [
                    {
                        "value": "RandomResizedCrop",
                        "confidence": 0.9689
                    },
                    {
                        "value": "RandomHorizontalFlip",
                        "confidence": 1.0
                    },
                    {
                        "value": "RandomColorJitter",
                        "confidence": 1.0
                    },
                    {
                        "value": "RandomGrayscale",
                        "confidence": 1.0
                    },
                    {
                        "value": "RandomGaussianBlur",
                        "confidence": 1.0
                    },
                    {
                        "value": "RandomSolarize",
                        "confidence": 1.0
                    },
                    {
                        "value": "RandomSensorDrop",
                        "confidence": 1.0
                    }
                ]
            },
            "optimizer": {
                "value": "SGD (linear classifier), AdamW (supervised baseline)",
                "confidence": 0.3282
            },
            "batch_size": {
                "value": 256,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.01,
                "confidence": 1.0
            },
            "epochs": {
                "value": 100,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "torch.nn.MultiLabelSoftMarginLoss (supervised baseline)",
                "confidence": 0.4035
            },
            "split_ratio": {
                "value": "311667 train / 103944 val / 118065 test",
                "confidence": 0.5136
            }
        }
    ]
}