{
    "model_id": {
        "value": "SwiMDiff",
        "confidence": 0.9954
    },
    "model_name": {
        "value": "SwiMDiff",
        "confidence": 1.0
    },
    "version": {
        "value": "v1",
        "confidence": 0.5578
    },
    "release_date": {
        "value": "2024-01-10",
        "confidence": 0.9888
    },
    "last_updated": {
        "value": "2024-01-10",
        "confidence": 0.9912
    },
    "short_description": {
        "value": "SwiMDiff is a self-supervised pre-training framework for remote sensing images that integrates scene-wide matching contrastive learning with a diffusion model constraint to enhance both global semantic and fine-grained feature extraction.",
        "confidence": 0.8651
    },
    "paper_link": {
        "value": "https://arxiv.org/abs/2401.05093",
        "confidence": 1.0
    },
    "citations": {
        "value": null,
        "confidence": 0.0
    },
    "repository": {
        "value": null,
        "confidence": 0.0
    },
    "weights": {
        "value": null,
        "confidence": 0.0
    },
    "backbone": {
        "value": "ResNet-18",
        "confidence": 1.0
    },
    "num_layers": {
        "value": null,
        "confidence": 0.0
    },
    "num_parameters": {
        "value": null,
        "confidence": 0.0
    },
    "pretext_training_type": {
        "value": "Self-supervised contrastive learning with scene-wide matching and auxiliary diffusion constraint",
        "confidence": 0.835
    },
    "masking_strategy": {
        "value": "Pixel-level diffusion constraint (Gaussian noise addition)",
        "confidence": 0.6325
    },
    "pretraining": {
        "value": "Joint training of a dual-branch contrastive learning network (MoCo-v2) with a diffusion model (DDPM) using scene-wide matching and pixel-level diffusion constraint.",
        "confidence": 0.6912
    },
    "domain_knowledge": {
        "value": [
            {
                "value": "Scene-wide matching to avoid false negatives from the same scene",
                "confidence": 0.5955
            },
            {
                "value": "Diffusion model for pixel-level detail",
                "confidence": 0.5882
            }
        ]
    },
    "backbone_modifications": {
        "value": [
            {
                "value": "Integration of diffusion model features with contrastive encoder features at feature level",
                "confidence": 0.5575
            }
        ]
    },
    "supported_sensors": {
        "value": [
            {
                "value": "Sentinel-2",
                "confidence": 1.0
            }
        ]
    },
    "modality_integration_type": {
        "value": "Unimodal",
        "confidence": 1.0
    },
    "modalities": {
        "value": [
            {
                "value": "Multispectral",
                "confidence": 0.9998
            }
        ]
    },
    "spectral_alignment": {
        "value": "none",
        "confidence": 0.7352
    },
    "temporal_alignment": {
        "value": "none",
        "confidence": 0.9986
    },
    "spatial_resolution": {
        "value": "10m",
        "confidence": 0.9375
    },
    "temporal_resolution": {
        "value": null,
        "confidence": 0.0
    },
    "bands": {
        "value": [
            {
                "value": "RGB",
                "confidence": 0.9792
            },
            {
                "value": "10 Sentinel-2 bands",
                "confidence": 0.4567
            }
        ]
    },
    "pretraining_phases": [
        {
            "dataset": {
                "value": "Sen12MS (subset)",
                "confidence": 0.5358
            },
            "regions_coverage": {
                "value": [
                    {
                        "value": "Global (Sen12MS has worldwide coverage)",
                        "confidence": 0.3037
                    }
                ]
            },
            "time_range": {
                "value": null,
                "confidence": 0.0
            },
            "num_images": {
                "value": 40000,
                "confidence": 0.9527
            },
            "token_size": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": "256x256",
                "confidence": 1.0
            },
            "epochs": {
                "value": 1000,
                "confidence": 1.0
            },
            "batch_size": {
                "value": 256,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": "0.03 (SGD for CL), 0.001 (Adam for diffusion)",
                "confidence": 0.3974
            },
            "augmentations": {
                "value": [
                    {
                        "value": "random cropping",
                        "confidence": 0.7295
                    },
                    {
                        "value": "color jittering",
                        "confidence": 0.9953
                    },
                    {
                        "value": "flipping",
                        "confidence": 0.964
                    }
                ]
            },
            "processing": {
                "value": [
                    {
                        "value": "Atmospheric correction",
                        "confidence": 0.6619
                    }
                ]
            },
            "sampling": {
                "value": "Randomly select 10000 Sentinel-2 RGB images per season (4 seasons)",
                "confidence": 0.309
            },
            "processing_level": {
                "value": null,
                "confidence": 0.0
            },
            "cloud_cover": {
                "value": null,
                "confidence": 0.0
            },
            "missing_data": {
                "value": null,
                "confidence": 0.0
            },
            "masking_ratio": {
                "value": null,
                "confidence": 0.0
            }
        }
    ],
    "benchmarks": [
        {
            "task": {
                "value": "Change detection",
                "confidence": 0.6463
            },
            "application": {
                "value": "Change detection in multispectral satellite imagery",
                "confidence": 0.3171
            },
            "dataset": {
                "value": "OSCD",
                "confidence": 0.9469
            },
            "metrics": {
                "value": [
                    {
                        "value": "Precision",
                        "confidence": 0.9163
                    },
                    {
                        "value": "Recall",
                        "confidence": 1.0
                    },
                    {
                        "value": "F1",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 63.6,
                        "confidence": 1.0
                    },
                    {
                        "value": 40.9,
                        "confidence": 1.0
                    },
                    {
                        "value": 49.6,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": "Sentinel-2",
                        "confidence": 1.0
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": "Urban areas (OSCD)",
                        "confidence": 0.3029
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": 2,
                "confidence": 1.0
            },
            "classes": {
                "value": [
                    {
                        "value": "Change",
                        "confidence": 0.5083
                    },
                    {
                        "value": "No change",
                        "confidence": 0.7304
                    }
                ]
            },
            "image_resolution": {
                "value": "256x256",
                "confidence": 0.5396
            },
            "spatial_resolution": {
                "value": "10m, 20m, 60m",
                "confidence": 0.5638
            },
            "bands_used": {
                "value": [
                    {
                        "value": "RGB",
                        "confidence": 0.9999
                    }
                ]
            },
            "augmentations": {
                "value": [
                    {
                        "value": "random horizontal flip",
                        "confidence": 0.7705
                    },
                    {
                        "value": "90-degree rotation",
                        "confidence": 0.5139
                    }
                ]
            },
            "optimizer": {
                "value": "Adam",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 32,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.001,
                "confidence": 1.0
            },
            "epochs": {
                "value": 100,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "BCE Loss",
                "confidence": 0.7901
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Change detection",
                "confidence": 1.0
            },
            "application": {
                "value": "Change detection in very high-resolution bitemporal imagery",
                "confidence": 0.3133
            },
            "dataset": {
                "value": "LEVIR-CD",
                "confidence": 0.9996
            },
            "metrics": {
                "value": [
                    {
                        "value": "Precision",
                        "confidence": 1.0
                    },
                    {
                        "value": "Recall",
                        "confidence": 1.0
                    },
                    {
                        "value": "F1",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 83.6,
                        "confidence": 1.0
                    },
                    {
                        "value": 78.3,
                        "confidence": 1.0
                    },
                    {
                        "value": 80.9,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": "Google Earth",
                        "confidence": 0.9968
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": "Texas, USA",
                        "confidence": 0.9641
                    }
                ]
            },
            "original_samples": {
                "value": 637,
                "confidence": 0.9998
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": 2,
                "confidence": 1.0
            },
            "classes": {
                "value": [
                    {
                        "value": "Change",
                        "confidence": 0.9989
                    },
                    {
                        "value": "No change",
                        "confidence": 0.9993
                    }
                ]
            },
            "image_resolution": {
                "value": "256x256 (tiles from 1024x1024)",
                "confidence": 0.5448
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "RGB",
                        "confidence": 0.9982
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": "Adam",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 32,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.001,
                "confidence": 1.0
            },
            "epochs": {
                "value": 100,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "BCE Loss",
                "confidence": 0.9738
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Land-cover classification",
                "confidence": 0.9811
            },
            "application": {
                "value": "Multi-label land-cover classification",
                "confidence": 0.7909
            },
            "dataset": {
                "value": "BigEarthNet",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "mAP",
                        "confidence": 0.9859
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 69.9,
                        "confidence": 1.0
                    },
                    {
                        "value": 81.1,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": "Sentinel-2",
                        "confidence": 1.0
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": "Europe (10 countries)",
                        "confidence": 0.6081
                    }
                ]
            },
            "original_samples": {
                "value": 590326,
                "confidence": 1.0
            },
            "num_samples": {
                "value": 59032,
                "confidence": 0.8582
            },
            "sampling_percentage": {
                "value": 10,
                "confidence": 1.0
            },
            "num_classes": {
                "value": 19,
                "confidence": 1.0
            },
            "classes": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": "10m, 20m, 60m",
                "confidence": 0.991
            },
            "bands_used": {
                "value": [
                    {
                        "value": "RGB",
                        "confidence": 0.8886
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": "Adam",
                "confidence": 0.9998
            },
            "batch_size": {
                "value": 256,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.001,
                "confidence": 1.0
            },
            "epochs": {
                "value": 100,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Cross-entropy Loss",
                "confidence": 0.6967
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Land-cover classification",
                "confidence": 1.0
            },
            "application": {
                "value": "Single-label land-cover classification",
                "confidence": 0.9184
            },
            "dataset": {
                "value": "EuroSAT",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "mAP",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 89.1,
                        "confidence": 1.0
                    },
                    {
                        "value": 96.1,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": "Sentinel-2",
                        "confidence": 1.0
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": "Europe",
                        "confidence": 0.986
                    }
                ]
            },
            "original_samples": {
                "value": 27000,
                "confidence": 1.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": 10,
                "confidence": 1.0
            },
            "classes": {
                "value": [
                    {
                        "value": "Annual Crop",
                        "confidence": 0.9882
                    },
                    {
                        "value": "Forest",
                        "confidence": 1.0
                    },
                    {
                        "value": "Herbaceous Vegetation",
                        "confidence": 0.9993
                    },
                    {
                        "value": "Highway",
                        "confidence": 1.0
                    },
                    {
                        "value": "Industrial",
                        "confidence": 1.0
                    },
                    {
                        "value": "Pasture",
                        "confidence": 1.0
                    },
                    {
                        "value": "Permanent Crop",
                        "confidence": 1.0
                    },
                    {
                        "value": "Residential",
                        "confidence": 1.0
                    },
                    {
                        "value": "River",
                        "confidence": 1.0
                    },
                    {
                        "value": "Sea Lake",
                        "confidence": 0.9838
                    }
                ]
            },
            "image_resolution": {
                "value": "64x64",
                "confidence": 1.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "RGB",
                        "confidence": 0.9604
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": "Adam",
                "confidence": 0.9705
            },
            "batch_size": {
                "value": 256,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.001,
                "confidence": 1.0
            },
            "epochs": {
                "value": 100,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Cross-entropy Loss",
                "confidence": 1.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        }
    ]
}