{
    "model_id": {
        "value": "satmae_pp",
        "confidence": 0.6693
    },
    "model_name": {
        "value": "SatMAE++",
        "confidence": 1.0
    },
    "version": {
        "value": "1.0",
        "confidence": 0.9391
    },
    "release_date": {
        "value": "2024-03-08",
        "confidence": 1.0
    },
    "last_updated": {
        "value": "2024-03-08",
        "confidence": 0.9999
    },
    "short_description": {
        "value": "SatMAE++ is a multi-scale masked autoencoder framework for pre-training transformers on multi-spectral and optical satellite imagery, leveraging convolution-based upsampling blocks for multi-scale reconstruction. It achieves state-of-the-art results on several remote sensing benchmarks.",
        "confidence": 0.8186
    },
    "paper_link": {
        "value": "https://arxiv.org/abs/2403.05419",
        "confidence": 1.0
    },
    "citations": {
        "value": null,
        "confidence": 0.0
    },
    "repository": {
        "value": "https://github.com/techmn/satmae_pp",
        "confidence": 1.0
    },
    "weights": {
        "value": null,
        "confidence": 0.0
    },
    "backbone": {
        "value": "ViT-Large",
        "confidence": 1.0
    },
    "num_layers": {
        "value": null,
        "confidence": 0.0
    },
    "num_parameters": {
        "value": null,
        "confidence": 0.0
    },
    "pretext_training_type": {
        "value": "Masked Autoencoder (MAE) with multi-scale reconstruction",
        "confidence": 0.8457
    },
    "masking_strategy": {
        "value": "Random masking of 75% of patch tokens; independent masking for multi-spectral channel groups",
        "confidence": 0.758
    },
    "pretraining": {
        "value": "Multi-scale masked autoencoder pretraining on large-scale RGB and multi-spectral satellite datasets (fMoW-RGB, fMoW-Sentinel), reconstructing images at multiple scales using convolution-based upsampling blocks.",
        "confidence": 0.7464
    },
    "domain_knowledge": {
        "value": [
            {
                "value": "Multi-scale information",
                "confidence": 0.6673
            },
            {
                "value": "Channel grouping based on GSD for multi-spectral data",
                "confidence": 0.6411
            }
        ]
    },
    "backbone_modifications": {
        "value": [
            {
                "value": "Convolution-based upsampling blocks for multi-scale reconstruction",
                "confidence": 0.8301
            }
        ]
    },
    "supported_sensors": {
        "value": [
            {
                "value": "Sentinel-2",
                "confidence": 0.9978
            },
            {
                "value": "High-resolution commercial satellites (fMoW-RGB)",
                "confidence": 0.7474
            }
        ]
    },
    "modality_integration_type": {
        "value": "Homogeneous Multimodal",
        "confidence": 0.8985
    },
    "modalities": {
        "value": [
            {
                "value": "Multispectral",
                "confidence": 0.8267
            },
            {
                "value": "Optical (RGB)",
                "confidence": 0.7544
            }
        ]
    },
    "spectral_alignment": {
        "value": "partial",
        "confidence": 0.987
    },
    "temporal_alignment": {
        "value": "none",
        "confidence": 1.0
    },
    "spatial_resolution": {
        "value": "variable (10m, 20m, 60m for Sentinel-2; high-res for fMoW-RGB)",
        "confidence": 0.8384
    },
    "temporal_resolution": {
        "value": null,
        "confidence": 0.0
    },
    "bands": {
        "value": [
            {
                "value": "B2",
                "confidence": 0.9722
            },
            {
                "value": "B3",
                "confidence": 1.0
            },
            {
                "value": "B4",
                "confidence": 1.0
            },
            {
                "value": "B5",
                "confidence": 0.9839
            },
            {
                "value": "B6",
                "confidence": 1.0
            },
            {
                "value": "B7",
                "confidence": 1.0
            },
            {
                "value": "B8",
                "confidence": 1.0
            },
            {
                "value": "B8A",
                "confidence": 1.0
            },
            {
                "value": "B11",
                "confidence": 0.9994
            },
            {
                "value": "B12",
                "confidence": 1.0
            }
        ]
    },
    "pretraining_phases": [
        {
            "dataset": {
                "value": "fMoW-RGB",
                "confidence": 0.9996
            },
            "regions_coverage": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "time_range": {
                "value": null,
                "confidence": 0.0
            },
            "num_images": {
                "value": 363000,
                "confidence": 0.9666
            },
            "token_size": {
                "value": "16x16",
                "confidence": 0.8797
            },
            "image_resolution": {
                "value": "224x224",
                "confidence": 0.9999
            },
            "epochs": {
                "value": 800,
                "confidence": 1.0
            },
            "batch_size": {
                "value": 64,
                "confidence": 0.9947
            },
            "learning_rate": {
                "value": "7e-4",
                "confidence": 0.9988
            },
            "augmentations": {
                "value": [
                    {
                        "value": "Random crop (448x448)",
                        "confidence": 0.4149
                    },
                    {
                        "value": "Resize shorter side to 448",
                        "confidence": 0.5013
                    },
                    {
                        "value": "Bilinear interpolation to 224x224",
                        "confidence": 0.5124
                    }
                ]
            },
            "processing": {
                "value": [
                    {
                        "value": "Resize",
                        "confidence": 0.5042
                    },
                    {
                        "value": "Crop",
                        "confidence": 0.681
                    },
                    {
                        "value": "Downsample",
                        "confidence": 0.5397
                    }
                ]
            },
            "sampling": {
                "value": "Random crop",
                "confidence": 0.4406
            },
            "processing_level": {
                "value": null,
                "confidence": 0.0
            },
            "cloud_cover": {
                "value": null,
                "confidence": 0.0
            },
            "missing_data": {
                "value": null,
                "confidence": 0.0
            },
            "masking_ratio": {
                "value": 0.75,
                "confidence": 0.9998
            }
        },
        {
            "dataset": {
                "value": "fMoW-Sentinel",
                "confidence": 1.0
            },
            "regions_coverage": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "time_range": {
                "value": null,
                "confidence": 0.0
            },
            "num_images": {
                "value": 712874,
                "confidence": 0.9994
            },
            "token_size": {
                "value": "8x8",
                "confidence": 1.0
            },
            "image_resolution": {
                "value": "96x96",
                "confidence": 1.0
            },
            "epochs": {
                "value": 50,
                "confidence": 1.0
            },
            "batch_size": {
                "value": 8,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": "1e-4",
                "confidence": 1.0
            },
            "augmentations": {
                "value": [
                    {
                        "value": "Resize shorter side to 384",
                        "confidence": 0.8338
                    },
                    {
                        "value": "Random crop (384x384)",
                        "confidence": 0.5435
                    },
                    {
                        "value": "Downsample to 192x192 and 96x96",
                        "confidence": 0.7262
                    }
                ]
            },
            "processing": {
                "value": [
                    {
                        "value": "Channel grouping",
                        "confidence": 0.7404
                    },
                    {
                        "value": "Discard B1, B9, B10",
                        "confidence": 0.427
                    }
                ]
            },
            "sampling": {
                "value": "Random crop",
                "confidence": 0.8942
            },
            "processing_level": {
                "value": null,
                "confidence": 0.0
            },
            "cloud_cover": {
                "value": null,
                "confidence": 0.0
            },
            "missing_data": {
                "value": null,
                "confidence": 0.0
            },
            "masking_ratio": {
                "value": 0.75,
                "confidence": 1.0
            }
        }
    ],
    "benchmarks": [
        {
            "task": {
                "value": "Scene classification",
                "confidence": 0.5848
            },
            "application": {
                "value": "Remote sensing scene classification",
                "confidence": 0.4026
            },
            "dataset": {
                "value": "fMoW-RGB",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "Top1 Acc.",
                        "confidence": 0.8272
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 78.14,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": "High-resolution commercial satellites",
                        "confidence": 0.6715
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": 363000,
                "confidence": 0.6716
            },
            "num_samples": {
                "value": 363000,
                "confidence": 0.9978
            },
            "sampling_percentage": {
                "value": 100,
                "confidence": 1.0
            },
            "num_classes": {
                "value": 62,
                "confidence": 1.0
            },
            "classes": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "image_resolution": {
                "value": "224x224",
                "confidence": 1.0
            },
            "spatial_resolution": {
                "value": "variable",
                "confidence": 0.7613
            },
            "bands_used": {
                "value": [
                    {
                        "value": "RGB",
                        "confidence": 0.7992
                    }
                ]
            },
            "augmentations": {
                "value": [
                    {
                        "value": "Random crop (448x448)",
                        "confidence": 0.7666
                    },
                    {
                        "value": "Resize",
                        "confidence": 0.9714
                    },
                    {
                        "value": "Bilinear interpolation",
                        "confidence": 0.8679
                    }
                ]
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 64,
                "confidence": 0.8964
            },
            "learning_rate": {
                "value": 0.001,
                "confidence": 0.774
            },
            "epochs": {
                "value": 50,
                "confidence": 0.9992
            },
            "loss_function": {
                "value": "Cross-entropy",
                "confidence": 0.5608
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Scene classification",
                "confidence": 0.9989
            },
            "application": {
                "value": "Remote sensing scene classification",
                "confidence": 0.9252
            },
            "dataset": {
                "value": "fMoW-Sentinel",
                "confidence": 0.9999
            },
            "metrics": {
                "value": [
                    {
                        "value": "Top1 Acc.",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 63.23,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": "Sentinel-2",
                        "confidence": 1.0
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": 712874,
                "confidence": 0.8138
            },
            "num_samples": {
                "value": 712874,
                "confidence": 0.9937
            },
            "sampling_percentage": {
                "value": 100,
                "confidence": 1.0
            },
            "num_classes": {
                "value": 62,
                "confidence": 1.0
            },
            "classes": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "image_resolution": {
                "value": "96x96",
                "confidence": 1.0
            },
            "spatial_resolution": {
                "value": "10m, 20m, 60m",
                "confidence": 0.6442
            },
            "bands_used": {
                "value": [
                    {
                        "value": "B2",
                        "confidence": 1.0
                    },
                    {
                        "value": "B3",
                        "confidence": 1.0
                    },
                    {
                        "value": "B4",
                        "confidence": 1.0
                    },
                    {
                        "value": "B5",
                        "confidence": 0.9985
                    },
                    {
                        "value": "B6",
                        "confidence": 1.0
                    },
                    {
                        "value": "B7",
                        "confidence": 1.0
                    },
                    {
                        "value": "B8",
                        "confidence": 1.0
                    },
                    {
                        "value": "B8A",
                        "confidence": 1.0
                    },
                    {
                        "value": "B11",
                        "confidence": 1.0
                    },
                    {
                        "value": "B12",
                        "confidence": 1.0
                    }
                ]
            },
            "augmentations": {
                "value": [
                    {
                        "value": "Resize",
                        "confidence": 0.8366
                    },
                    {
                        "value": "Random crop",
                        "confidence": 0.9448
                    },
                    {
                        "value": "Downsample",
                        "confidence": 0.8212
                    }
                ]
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 8,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.0002,
                "confidence": 1.0
            },
            "epochs": {
                "value": 30,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Cross-entropy",
                "confidence": 0.9984
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Land cover classification",
                "confidence": 0.8827
            },
            "application": {
                "value": "Land use/land cover classification",
                "confidence": 0.4782
            },
            "dataset": {
                "value": "EuroSAT",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "Top1 Acc.",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 99.04,
                        "confidence": 0.9996
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": "Sentinel-2",
                        "confidence": 0.9704
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": 27000,
                "confidence": 1.0
            },
            "num_samples": {
                "value": 27000,
                "confidence": 1.0
            },
            "sampling_percentage": {
                "value": 100,
                "confidence": 1.0
            },
            "num_classes": {
                "value": 10,
                "confidence": 1.0
            },
            "classes": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "RGB",
                        "confidence": 0.6062
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Scene classification",
                "confidence": 0.9059
            },
            "application": {
                "value": "Remote sensing scene classification",
                "confidence": 0.8417
            },
            "dataset": {
                "value": "RESISC-45",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "Top1 Acc.",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 97.48,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": 31500,
                "confidence": 1.0
            },
            "num_samples": {
                "value": 31500,
                "confidence": 1.0
            },
            "sampling_percentage": {
                "value": 100,
                "confidence": 1.0
            },
            "num_classes": {
                "value": 45,
                "confidence": 1.0
            },
            "classes": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "RGB",
                        "confidence": 0.9763
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Land cover classification",
                "confidence": 0.8984
            },
            "application": {
                "value": "Land use/land cover classification",
                "confidence": 0.8995
            },
            "dataset": {
                "value": "UC-Merced",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "Top1 Acc.",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 97.62,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": "US",
                        "confidence": 0.9298
                    }
                ]
            },
            "original_samples": {
                "value": 2100,
                "confidence": 1.0
            },
            "num_samples": {
                "value": 2100,
                "confidence": 1.0
            },
            "sampling_percentage": {
                "value": 100,
                "confidence": 1.0
            },
            "num_classes": {
                "value": 21,
                "confidence": 1.0
            },
            "classes": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "RGB",
                        "confidence": 0.9998
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Multi-label classification",
                "confidence": 0.9303
            },
            "application": {
                "value": "Land cover multi-label classification",
                "confidence": 0.6315
            },
            "dataset": {
                "value": "BigEarthNet",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "mAP",
                        "confidence": 0.9361
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 85.11,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": "Sentinel-2",
                        "confidence": 1.0
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": 590326,
                "confidence": 1.0
            },
            "num_samples": {
                "value": 59032,
                "confidence": 0.7746
            },
            "sampling_percentage": {
                "value": 10,
                "confidence": 1.0
            },
            "num_classes": {
                "value": 19,
                "confidence": 1.0
            },
            "classes": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "Sentinel-2 bands",
                        "confidence": 0.5192
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "loss_function": {
                "value": "Multi-label soft margin loss",
                "confidence": 0.8896
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        }
    ]
}