{
    "model_id": {
        "value": "feng2023-crossmodal",
        "confidence": 0.4961
    },
    "model_name": {
        "value": "Cross-Modal Remote Sensing Foundation Model",
        "confidence": 0.7109
    },
    "version": {
        "value": null,
        "confidence": 0.0
    },
    "release_date": {
        "value": "2023-07-16",
        "confidence": 0.9386
    },
    "last_updated": {
        "value": "2023-07-16",
        "confidence": 0.9153
    },
    "short_description": {
        "value": "A self-supervised cross-modal remote sensing foundation model with multi-domain representation and cross-domain fusion, capable of extracting generalizable features from massive multi-modal data for various downstream tasks.",
        "confidence": 0.8604
    },
    "paper_link": {
        "value": "https://doi.org/10.1109/IGARSS52108.2023.10282433",
        "confidence": 0.9731
    },
    "citations": {
        "value": null,
        "confidence": 0.0
    },
    "repository": {
        "value": null,
        "confidence": 0.0
    },
    "weights": {
        "value": null,
        "confidence": 0.0
    },
    "backbone": {
        "value": "Transformer encoder with IMTA and MMTI units",
        "confidence": 0.8674
    },
    "num_layers": {
        "value": null,
        "confidence": 0.0
    },
    "num_parameters": {
        "value": null,
        "confidence": 0.0
    },
    "pretext_training_type": {
        "value": "Self-supervised learning with patch-based reconstruction loss and contrastive loss",
        "confidence": 0.8209
    },
    "masking_strategy": {
        "value": "Patch mask-based reconstruction loss",
        "confidence": 0.8405
    },
    "pretraining": {
        "value": "Self-supervised pretraining on millions of multi-modal remote sensing images from various sensors and platforms, using multi-domain representation and cross-domain fusion.",
        "confidence": 0.8115
    },
    "domain_knowledge": {
        "value": [
            {
                "value": "Multi-domain representation",
                "confidence": 0.7652
            },
            {
                "value": "Cross-domain fusion",
                "confidence": 0.9813
            },
            {
                "value": "Complex-valued network for SAR",
                "confidence": 0.7168
            },
            {
                "value": "Hyperbolic network for hyperspectral",
                "confidence": 0.6848
            }
        ]
    },
    "backbone_modifications": {
        "value": [
            {
                "value": "Topologically variable feature clustering",
                "confidence": 0.7666
            },
            {
                "value": "Multi-spatial feature extraction (MSFE)",
                "confidence": 0.603
            },
            {
                "value": "Multi-modal feature hybrid (MMFH)",
                "confidence": 0.7412
            }
        ]
    },
    "supported_sensors": {
        "value": [
            {
                "value": "QuickBird",
                "confidence": 0.9982
            },
            {
                "value": "Gaofen",
                "confidence": 1.0
            },
            {
                "value": "WorldView",
                "confidence": 1.0
            },
            {
                "value": "Sentinel",
                "confidence": 1.0
            },
            {
                "value": "TerraSAR-X",
                "confidence": 1.0
            }
        ]
    },
    "modality_integration_type": {
        "value": "Heterogeneous Multimodal",
        "confidence": 0.8769
    },
    "modalities": {
        "value": [
            {
                "value": "Optical",
                "confidence": 0.8436
            },
            {
                "value": "Infrared",
                "confidence": 0.8385
            },
            {
                "value": "SAR",
                "confidence": 0.9771
            },
            {
                "value": "Multispectral",
                "confidence": 0.9996
            },
            {
                "value": "Hyperspectral",
                "confidence": 1.0
            }
        ]
    },
    "spectral_alignment": {
        "value": "partial",
        "confidence": 0.9831
    },
    "temporal_alignment": {
        "value": "none",
        "confidence": 0.9739
    },
    "spatial_resolution": {
        "value": "variable",
        "confidence": 0.893
    },
    "temporal_resolution": {
        "value": "variable",
        "confidence": 1.0
    },
    "bands": {
        "value": null,
        "confidence": 0.0
    },
    "pretraining_phases": [
        {
            "dataset": {
                "value": "Multi-modal remote sensing images from QuickBird, Gaofen, WorldView, Sentinel, TerraSAR-X, etc.",
                "confidence": 0.3221
            },
            "regions_coverage": {
                "value": [
                    {
                        "value": "Most of the world",
                        "confidence": 0.7105
                    }
                ]
            },
            "time_range": {
                "value": null,
                "confidence": 0.0
            },
            "num_images": {
                "value": null,
                "confidence": 0.0
            },
            "token_size": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": "224x224",
                "confidence": 0.9469
            },
            "epochs": {
                "value": 200,
                "confidence": 1.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": "1e-4 (cosine decay, warmup 10 epochs)",
                "confidence": 0.3432
            },
            "augmentations": {
                "value": [
                    {
                        "value": "Random crop scaling",
                        "confidence": 0.7354
                    },
                    {
                        "value": "Horizontal flipping",
                        "confidence": 0.9977
                    }
                ]
            },
            "processing": {
                "value": []
            },
            "sampling": {
                "value": null,
                "confidence": 0.0
            },
            "processing_level": {
                "value": null,
                "confidence": 0.0
            },
            "cloud_cover": {
                "value": null,
                "confidence": 0.0
            },
            "missing_data": {
                "value": null,
                "confidence": 0.0
            },
            "masking_ratio": {
                "value": null,
                "confidence": 0.0
            }
        }
    ],
    "benchmarks": [
        {
            "task": {
                "value": "Scene classification",
                "confidence": 0.7979
            },
            "application": {
                "value": "Optical scene classification",
                "confidence": 0.7488
            },
            "dataset": {
                "value": "NWPU",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "OA (T.R.=10%)",
                        "confidence": 0.7452
                    },
                    {
                        "value": "OA (T.R.=20%)",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 93.64,
                        "confidence": 1.0
                    },
                    {
                        "value": 95.69,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": "Optical",
                        "confidence": 0.998
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": null,
                "confidence": 0.0
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": "Adam",
                "confidence": 0.9999
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": 0.0001,
                "confidence": 0.9999
            },
            "epochs": {
                "value": 200,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Patch mask-based reconstruction loss",
                "confidence": 0.5526
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Surface segmentation",
                "confidence": 0.8271
            },
            "application": {
                "value": "SAR surface segmentation",
                "confidence": 0.9607
            },
            "dataset": {
                "value": "WHU-OPT-SAR",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "Kappa",
                        "confidence": 1.0
                    },
                    {
                        "value": "mUA",
                        "confidence": 0.9998
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 67.6,
                        "confidence": 1.0
                    },
                    {
                        "value": 47.3,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": "SAR",
                        "confidence": 1.0
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": null,
                "confidence": 0.0
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": "Adam",
                "confidence": 0.9999
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": 0.0001,
                "confidence": 1.0
            },
            "epochs": {
                "value": 200,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Patch mask-based reconstruction loss",
                "confidence": 0.9995
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Target detection",
                "confidence": 0.772
            },
            "application": {
                "value": "Infrared target detection",
                "confidence": 0.8507
            },
            "dataset": {
                "value": "SIRST",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "OA",
                        "confidence": 0.9996
                    },
                    {
                        "value": "IoU",
                        "confidence": 1.0
                    },
                    {
                        "value": "P d",
                        "confidence": 0.4673
                    },
                    {
                        "value": "Fa",
                        "confidence": 0.8963
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 78.4,
                        "confidence": 1.0
                    },
                    {
                        "value": 79.1,
                        "confidence": 1.0
                    },
                    {
                        "value": 98.5,
                        "confidence": 1.0
                    },
                    {
                        "value": 1.9,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": "Infrared",
                        "confidence": 1.0
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": null,
                "confidence": 0.0
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": "Adam",
                "confidence": 1.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": 0.0001,
                "confidence": 1.0
            },
            "epochs": {
                "value": 200,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Patch mask-based reconstruction loss",
                "confidence": 0.9994
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Building extraction",
                "confidence": 0.9582
            },
            "application": {
                "value": "Multi-modal building extraction",
                "confidence": 0.8911
            },
            "dataset": {
                "value": "UBCv2",
                "confidence": 0.9989
            },
            "metrics": {
                "value": [
                    {
                        "value": "AP50",
                        "confidence": 1.0
                    },
                    {
                        "value": "mAP",
                        "confidence": 1.0
                    },
                    {
                        "value": "Precision",
                        "confidence": 1.0
                    },
                    {
                        "value": "Recall",
                        "confidence": 1.0
                    },
                    {
                        "value": "F1",
                        "confidence": 0.9739
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 52.9,
                        "confidence": 1.0
                    },
                    {
                        "value": 35.3,
                        "confidence": 1.0
                    },
                    {
                        "value": 72.0,
                        "confidence": 0.9818
                    },
                    {
                        "value": 67.9,
                        "confidence": 1.0
                    },
                    {
                        "value": 69.9,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": "Optical",
                        "confidence": 0.9991
                    },
                    {
                        "value": "SAR",
                        "confidence": 1.0
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": null,
                "confidence": 0.0
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": "Adam",
                "confidence": 0.9795
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": 0.0001,
                "confidence": 1.0
            },
            "epochs": {
                "value": 200,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Patch mask-based reconstruction loss, contrastive loss",
                "confidence": 0.5174
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Change detection",
                "confidence": 0.998
            },
            "application": {
                "value": "Multi-modal change detection",
                "confidence": 0.9921
            },
            "dataset": {
                "value": "multimodalCD",
                "confidence": 0.996
            },
            "metrics": {
                "value": [
                    {
                        "value": "Precision",
                        "confidence": 0.8416
                    },
                    {
                        "value": "Recall",
                        "confidence": 1.0
                    },
                    {
                        "value": "F1",
                        "confidence": 0.9994
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 72.0,
                        "confidence": 0.9973
                    },
                    {
                        "value": 67.9,
                        "confidence": 1.0
                    },
                    {
                        "value": 69.9,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": "Optical",
                        "confidence": 1.0
                    },
                    {
                        "value": "SAR",
                        "confidence": 1.0
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": null,
                "confidence": 0.0
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": "Adam",
                "confidence": 1.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": 0.0001,
                "confidence": 1.0
            },
            "epochs": {
                "value": 200,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Patch mask-based reconstruction loss, contrastive loss",
                "confidence": 0.9984
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        }
    ]
}