{
    "model_id": {
        "value": "RSPrompter",
        "confidence": 0.9519
    },
    "model_name": {
        "value": "RSPrompter",
        "confidence": 1.0
    },
    "version": {
        "value": null,
        "confidence": 0.0
    },
    "release_date": {
        "value": null,
        "confidence": 0.0
    },
    "last_updated": {
        "value": null,
        "confidence": 0.0
    },
    "short_description": {
        "value": "RSPrompter is a prompt learning method that augments the Segment Anything Model (SAM) for automated instance segmentation in remote sensing imagery by learning to generate category-related prompts for SAM, enabling semantically discernible segmentation results.",
        "confidence": 0.7763
    },
    "paper_link": {
        "value": "https://arxiv.org/abs/2306.16269",
        "confidence": 1.0
    },
    "citations": {
        "value": null,
        "confidence": 0.0
    },
    "repository": {
        "value": "https://kyanchen.github.io/RSPrompter",
        "confidence": 1.0
    },
    "weights": {
        "value": null,
        "confidence": 0.0
    },
    "backbone": {
        "value": "ViT-Huge (SAM)",
        "confidence": 0.7953
    },
    "num_layers": {
        "value": null,
        "confidence": 0.0
    },
    "num_parameters": {
        "value": null,
        "confidence": 0.0
    },
    "pretext_training_type": {
        "value": "Prompt learning on frozen SAM backbone",
        "confidence": 0.7053
    },
    "masking_strategy": {
        "value": "Not applicable: RSPrompter generates prompt embeddings and does not use masking-based pretraining",
        "confidence": 0.5392
    },
    "pretraining": {
        "value": "Builds on the SAM foundation model, whose MAE-pretrained ViT image encoder was trained on over 1 billion masks from 11 million images (SA-1B). RSPrompter itself does not pretrain from scratch; it learns to prompt the frozen SAM.",
        "confidence": 0.6573
    },
    "domain_knowledge": {
        "value": [
            {
                "value": "Remote sensing instance segmentation",
                "confidence": 0.5178
            },
            {
                "value": "Prompt learning",
                "confidence": 0.6492
            },
            {
                "value": "Multi-scale feature aggregation",
                "confidence": 0.5929
            }
        ]
    },
    "backbone_modifications": {
        "value": [
            {
                "value": "Lightweight multi-scale feature enhancer",
                "confidence": 0.94
            },
            {
                "value": "Prompter module for prompt embedding generation",
                "confidence": 0.5475
            }
        ]
    },
    "supported_sensors": {
        "value": [
            {
                "value": "Optical RGB",
                "confidence": 0.7246
            },
            {
                "value": "SAR",
                "confidence": 0.9652
            }
        ]
    },
    "modality_integration_type": {
        "value": "Unimodal",
        "confidence": 0.9998
    },
    "modalities": {
        "value": [
            {
                "value": "Multispectral",
                "confidence": 0.9642
            },
            {
                "value": "SAR",
                "confidence": 0.9964
            }
        ]
    },
    "spectral_alignment": {
        "value": "none",
        "confidence": 0.9579
    },
    "temporal_alignment": {
        "value": "none",
        "confidence": 1.0
    },
    "spatial_resolution": {
        "value": "variable",
        "confidence": 0.7745
    },
    "temporal_resolution": {
        "value": "variable",
        "confidence": 0.8694
    },
    "bands": {
        "value": [
            {
                "value": "RGB",
                "confidence": 0.8412
            }
        ]
    },
    "pretraining_phases": [
        {
            "dataset": {
                "value": "SA-1B (Segment Anything 1 Billion masks dataset)",
                "confidence": 0.3431
            },
            "regions_coverage": {
                "value": []
            },
            "time_range": {
                "value": null,
                "confidence": 0.0
            },
            "num_images": {
                "value": 11000000,
                "confidence": 1.0
            },
            "token_size": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "augmentations": {
                "value": []
            },
            "processing": {
                "value": []
            },
            "sampling": {
                "value": null,
                "confidence": 0.0
            },
            "processing_level": {
                "value": null,
                "confidence": 0.0
            },
            "cloud_cover": {
                "value": null,
                "confidence": 0.0
            },
            "missing_data": {
                "value": null,
                "confidence": 0.0
            },
            "masking_ratio": {
                "value": null,
                "confidence": 0.0
            }
        }
    ],
    "benchmarks": [
        {
            "task": {
                "value": "Instance segmentation",
                "confidence": 0.7645
            },
            "application": {
                "value": "Building extraction",
                "confidence": 0.841
            },
            "dataset": {
                "value": "WHU building extraction dataset",
                "confidence": 0.479
            },
            "metrics": {
                "value": [
                    {
                        "value": "APbox",
                        "confidence": 0.9958
                    },
                    {
                        "value": "AP50_box",
                        "confidence": 0.685
                    },
                    {
                        "value": "AP75_box",
                        "confidence": 1.0
                    },
                    {
                        "value": "APmask",
                        "confidence": 1.0
                    },
                    {
                        "value": "AP50_mask",
                        "confidence": 1.0
                    },
                    {
                        "value": "AP75_mask",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 72.5,
                        "confidence": 1.2999
                    },
                    {
                        "value": 91.0,
                        "confidence": 0.9889
                    },
                    {
                        "value": 81.7,
                        "confidence": 1.0
                    },
                    {
                        "value": 72.5,
                        "confidence": 1.3
                    },
                    {
                        "value": 92.0,
                        "confidence": 1.0
                    },
                    {
                        "value": 82.9,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": "Optical RGB",
                        "confidence": 0.9398
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": "Christchurch, New Zealand",
                        "confidence": 0.9997
                    }
                ]
            },
            "original_samples": {
                "value": 8188,
                "confidence": 1.0
            },
            "num_samples": {
                "value": 8188,
                "confidence": 0.9955
            },
            "sampling_percentage": {
                "value": 100,
                "confidence": 1.0
            },
            "num_classes": {
                "value": 1,
                "confidence": 1.0
            },
            "classes": {
                "value": [
                    {
                        "value": "building",
                        "confidence": 0.8113
                    }
                ]
            },
            "image_resolution": {
                "value": "512x512",
                "confidence": 0.9872
            },
            "spatial_resolution": {
                "value": "0.0075m to 0.3m",
                "confidence": 0.5551
            },
            "bands_used": {
                "value": [
                    {
                        "value": "RGB",
                        "confidence": 0.8115
                    }
                ]
            },
            "augmentations": {
                "value": [
                    {
                        "value": "horizontal flipping",
                        "confidence": 0.8697
                    },
                    {
                        "value": "large-scale jittering",
                        "confidence": 0.9869
                    }
                ]
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 16,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.0001,
                "confidence": 0.9998
            },
            "epochs": {
                "value": 200,
                "confidence": 0.8964
            },
            "loss_function": {
                "value": "Cross-Entropy, SmoothL1, Binary Cross-Entropy, Dice",
                "confidence": 0.3892
            },
            "split_ratio": {
                "value": "4736 train / 1036 val / 2416 test",
                "confidence": 0.6873
            }
        },
        {
            "task": {
                "value": "Instance segmentation",
                "confidence": 0.9999
            },
            "application": {
                "value": "Multi-class object segmentation",
                "confidence": 0.4982
            },
            "dataset": {
                "value": "NWPU VHR-10",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "APbox",
                        "confidence": 1.0
                    },
                    {
                        "value": "AP50_box",
                        "confidence": 1.0
                    },
                    {
                        "value": "AP75_box",
                        "confidence": 1.0
                    },
                    {
                        "value": "APmask",
                        "confidence": 1.0
                    },
                    {
                        "value": "AP50_mask",
                        "confidence": 1.0
                    },
                    {
                        "value": "AP75_mask",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 68.4,
                        "confidence": 1.0
                    },
                    {
                        "value": 90.3,
                        "confidence": 1.0
                    },
                    {
                        "value": 74.0,
                        "confidence": 0.9994
                    },
                    {
                        "value": 67.5,
                        "confidence": 1.0
                    },
                    {
                        "value": 91.7,
                        "confidence": 1.0
                    },
                    {
                        "value": 74.8,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": "Optical RGB",
                        "confidence": 0.924
                    },
                    {
                        "value": "Color Infrared",
                        "confidence": 0.5352
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": "Google Earth (various), Vaihingen",
                        "confidence": 0.3217
                    }
                ]
            },
            "original_samples": {
                "value": 800,
                "confidence": 0.9818
            },
            "num_samples": {
                "value": 800,
                "confidence": 1.0
            },
            "sampling_percentage": {
                "value": 100,
                "confidence": 1.0
            },
            "num_classes": {
                "value": 10,
                "confidence": 1.0
            },
            "classes": {
                "value": [
                    {
                        "value": "airplane",
                        "confidence": 1.0
                    },
                    {
                        "value": "ship",
                        "confidence": 1.0
                    },
                    {
                        "value": "storage tank",
                        "confidence": 1.0
                    },
                    {
                        "value": "baseball diamond",
                        "confidence": 1.0
                    },
                    {
                        "value": "tennis court",
                        "confidence": 1.0
                    },
                    {
                        "value": "basketball court",
                        "confidence": 1.0
                    },
                    {
                        "value": "ground track field",
                        "confidence": 1.0
                    },
                    {
                        "value": "harbor",
                        "confidence": 0.9999
                    },
                    {
                        "value": "bridge",
                        "confidence": 1.0
                    },
                    {
                        "value": "vehicle",
                        "confidence": 1.0
                    }
                ]
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": "0.5m-2m (Google Earth), 0.08m (Vaihingen)",
                "confidence": 0.3495
            },
            "bands_used": {
                "value": [
                    {
                        "value": "RGB",
                        "confidence": 0.9721
                    },
                    {
                        "value": "Color Infrared",
                        "confidence": 0.7177
                    }
                ]
            },
            "augmentations": {
                "value": [
                    {
                        "value": "horizontal flipping",
                        "confidence": 1.0
                    },
                    {
                        "value": "large-scale jittering",
                        "confidence": 1.0
                    }
                ]
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 16,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.0001,
                "confidence": 1.0
            },
            "epochs": {
                "value": 300,
                "confidence": 0.9739
            },
            "loss_function": {
                "value": "Cross-Entropy, SmoothL1, Binary Cross-Entropy, Dice",
                "confidence": 0.896
            },
            "split_ratio": {
                "value": "80% train / 20% test",
                "confidence": 0.9553
            }
        },
        {
            "task": {
                "value": "Instance segmentation",
                "confidence": 1.0
            },
            "application": {
                "value": "Ship detection",
                "confidence": 0.4601
            },
            "dataset": {
                "value": "SSDD",
                "confidence": 0.9795
            },
            "metrics": {
                "value": [
                    {
                        "value": "APbox",
                        "confidence": 1.0
                    },
                    {
                        "value": "AP50_box",
                        "confidence": 1.0
                    },
                    {
                        "value": "AP75_box",
                        "confidence": 1.0
                    },
                    {
                        "value": "APmask",
                        "confidence": 1.0
                    },
                    {
                        "value": "AP50_mask",
                        "confidence": 1.0
                    },
                    {
                        "value": "AP75_mask",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 66.0,
                        "confidence": 0.8464
                    },
                    {
                        "value": 95.6,
                        "confidence": 1.3
                    },
                    {
                        "value": 78.7,
                        "confidence": 1.0
                    },
                    {
                        "value": 67.3,
                        "confidence": 1.0
                    },
                    {
                        "value": 95.6,
                        "confidence": 1.3
                    },
                    {
                        "value": 84.3,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": "SAR",
                        "confidence": 0.9996
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": 1160,
                "confidence": 1.0
            },
            "num_samples": {
                "value": 1160,
                "confidence": 1.0
            },
            "sampling_percentage": {
                "value": 100,
                "confidence": 1.0
            },
            "num_classes": {
                "value": 1,
                "confidence": 1.0
            },
            "classes": {
                "value": [
                    {
                        "value": "ship",
                        "confidence": 1.0
                    }
                ]
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": "1m-15m",
                "confidence": 0.654
            },
            "bands_used": {
                "value": [
                    {
                        "value": "SAR",
                        "confidence": 1.0
                    }
                ]
            },
            "augmentations": {
                "value": [
                    {
                        "value": "horizontal flipping",
                        "confidence": 1.0
                    },
                    {
                        "value": "large-scale jittering",
                        "confidence": 1.0
                    }
                ]
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 16,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.0001,
                "confidence": 1.0
            },
            "epochs": {
                "value": 300,
                "confidence": 0.9874
            },
            "loss_function": {
                "value": "Cross-Entropy, SmoothL1, Binary Cross-Entropy, Dice",
                "confidence": 0.9999
            },
            "split_ratio": {
                "value": "80% train / 20% test",
                "confidence": 0.9995
            }
        }
    ]
}