{
    "model_id": {
        "value": "RSUniVLM",
        "confidence": 0.9158
    },
    "model_name": {
        "value": "RSUniVLM",
        "confidence": 1.0
    },
    "version": {
        "value": "1B",
        "confidence": 0.9968
    },
    "release_date": {
        "value": "2024-12-10",
        "confidence": 0.9888
    },
    "last_updated": {
        "value": "2024-12-10",
        "confidence": 0.9912
    },
    "short_description": {
        "value": "RSUniVLM is a unified, end-to-end remote sensing vision-language model capable of multi-level visual understanding (image-level, region-level, pixel-level) and multi-image analysis, using a novel Granularity-oriented Mixture of Experts (G-MoE) architecture. With about 1 billion parameters, it supports tasks such as visual question answering, visual grounding, semantic segmentation, change captioning, and change detection.",
        "confidence": 0.8221
    },
    "paper_link": {
        "value": "https://arxiv.org/abs/2412.05679",
        "confidence": 1.0
    },
    "citations": {
        "value": null,
        "confidence": 0.0
    },
    "repository": {
        "value": null,
        "confidence": 0.0
    },
    "weights": {
        "value": null,
        "confidence": 0.0
    },
    "backbone": {
        "value": "SigLIP-400m (image encoder), Qwen2-0.5B (LLM)",
        "confidence": 0.9181
    },
    "num_layers": {
        "value": null,
        "confidence": 0.0
    },
    "num_parameters": {
        "value": 1000000000,
        "confidence": 1.0
    },
    "pretext_training_type": {
        "value": "Instruction-following, two-stage coarse-to-fine training",
        "confidence": 0.7164
    },
    "masking_strategy": {
        "value": "Semantic descriptors for segmentation masks (24x24), region bounding boxes normalized to [0,100]",
        "confidence": 0.6359
    },
    "pretraining": {
        "value": "Two-stage: Stage 1 full-parameter fine-tuning on 1.2M multi-task instruction-following data (RS and general domain); Stage 2 G-MoE fine-tuning on RS-specific data split by granularity",
        "confidence": 0.743
    },
    "domain_knowledge": {
        "value": [
            {
                "value": "Remote sensing instruction-following datasets",
                "confidence": 0.6642
            },
            {
                "value": "Semantic descriptors for mask representation",
                "confidence": 0.5567
            },
            {
                "value": "Task-specific tokens",
                "confidence": 0.7913
            }
        ]
    },
    "backbone_modifications": {
        "value": [
            {
                "value": "Granularity-oriented Mixture of Experts (G-MoE) in LLM",
                "confidence": 0.7584
            },
            {
                "value": "Task router for expert selection",
                "confidence": 0.6034
            }
        ]
    },
    "supported_sensors": {
        "value": [
            {
                "value": null,
                "confidence": 0.2
            }
        ]
    },
    "modality_integration_type": {
        "value": "Unimodal",
        "confidence": 0.9838
    },
    "modalities": {
        "value": [
            {
                "value": "Multispectral",
                "confidence": 0.9944
            },
            {
                "value": "RGB",
                "confidence": 0.852
            }
        ]
    },
    "spectral_alignment": {
        "value": "none",
        "confidence": 0.9668
    },
    "temporal_alignment": {
        "value": "partial",
        "confidence": 0.5914
    },
    "spatial_resolution": {
        "value": null,
        "confidence": 0.0
    },
    "temporal_resolution": {
        "value": null,
        "confidence": 0.0
    },
    "bands": {
        "value": [
            {
                "value": null,
                "confidence": 0.2
            }
        ]
    },
    "pretraining_phases": [
        {
            "dataset": {
                "value": "Multi-task instruction-following dataset (RS and general domain, see Tab. 10, 11)",
                "confidence": 0.3
            },
            "regions_coverage": {
                "value": []
            },
            "time_range": {
                "value": null,
                "confidence": 0.0
            },
            "num_images": {
                "value": null,
                "confidence": 0.0
            },
            "token_size": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": 1,
                "confidence": 1.0
            },
            "batch_size": {
                "value": 4,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": "1e-5",
                "confidence": 0.9978
            },
            "augmentations": {
                "value": []
            },
            "processing": {
                "value": []
            },
            "sampling": {
                "value": null,
                "confidence": 0.0
            },
            "processing_level": {
                "value": null,
                "confidence": 0.0
            },
            "cloud_cover": {
                "value": null,
                "confidence": 0.0
            },
            "missing_data": {
                "value": null,
                "confidence": 0.0
            },
            "masking_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "dataset": {
                "value": "RS-specific instruction-following dataset (see Tab. 12)",
                "confidence": 0.318
            },
            "regions_coverage": {
                "value": []
            },
            "time_range": {
                "value": null,
                "confidence": 0.0
            },
            "num_images": {
                "value": null,
                "confidence": 0.0
            },
            "token_size": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": 1,
                "confidence": 1.0
            },
            "batch_size": {
                "value": 4,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": "1e-5",
                "confidence": 1.0
            },
            "augmentations": {
                "value": []
            },
            "processing": {
                "value": []
            },
            "sampling": {
                "value": null,
                "confidence": 0.0
            },
            "processing_level": {
                "value": null,
                "confidence": 0.0
            },
            "cloud_cover": {
                "value": null,
                "confidence": 0.0
            },
            "missing_data": {
                "value": null,
                "confidence": 0.0
            },
            "masking_ratio": {
                "value": null,
                "confidence": 0.0
            }
        }
    ],
    "benchmarks": [
        {
            "task": {
                "value": "Visual Question Answering",
                "confidence": 0.9837
            },
            "application": {
                "value": "Remote sensing VQA",
                "confidence": 0.4422
            },
            "dataset": {
                "value": "RSVQA-LR",
                "confidence": 0.9999
            },
            "metrics": {
                "value": [
                    {
                        "value": "Accuracy",
                        "confidence": 0.9058
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 92.05,
                        "confidence": 0.9946
                    }
                ]
            },
            "sensor": {
                "value": []
            },
            "regions": {
                "value": []
            },
            "original_samples": {
                "value": 57000,
                "confidence": 1.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 0.9999
            },
            "batch_size": {
                "value": 4,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 1e-05,
                "confidence": 0.994
            },
            "epochs": {
                "value": 1,
                "confidence": 1.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Visual Question Answering",
                "confidence": 0.9996
            },
            "application": {
                "value": "Remote sensing VQA",
                "confidence": 1.0
            },
            "dataset": {
                "value": "RSVQA-HR",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "Accuracy",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 90.85,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": []
            },
            "regions": {
                "value": []
            },
            "original_samples": {
                "value": 100000,
                "confidence": 1.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 4,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 1e-05,
                "confidence": 1.0
            },
            "epochs": {
                "value": 1,
                "confidence": 1.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Scene Classification",
                "confidence": 0.9962
            },
            "application": {
                "value": "Remote sensing scene classification",
                "confidence": 0.6135
            },
            "dataset": {
                "value": "AID",
                "confidence": 0.8687
            },
            "metrics": {
                "value": [
                    {
                        "value": "Accuracy",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 81.18,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": []
            },
            "regions": {
                "value": []
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 0.9469
            },
            "batch_size": {
                "value": 4,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 1e-05,
                "confidence": 1.0
            },
            "epochs": {
                "value": 1,
                "confidence": 1.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Scene Classification",
                "confidence": 1.0
            },
            "application": {
                "value": "Remote sensing scene classification",
                "confidence": 0.9995
            },
            "dataset": {
                "value": "WHU-RS19",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "Accuracy",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 84.91,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": []
            },
            "regions": {
                "value": []
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 4,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 1e-05,
                "confidence": 1.0
            },
            "epochs": {
                "value": 1,
                "confidence": 1.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Scene Classification",
                "confidence": 1.0
            },
            "application": {
                "value": "Remote sensing scene classification",
                "confidence": 0.9999
            },
            "dataset": {
                "value": "SIRI-WHU",
                "confidence": 0.9739
            },
            "metrics": {
                "value": [
                    {
                        "value": "Accuracy",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 86.86,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": []
            },
            "regions": {
                "value": []
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 4,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 1e-05,
                "confidence": 1.0
            },
            "epochs": {
                "value": 1,
                "confidence": 1.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Scene Classification",
                "confidence": 1.0
            },
            "application": {
                "value": "Remote sensing scene classification",
                "confidence": 1.0
            },
            "dataset": {
                "value": "NWPU-RESISC45",
                "confidence": 0.6933
            },
            "metrics": {
                "value": [
                    {
                        "value": "Accuracy",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 68.13,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": []
            },
            "regions": {
                "value": []
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 4,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 1e-05,
                "confidence": 1.0
            },
            "epochs": {
                "value": 1,
                "confidence": 1.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Visual Grounding",
                "confidence": 0.9998
            },
            "application": {
                "value": "Object localization in RS images",
                "confidence": 0.3658
            },
            "dataset": {
                "value": "DIOR-RSVG",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "acc@0.5",
                        "confidence": 0.8345
                    },
                    {
                        "value": "acc@0.7",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 72.47,
                        "confidence": 1.0
                    },
                    {
                        "value": 56.17,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": []
            },
            "regions": {
                "value": []
            },
            "original_samples": {
                "value": 30000,
                "confidence": 1.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 0.9668
            },
            "batch_size": {
                "value": 4,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 1e-05,
                "confidence": 1.0
            },
            "epochs": {
                "value": 1,
                "confidence": 1.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Visual Grounding",
                "confidence": 1.0
            },
            "application": {
                "value": "Object localization in RS images",
                "confidence": 0.8631
            },
            "dataset": {
                "value": "VRSBench-Ref",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "acc@0.5",
                        "confidence": 1.0
                    },
                    {
                        "value": "acc@0.7",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 69.31,
                        "confidence": 1.0
                    },
                    {
                        "value": 47.47,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": []
            },
            "regions": {
                "value": []
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 4,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 1e-05,
                "confidence": 1.0
            },
            "epochs": {
                "value": 1,
                "confidence": 1.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Change Captioning",
                "confidence": 0.9977
            },
            "application": {
                "value": "Change captioning in RS images",
                "confidence": 0.3248
            },
            "dataset": {
                "value": "LEVIR-MCI",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "BLEU-1",
                        "confidence": 1.0
                    },
                    {
                        "value": "BLEU-2",
                        "confidence": 1.0
                    },
                    {
                        "value": "BLEU-3",
                        "confidence": 1.0
                    },
                    {
                        "value": "BLEU-4",
                        "confidence": 1.0
                    },
                    {
                        "value": "METEOR",
                        "confidence": 1.0
                    },
                    {
                        "value": "ROUGE-L",
                        "confidence": 1.0
                    },
                    {
                        "value": "CIDEr",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 86.3,
                        "confidence": 0.7754
                    },
                    {
                        "value": 75.86,
                        "confidence": 1.0
                    },
                    {
                        "value": 67.49,
                        "confidence": 1.0
                    },
                    {
                        "value": 60.27,
                        "confidence": 1.0
                    },
                    {
                        "value": 40.14,
                        "confidence": 1.0
                    },
                    {
                        "value": 80.09,
                        "confidence": 1.0
                    },
                    {
                        "value": 139.8,
                        "confidence": 0.994
                    }
                ]
            },
            "sensor": {
                "value": []
            },
            "regions": {
                "value": []
            },
            "original_samples": {
                "value": 40000,
                "confidence": 1.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 0.9998
            },
            "batch_size": {
                "value": 4,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 1e-05,
                "confidence": 1.0
            },
            "epochs": {
                "value": 1,
                "confidence": 1.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Change Detection",
                "confidence": 0.9991
            },
            "application": {
                "value": "Change detection in RS images",
                "confidence": 0.8236
            },
            "dataset": {
                "value": "WHU-CD",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "F1",
                        "confidence": 1.0
                    },
                    {
                        "value": "Precision",
                        "confidence": 0.7732
                    },
                    {
                        "value": "Recall",
                        "confidence": 1.0
                    },
                    {
                        "value": "OA",
                        "confidence": 0.9839
                    },
                    {
                        "value": "IoU",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 71.38,
                        "confidence": 1.0
                    },
                    {
                        "value": 73.13,
                        "confidence": 1.0
                    },
                    {
                        "value": 67.8,
                        "confidence": 0.9469
                    },
                    {
                        "value": 92.64,
                        "confidence": 1.0
                    },
                    {
                        "value": 54.19,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": []
            },
            "regions": {
                "value": []
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 0.9978
            },
            "batch_size": {
                "value": 4,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 1e-05,
                "confidence": 1.0
            },
            "epochs": {
                "value": 1,
                "confidence": 1.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Semantic Segmentation",
                "confidence": 0.9164
            },
            "application": {
                "value": "Zero-shot semantic segmentation",
                "confidence": 0.7751
            },
            "dataset": {
                "value": "Vaihingen",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "mIoU",
                        "confidence": 0.9883
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 54.46,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": []
            },
            "regions": {
                "value": []
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 0.998
            },
            "batch_size": {
                "value": 4,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 1e-05,
                "confidence": 1.0
            },
            "epochs": {
                "value": 1,
                "confidence": 1.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Semantic Segmentation",
                "confidence": 1.0
            },
            "application": {
                "value": "Zero-shot semantic segmentation",
                "confidence": 1.0
            },
            "dataset": {
                "value": "UDD5",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "mIoU",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 65.41,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": []
            },
            "regions": {
                "value": []
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 4,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 1e-05,
                "confidence": 1.0
            },
            "epochs": {
                "value": 1,
                "confidence": 1.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Semantic Segmentation",
                "confidence": 1.0
            },
            "application": {
                "value": "Zero-shot semantic segmentation",
                "confidence": 1.0
            },
            "dataset": {
                "value": "VDD",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "mIoU",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 49.6,
                        "confidence": 0.9992
                    }
                ]
            },
            "sensor": {
                "value": []
            },
            "regions": {
                "value": []
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 4,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 1e-05,
                "confidence": 1.0
            },
            "epochs": {
                "value": 1,
                "confidence": 1.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        }
    ]
}