{
    "model_id": {
        "value": "GeoGround",
        "confidence": 0.9164
    },
    "model_name": {
        "value": "GeoGround",
        "confidence": 1.0
    },
    "version": {
        "value": null,
        "confidence": 0.0
    },
    "release_date": {
        "value": null,
        "confidence": 0.0
    },
    "last_updated": {
        "value": null,
        "confidence": 0.0
    },
    "short_description": {
        "value": "GeoGround is a unified large vision-language model for remote sensing visual grounding, supporting horizontal bounding box (HBB), oriented bounding box (OBB), and segmentation mask outputs via a text-based approach. It introduces the Text-Mask paradigm and hybrid supervision, and is trained on the large-scale refGeo dataset.",
        "confidence": 0.8162
    },
    "paper_link": {
        "value": "https://arxiv.org/abs/2411.11904",
        "confidence": 0.9999
    },
    "citations": {
        "value": null,
        "confidence": 0.0
    },
    "repository": {
        "value": "https://github.com/zytx121/GeoGround",
        "confidence": 1.0
    },
    "weights": {
        "value": null,
        "confidence": 0.0
    },
    "backbone": {
        "value": "CLIP-ViT",
        "confidence": 1.0
    },
    "num_layers": {
        "value": null,
        "confidence": 0.0
    },
    "num_parameters": {
        "value": null,
        "confidence": 0.0
    },
    "pretext_training_type": {
        "value": "Text regression loss with hybrid supervision (Text-HBB, Text-OBB, Text-Mask)",
        "confidence": 0.8197
    },
    "masking_strategy": {
        "value": "Textualization of mask via downsampling and R-RLE compression",
        "confidence": 0.6547
    },
    "pretraining": {
        "value": "Unified training pipeline using textualized signals (Text-HBB, Text-OBB, Text-Mask) and hybrid supervision (prompt-assisted and geometry-guided learning)",
        "confidence": 0.7582
    },
    "domain_knowledge": {
        "value": [
            {
                "value": "Prompt-assisted learning",
                "confidence": 0.7914
            },
            {
                "value": "Geometry-guided learning",
                "confidence": 0.9988
            },
            {
                "value": "Hybrid supervision",
                "confidence": 0.7461
            },
            {
                "value": "Text-Mask paradigm",
                "confidence": 0.5469
            }
        ]
    },
    "backbone_modifications": {
        "value": [
            {
                "value": "Connector (two-layer MLPs) between visual encoder and LLM",
                "confidence": 0.8661
            }
        ]
    },
    "supported_sensors": {
        "value": [
            {
                "value": null,
                "confidence": 0.3
            }
        ]
    },
    "modality_integration_type": {
        "value": "Unimodal",
        "confidence": 0.9331
    },
    "modalities": {
        "value": [
            {
                "value": "Multispectral",
                "confidence": 0.8459
            }
        ]
    },
    "spectral_alignment": {
        "value": "none",
        "confidence": 0.9838
    },
    "temporal_alignment": {
        "value": "none",
        "confidence": 1.0
    },
    "spatial_resolution": {
        "value": "variable (0.007m to 30m GSD)",
        "confidence": 0.7685
    },
    "temporal_resolution": {
        "value": null,
        "confidence": 0.0
    },
    "bands": {
        "value": [
            {
                "value": null,
                "confidence": 0.3
            }
        ]
    },
    "pretraining_phases": [
        {
            "dataset": {
                "value": "refGeo (integrates RSVG, DIOR-RSVG, GeoChat, VRSBench, and new aerial vehicle dataset)",
                "confidence": 0.3092
            },
            "regions_coverage": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "time_range": {
                "value": null,
                "confidence": 0.0
            },
            "num_images": {
                "value": 80000,
                "confidence": 1.0
            },
            "token_size": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": "512-4000 (varies by dataset)",
                "confidence": 0.4685
            },
            "epochs": {
                "value": 5,
                "confidence": 1.0
            },
            "batch_size": {
                "value": 128,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": "2e-4 (linear decay after warm-up, warm-up ratio 0.03)",
                "confidence": 0.3083
            },
            "augmentations": {
                "value": []
            },
            "processing": {
                "value": []
            },
            "sampling": {
                "value": null,
                "confidence": 0.0
            },
            "processing_level": {
                "value": null,
                "confidence": 0.0
            },
            "cloud_cover": {
                "value": null,
                "confidence": 0.0
            },
            "missing_data": {
                "value": null,
                "confidence": 0.0
            },
            "masking_ratio": {
                "value": null,
                "confidence": 0.0
            }
        }
    ],
    "benchmarks": [
        {
            "task": {
                "value": "Referring Expression Comprehension (REC)",
                "confidence": 0.9356
            },
            "application": {
                "value": "Remote sensing visual grounding (HBB)",
                "confidence": 0.3543
            },
            "dataset": {
                "value": "DIOR-RSVG-Test",
                "confidence": 0.9717
            },
            "metrics": {
                "value": [
                    {
                        "value": "Acc@0.5",
                        "confidence": 0.9901
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 77.73,
                        "confidence": 0.9971
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": "800",
                "confidence": 0.8982
            },
            "spatial_resolution": {
                "value": "0.5-30m",
                "confidence": 0.9394
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 128,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.0002,
                "confidence": 0.9999
            },
            "epochs": {
                "value": 5,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Text regression loss",
                "confidence": 0.9015
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Referring Expression Comprehension (REC)",
                "confidence": 0.9547
            },
            "application": {
                "value": "Remote sensing visual grounding (HBB)",
                "confidence": 0.9816
            },
            "dataset": {
                "value": "DIOR-RSVG-Val",
                "confidence": 0.7147
            },
            "metrics": {
                "value": [
                    {
                        "value": "Acc@0.5",
                        "confidence": 0.9999
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 77.18,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": "800",
                "confidence": 1.0
            },
            "spatial_resolution": {
                "value": "0.5-30m",
                "confidence": 0.9999
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 128,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.0002,
                "confidence": 1.0
            },
            "epochs": {
                "value": 5,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Text regression loss",
                "confidence": 1.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Referring Expression Comprehension (REC)",
                "confidence": 0.9955
            },
            "application": {
                "value": "Remote sensing visual grounding (HBB)",
                "confidence": 0.9987
            },
            "dataset": {
                "value": "RSVG-Test",
                "confidence": 0.9999
            },
            "metrics": {
                "value": [
                    {
                        "value": "Acc@0.5",
                        "confidence": 0.9998
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 26.65,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": "1024",
                "confidence": 1.0
            },
            "spatial_resolution": {
                "value": "0.24-4.88m",
                "confidence": 0.999
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 128,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.0002,
                "confidence": 1.0
            },
            "epochs": {
                "value": 5,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Text regression loss",
                "confidence": 1.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Referring Expression Comprehension (REC)",
                "confidence": 0.995
            },
            "application": {
                "value": "Remote sensing visual grounding (HBB)",
                "confidence": 0.9901
            },
            "dataset": {
                "value": "RSVG-Val",
                "confidence": 0.9997
            },
            "metrics": {
                "value": [
                    {
                        "value": "Acc@0.5",
                        "confidence": 0.9997
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 27.64,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": "1024",
                "confidence": 1.0
            },
            "spatial_resolution": {
                "value": "0.24-4.88m",
                "confidence": 0.9999
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 128,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.0002,
                "confidence": 1.0
            },
            "epochs": {
                "value": 5,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Text regression loss",
                "confidence": 1.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Referring Expression Comprehension (REC)",
                "confidence": 0.999
            },
            "application": {
                "value": "Remote sensing visual grounding (HBB)",
                "confidence": 0.9889
            },
            "dataset": {
                "value": "GeoChat*",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "Acc@0.5",
                        "confidence": 0.9999
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 70.24,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": "600-1024",
                "confidence": 0.9833
            },
            "spatial_resolution": {
                "value": "0.3-0.8m",
                "confidence": 0.9997
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 128,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.0002,
                "confidence": 1.0
            },
            "epochs": {
                "value": 5,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Text regression loss",
                "confidence": 1.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Referring Expression Comprehension (REC)",
                "confidence": 1.0
            },
            "application": {
                "value": "Remote sensing visual grounding (HBB)",
                "confidence": 0.9963
            },
            "dataset": {
                "value": "VRSBench*",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "Acc@0.5",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 66.04,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": "512",
                "confidence": 1.0
            },
            "spatial_resolution": {
                "value": "0.1-30m",
                "confidence": 0.9996
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 128,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.0002,
                "confidence": 1.0
            },
            "epochs": {
                "value": 5,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Text regression loss",
                "confidence": 1.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Referring Expression Comprehension (REC)",
                "confidence": 0.9976
            },
            "application": {
                "value": "Remote sensing visual grounding (HBB)",
                "confidence": 0.983
            },
            "dataset": {
                "value": "AVVG",
                "confidence": 0.9815
            },
            "metrics": {
                "value": [
                    {
                        "value": "Acc@0.5",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 21.58,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": "4000",
                "confidence": 1.0
            },
            "spatial_resolution": {
                "value": "0.007-0.04m",
                "confidence": 0.9993
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 128,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.0002,
                "confidence": 1.0
            },
            "epochs": {
                "value": 5,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Text regression loss",
                "confidence": 1.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Referring Expression Comprehension (REC)",
                "confidence": 0.9113
            },
            "application": {
                "value": "Remote sensing visual grounding (HBB)",
                "confidence": 0.9792
            },
            "dataset": {
                "value": "AVG",
                "confidence": 0.9992
            },
            "metrics": {
                "value": [
                    {
                        "value": "Acc@0.5",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 52.44,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": "4000",
                "confidence": 0.9468
            },
            "spatial_resolution": {
                "value": "0.007-0.04m",
                "confidence": 1.0
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 128,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.0002,
                "confidence": 1.0
            },
            "epochs": {
                "value": 5,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Text regression loss",
                "confidence": 1.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Referring Expression Comprehension (REC) with OBB",
                "confidence": 0.4625
            },
            "application": {
                "value": "Remote sensing visual grounding (OBB)",
                "confidence": 0.9822
            },
            "dataset": {
                "value": "GeoChat*",
                "confidence": 0.9999
            },
            "metrics": {
                "value": [
                    {
                        "value": "Acc@0.5 (rotated IoU)",
                        "confidence": 0.6482
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 59.72,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": "600-1024",
                "confidence": 0.9908
            },
            "spatial_resolution": {
                "value": "0.3-0.8m",
                "confidence": 0.9979
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 0.9975
            },
            "batch_size": {
                "value": 128,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.0002,
                "confidence": 1.0
            },
            "epochs": {
                "value": 5,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Text regression loss",
                "confidence": 1.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Referring Expression Comprehension (REC) with OBB",
                "confidence": 0.9996
            },
            "application": {
                "value": "Remote sensing visual grounding (OBB)",
                "confidence": 1.0
            },
            "dataset": {
                "value": "VRSBench*",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "Acc@0.5 (rotated IoU)",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 53.22,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": "512",
                "confidence": 1.0
            },
            "spatial_resolution": {
                "value": "0.1?30m",
                "confidence": 0.9998
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 128,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.0002,
                "confidence": 1.0
            },
            "epochs": {
                "value": 5,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Text regression loss",
                "confidence": 1.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Referring Expression Comprehension (REC) with OBB",
                "confidence": 0.9998
            },
            "application": {
                "value": "Remote sensing visual grounding (OBB)",
                "confidence": 1.0
            },
            "dataset": {
                "value": "A VVG",
                "confidence": 0.9998
            },
            "metrics": {
                "value": [
                    {
                        "value": "Acc@0.5 (rotated IoU)",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 13.93,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": "4000",
                "confidence": 0.9995
            },
            "spatial_resolution": {
                "value": "0.007?0.04m",
                "confidence": 0.9999
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 128,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.0002,
                "confidence": 1.0
            },
            "epochs": {
                "value": 5,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Text regression loss",
                "confidence": 1.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Referring Expression Comprehension (REC) with OBB",
                "confidence": 0.9995
            },
            "application": {
                "value": "Remote sensing visual grounding (OBB)",
                "confidence": 1.0
            },
            "dataset": {
                "value": "A VG",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "Acc@0.5 (rotated IoU)",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 42.29,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": "4000",
                "confidence": 1.0
            },
            "spatial_resolution": {
                "value": "0.007?0.04m",
                "confidence": 1.0
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 1.0
            },
            "batch_size": {
                "value": 128,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": 0.0002,
                "confidence": 1.0
            },
            "epochs": {
                "value": 5,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Text regression loss",
                "confidence": 1.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Referring Expression Segmentation (RES)",
                "confidence": 0.9999
            },
            "application": {
                "value": "Remote sensing referring segmentation",
                "confidence": 0.4931
            },
            "dataset": {
                "value": "RRSIS-D",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "Acc@0.5",
                        "confidence": 0.9995
                    },
                    {
                        "value": "mIoU",
                        "confidence": 0.9997
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 60.97,
                        "confidence": 0.8438
                    },
                    {
                        "value": 54.92,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": []
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": []
            },
            "augmentations": {
                "value": []
            },
            "optimizer": {
                "value": "AdamW",
                "confidence": 0.9253
            },
            "batch_size": {
                "value": 128,
                "confidence": 0.9997
            },
            "learning_rate": {
                "value": 0.0002,
                "confidence": 1.0
            },
            "epochs": {
                "value": 5,
                "confidence": 1.0
            },
            "loss_function": {
                "value": "Text regression loss",
                "confidence": 1.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        }
    ]
}