{
    "model_id": {
        "value": "SkyCLIP",
        "confidence": 0.7978
    },
    "model_name": {
        "value": "SkyCLIP",
        "confidence": 1.0
    },
    "version": {
        "value": "SkyCLIP-50 (ViT-L-14)",
        "confidence": 0.6742
    },
    "release_date": {
        "value": "2023-12-20",
        "confidence": 0.998
    },
    "last_updated": {
        "value": "2023-12-20",
        "confidence": 0.9768
    },
    "short_description": {
        "value": "SkyCLIP is a remote-sensing-specialized CLIP model developed via continual pre-training on the large-scale, semantically diverse SkyScript vision-language dataset for remote sensing. It outperforms baseline models in zero-shot scene classification, fine-grained attribute classification, and cross-modal retrieval.",
        "confidence": 0.8231
    },
    "paper_link": {
        "value": "https://arxiv.org/abs/2312.12856",
        "confidence": 0.9998
    },
    "citations": {
        "value": null,
        "confidence": 0.0
    },
    "repository": {
        "value": "https://github.com/wangzhecheng/SkyScript",
        "confidence": 1.0
    },
    "weights": {
        "value": null,
        "confidence": 0.0
    },
    "backbone": {
        "value": "ViT-L-14",
        "confidence": 0.9912
    },
    "num_layers": {
        "value": 24,
        "confidence": 1.0
    },
    "num_parameters": {
        "value": null,
        "confidence": 0.0
    },
    "pretext_training_type": {
        "value": "Contrastive image-text learning",
        "confidence": 0.8022
    },
    "masking_strategy": {
        "value": null,
        "confidence": 0.0
    },
    "pretraining": {
        "value": "Continual pre-training of CLIP initialized from OpenAI weights on the SkyScript dataset using image-text contrastive learning.",
        "confidence": 0.8
    },
    "domain_knowledge": {
        "value": [
            {
                "value": "Geo-coordinates for image-text pairing",
                "confidence": 0.487
            },
            {
                "value": "OSM semantic tag filtering",
                "confidence": 0.5243
            },
            {
                "value": "Remote sensing imagery",
                "confidence": 0.4706
            }
        ]
    },
    "backbone_modifications": {
        "value": [],
        "confidence": 0.0
    },
    "supported_sensors": {
        "value": [
            {
                "value": "SWISSIMAGE",
                "confidence": 0.7998
            },
            {
                "value": "Spain RGB orthophotos",
                "confidence": 0.9967
            },
            {
                "value": "Brandenburg RGBN orthophotos",
                "confidence": 0.8726
            },
            {
                "value": "Finland RGB NLS orthophotos",
                "confidence": 0.9811
            },
            {
                "value": "National Agriculture Imagery Program",
                "confidence": 0.9856
            },
            {
                "value": "Planet SkySat Public Ortho Imagery",
                "confidence": 0.9889
            },
            {
                "value": "Harmonized Sentinel-2 MSI",
                "confidence": 0.997
            },
            {
                "value": "Landsat 8",
                "confidence": 1.0
            },
            {
                "value": "Landsat 9",
                "confidence": 1.0
            }
        ]
    },
    "modality_integration_type": {
        "value": "Unimodal",
        "confidence": 0.997
    },
    "modalities": {
        "value": [
            {
                "value": "Multispectral",
                "confidence": 0.9328
            },
            {
                "value": "RGB",
                "confidence": 0.9268
            }
        ]
    },
    "spectral_alignment": {
        "value": "none",
        "confidence": 0.8963
    },
    "temporal_alignment": {
        "value": "none",
        "confidence": 1.0
    },
    "spatial_resolution": {
        "value": "0.1m to 30m",
        "confidence": 0.8468
    },
    "temporal_resolution": {
        "value": null,
        "confidence": 0.0
    },
    "bands": {
        "value": [
            {
                "value": "R",
                "confidence": 0.8811
            },
            {
                "value": "G",
                "confidence": 1.0
            },
            {
                "value": "B",
                "confidence": 1.0
            }
        ]
    },
    "pretraining_phases": [
        {
            "dataset": {
                "value": "SkyScript",
                "confidence": 1.0
            },
            "regions_coverage": {
                "value": [
                    {
                        "value": "Global (except Antarctica), with high-resolution concentration in US and Europe",
                        "confidence": 0.3004
                    }
                ]
            },
            "time_range": {
                "value": null,
                "confidence": 0.0
            },
            "num_images": {
                "value": 2600000,
                "confidence": 0.9998
            },
            "token_size": {
                "value": "14x14 (ViT-L-14)",
                "confidence": 0.4302
            },
            "image_resolution": {
                "value": "Variable, 0.1m to 30m GSD, images resized to 224x224 or variable",
                "confidence": 0.3
            },
            "epochs": {
                "value": 20,
                "confidence": 1.0
            },
            "batch_size": {
                "value": 512,
                "confidence": 1.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "processing": {
                "value": [
                    {
                        "value": "Filtering by CLIP similarity",
                        "confidence": 0.3223
                    },
                    {
                        "value": "Tag-based semantic filtering",
                        "confidence": 0.3099
                    },
                    {
                        "value": "Manual validation for subsets",
                        "confidence": 0.3042
                    }
                ]
            },
            "sampling": {
                "value": "Top 50% of image-text pairs by CLIP similarity",
                "confidence": 0.406
            },
            "processing_level": {
                "value": null,
                "confidence": 0.0
            },
            "cloud_cover": {
                "value": null,
                "confidence": 0.0
            },
            "missing_data": {
                "value": null,
                "confidence": 0.0
            },
            "masking_ratio": {
                "value": null,
                "confidence": 0.0
            }
        }
    ],
    "benchmarks": [
        {
            "task": {
                "value": "Scene classification",
                "confidence": 0.5563
            },
            "application": {
                "value": "Zero-shot scene classification",
                "confidence": 0.4988
            },
            "dataset": {
                "value": "AID",
                "confidence": 0.9839
            },
            "metrics": {
                "value": [
                    {
                        "value": "Top-1 Accuracy",
                        "confidence": 0.6461
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 71.7,
                        "confidence": 0.7744
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": 2000,
                "confidence": 0.9999
            },
            "num_samples": {
                "value": 2000,
                "confidence": 1.0
            },
            "sampling_percentage": {
                "value": 100,
                "confidence": 1.0
            },
            "num_classes": {
                "value": 30,
                "confidence": 1.0
            },
            "classes": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "R",
                        "confidence": 0.9386
                    },
                    {
                        "value": "G",
                        "confidence": 1.0
                    },
                    {
                        "value": "B",
                        "confidence": 1.0
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": "test",
                "confidence": 0.6817
            }
        },
        {
            "task": {
                "value": "Scene classification",
                "confidence": 1.0
            },
            "application": {
                "value": "Zero-shot scene classification",
                "confidence": 1.0
            },
            "dataset": {
                "value": "EuroSAT",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "Top-1 Accuracy",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 51.33,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": 2700,
                "confidence": 1.0
            },
            "num_samples": {
                "value": 2700,
                "confidence": 1.0
            },
            "sampling_percentage": {
                "value": 100,
                "confidence": 1.0
            },
            "num_classes": {
                "value": 10,
                "confidence": 1.0
            },
            "classes": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "R",
                        "confidence": 1.0
                    },
                    {
                        "value": "G",
                        "confidence": 1.0
                    },
                    {
                        "value": "B",
                        "confidence": 1.0
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": "test",
                "confidence": 1.0
            }
        },
        {
            "task": {
                "value": "Scene classification",
                "confidence": 1.0
            },
            "application": {
                "value": "Zero-shot scene classification",
                "confidence": 1.0
            },
            "dataset": {
                "value": "fMoW",
                "confidence": 0.9998
            },
            "metrics": {
                "value": [
                    {
                        "value": "Top-1 Accuracy",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 27.12,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": 106081,
                "confidence": 1.0
            },
            "num_samples": {
                "value": 106081,
                "confidence": 1.0
            },
            "sampling_percentage": {
                "value": 100,
                "confidence": 1.0
            },
            "num_classes": {
                "value": 62,
                "confidence": 1.0
            },
            "classes": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "R",
                        "confidence": 1.0
                    },
                    {
                        "value": "G",
                        "confidence": 1.0
                    },
                    {
                        "value": "B",
                        "confidence": 1.0
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": "val",
                "confidence": 0.9953
            }
        },
        {
            "task": {
                "value": "Scene classification",
                "confidence": 1.0
            },
            "application": {
                "value": "Zero-shot scene classification",
                "confidence": 1.0
            },
            "dataset": {
                "value": "Million-AID",
                "confidence": 0.9983
            },
            "metrics": {
                "value": [
                    {
                        "value": "Top-1 Accuracy",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 67.45,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": 10000,
                "confidence": 1.0
            },
            "num_samples": {
                "value": 10000,
                "confidence": 1.0
            },
            "sampling_percentage": {
                "value": 100,
                "confidence": 1.0
            },
            "num_classes": {
                "value": 51,
                "confidence": 1.0
            },
            "classes": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "R",
                        "confidence": 1.0
                    },
                    {
                        "value": "G",
                        "confidence": 1.0
                    },
                    {
                        "value": "B",
                        "confidence": 1.0
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": "train",
                "confidence": 1.0
            }
        },
        {
            "task": {
                "value": "Scene classification",
                "confidence": 1.0
            },
            "application": {
                "value": "Zero-shot scene classification",
                "confidence": 1.0
            },
            "dataset": {
                "value": "PatternNet",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "Top-1 Accuracy",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 80.88,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": 30400,
                "confidence": 1.0
            },
            "num_samples": {
                "value": 30400,
                "confidence": 1.0
            },
            "sampling_percentage": {
                "value": 100,
                "confidence": 1.0
            },
            "num_classes": {
                "value": 38,
                "confidence": 1.0
            },
            "classes": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "R",
                        "confidence": 1.0
                    },
                    {
                        "value": "G",
                        "confidence": 1.0
                    },
                    {
                        "value": "B",
                        "confidence": 1.0
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": "train",
                "confidence": 1.0
            }
        },
        {
            "task": {
                "value": "Scene classification",
                "confidence": 1.0
            },
            "application": {
                "value": "Zero-shot scene classification",
                "confidence": 1.0
            },
            "dataset": {
                "value": "RESISC45",
                "confidence": 0.9762
            },
            "metrics": {
                "value": [
                    {
                        "value": "Top-1 Accuracy",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 70.94,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": 31500,
                "confidence": 1.0
            },
            "num_samples": {
                "value": 31500,
                "confidence": 1.0
            },
            "sampling_percentage": {
                "value": 100,
                "confidence": 1.0
            },
            "num_classes": {
                "value": 45,
                "confidence": 1.0
            },
            "classes": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "R",
                        "confidence": 1.0
                    },
                    {
                        "value": "G",
                        "confidence": 1.0
                    },
                    {
                        "value": "B",
                        "confidence": 1.0
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": "train",
                "confidence": 1.0
            }
        },
        {
            "task": {
                "value": "Scene classification",
                "confidence": 1.0
            },
            "application": {
                "value": "Zero-shot scene classification",
                "confidence": 1.0
            },
            "dataset": {
                "value": "RSI-CB256",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "Top-1 Accuracy",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 50.09,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": 24747,
                "confidence": 1.0
            },
            "num_samples": {
                "value": 24747,
                "confidence": 1.0
            },
            "sampling_percentage": {
                "value": 100,
                "confidence": 1.0
            },
            "num_classes": {
                "value": 35,
                "confidence": 1.0
            },
            "classes": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "R",
                        "confidence": 1.0
                    },
                    {
                        "value": "G",
                        "confidence": 1.0
                    },
                    {
                        "value": "B",
                        "confidence": 1.0
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": "train",
                "confidence": 1.0
            }
        },
        {
            "task": {
                "value": "Scene classification",
                "confidence": 0.8245
            },
            "application": {
                "value": "Zero-shot scene classification",
                "confidence": 0.9832
            },
            "dataset": {
                "value": "SkyScript-classification",
                "confidence": 0.9632
            },
            "metrics": {
                "value": [
                    {
                        "value": "Top-1 Accuracy",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 59.93,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": 7000,
                "confidence": 0.9998
            },
            "num_samples": {
                "value": 7000,
                "confidence": 1.0
            },
            "sampling_percentage": {
                "value": 100,
                "confidence": 1.0
            },
            "num_classes": {
                "value": 70,
                "confidence": 1.0
            },
            "classes": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "R",
                        "confidence": 1.0
                    },
                    {
                        "value": "G",
                        "confidence": 1.0
                    },
                    {
                        "value": "B",
                        "confidence": 1.0
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": "test",
                "confidence": 0.6775
            }
        },
        {
            "task": {
                "value": "Fine-grained attribute classification",
                "confidence": 0.6698
            },
            "application": {
                "value": "Zero-shot fine-grained classification",
                "confidence": 0.4371
            },
            "dataset": {
                "value": "Roof shape",
                "confidence": 0.7972
            },
            "metrics": {
                "value": [
                    {
                        "value": "Top-1 Accuracy",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 46.83,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": 6,
                "confidence": 1.0
            },
            "classes": {
                "value": [
                    {
                        "value": "flat",
                        "confidence": 0.9999
                    },
                    {
                        "value": "hipped",
                        "confidence": 1.0
                    },
                    {
                        "value": "gabled",
                        "confidence": 1.0
                    },
                    {
                        "value": "dome",
                        "confidence": 0.9999
                    },
                    {
                        "value": "pyramidal",
                        "confidence": 1.0
                    },
                    {
                        "value": "round",
                        "confidence": 1.0
                    }
                ]
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "R",
                        "confidence": 0.9971
                    },
                    {
                        "value": "G",
                        "confidence": 1.0
                    },
                    {
                        "value": "B",
                        "confidence": 1.0
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Fine-grained attribute classification",
                "confidence": 1.0
            },
            "application": {
                "value": "Zero-shot fine-grained classification",
                "confidence": 1.0
            },
            "dataset": {
                "value": "Road smoothness",
                "confidence": 0.925
            },
            "metrics": {
                "value": [
                    {
                        "value": "Top-1 Accuracy",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 35.8,
                        "confidence": 0.8114
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": 5,
                "confidence": 1.0
            },
            "classes": {
                "value": [
                    {
                        "value": "excellent",
                        "confidence": 0.9989
                    },
                    {
                        "value": "good",
                        "confidence": 1.0
                    },
                    {
                        "value": "intermediate",
                        "confidence": 0.9999
                    },
                    {
                        "value": "bad",
                        "confidence": 1.0
                    },
                    {
                        "value": "very bad",
                        "confidence": 1.0
                    }
                ]
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "R",
                        "confidence": 1.0
                    },
                    {
                        "value": "G",
                        "confidence": 1.0
                    },
                    {
                        "value": "B",
                        "confidence": 1.0
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Fine-grained attribute classification",
                "confidence": 1.0
            },
            "application": {
                "value": "Zero-shot fine-grained classification",
                "confidence": 1.0
            },
            "dataset": {
                "value": "Road surface",
                "confidence": 0.9998
            },
            "metrics": {
                "value": [
                    {
                        "value": "Top-1 Accuracy",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 67.5,
                        "confidence": 0.998
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": 5,
                "confidence": 1.0
            },
            "classes": {
                "value": [
                    {
                        "value": "asphalt",
                        "confidence": 1.0
                    },
                    {
                        "value": "concrete",
                        "confidence": 0.9999
                    },
                    {
                        "value": "grass",
                        "confidence": 0.9986
                    },
                    {
                        "value": "gravel",
                        "confidence": 1.0
                    },
                    {
                        "value": "sand",
                        "confidence": 1.0
                    }
                ]
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "R",
                        "confidence": 1.0
                    },
                    {
                        "value": "G",
                        "confidence": 1.0
                    },
                    {
                        "value": "B",
                        "confidence": 1.0
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Cross-modal retrieval",
                "confidence": 0.9974
            },
            "application": {
                "value": "Zero-shot cross-modal retrieval",
                "confidence": 0.5124
            },
            "dataset": {
                "value": "SkyScript-retrieval",
                "confidence": 0.92
            },
            "metrics": {
                "value": [
                    {
                        "value": "Mean Recall (img2txt)",
                        "confidence": 0.4841
                    },
                    {
                        "value": "Mean Recall (txt2img)",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 8.53,
                        "confidence": 0.9985
                    },
                    {
                        "value": 7.73,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": 30000,
                "confidence": 1.0
            },
            "num_samples": {
                "value": 30000,
                "confidence": 1.0
            },
            "sampling_percentage": {
                "value": 100,
                "confidence": 1.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "R",
                        "confidence": 0.994
                    },
                    {
                        "value": "G",
                        "confidence": 1.0
                    },
                    {
                        "value": "B",
                        "confidence": 1.0
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": "test",
                "confidence": 0.9665
            }
        },
        {
            "task": {
                "value": "Cross-modal retrieval",
                "confidence": 1.0
            },
            "application": {
                "value": "Zero-shot cross-modal retrieval",
                "confidence": 1.0
            },
            "dataset": {
                "value": "RSICD",
                "confidence": 0.9953
            },
            "metrics": {
                "value": [
                    {
                        "value": "Mean Recall (img2txt)",
                        "confidence": 0.9997
                    },
                    {
                        "value": "Mean Recall (txt2img)",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 23.7,
                        "confidence": 0.9858
                    },
                    {
                        "value": 19.97,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "R",
                        "confidence": 0.9982
                    },
                    {
                        "value": "G",
                        "confidence": 1.0
                    },
                    {
                        "value": "B",
                        "confidence": 1.0
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Cross-modal retrieval",
                "confidence": 1.0
            },
            "application": {
                "value": "Zero-shot cross-modal retrieval",
                "confidence": 1.0
            },
            "dataset": {
                "value": "RSITMD",
                "confidence": 1.0
            },
            "metrics": {
                "value": [
                    {
                        "value": "Mean Recall (img2txt)",
                        "confidence": 1.0
                    },
                    {
                        "value": "Mean Recall (txt2img)",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 30.75,
                        "confidence": 1.0
                    },
                    {
                        "value": 30.58,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "R",
                        "confidence": 1.0
                    },
                    {
                        "value": "G",
                        "confidence": 1.0
                    },
                    {
                        "value": "B",
                        "confidence": 1.0
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        },
        {
            "task": {
                "value": "Cross-modal retrieval",
                "confidence": 1.0
            },
            "application": {
                "value": "Zero-shot cross-modal retrieval",
                "confidence": 1.0
            },
            "dataset": {
                "value": "UCM-Captions",
                "confidence": 0.9987
            },
            "metrics": {
                "value": [
                    {
                        "value": "Mean Recall (img2txt)",
                        "confidence": 1.0
                    },
                    {
                        "value": "Mean Recall (txt2img)",
                        "confidence": 1.0
                    }
                ]
            },
            "metrics_value": {
                "value": [
                    {
                        "value": 72.22,
                        "confidence": 1.0
                    },
                    {
                        "value": 59.33,
                        "confidence": 1.0
                    }
                ]
            },
            "sensor": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "regions": {
                "value": [
                    {
                        "value": null,
                        "confidence": 0.3
                    }
                ]
            },
            "original_samples": {
                "value": null,
                "confidence": 0.0
            },
            "num_samples": {
                "value": null,
                "confidence": 0.0
            },
            "sampling_percentage": {
                "value": null,
                "confidence": 0.0
            },
            "num_classes": {
                "value": null,
                "confidence": 0.0
            },
            "classes": {
                "value": null,
                "confidence": 0.0
            },
            "image_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "spatial_resolution": {
                "value": null,
                "confidence": 0.0
            },
            "bands_used": {
                "value": [
                    {
                        "value": "R",
                        "confidence": 1.0
                    },
                    {
                        "value": "G",
                        "confidence": 1.0
                    },
                    {
                        "value": "B",
                        "confidence": 1.0
                    }
                ]
            },
            "augmentations": {
                "value": null,
                "confidence": 0.0
            },
            "optimizer": {
                "value": null,
                "confidence": 0.0
            },
            "batch_size": {
                "value": null,
                "confidence": 0.0
            },
            "learning_rate": {
                "value": null,
                "confidence": 0.0
            },
            "epochs": {
                "value": null,
                "confidence": 0.0
            },
            "loss_function": {
                "value": null,
                "confidence": 0.0
            },
            "split_ratio": {
                "value": null,
                "confidence": 0.0
            }
        }
    ]
}