{
    "id": 3,
    "domain": "machine learning",
    "datasets": [
        {
            "name": "data.csv",
            "description": "Related to algorithms, data patterns, and artificial intelligence.",
            "columns": [
                {
                    "name": "application_type",
                    "description": "Type of application using the model"
                },
                {
                    "name": "preprocessing_complexity",
                    "description": "Represents the number of preprocessing steps required before model training."
                },
                {
                    "name": "uses_regularization",
                    "description": "Whether the model uses regularization techniques"
                },
                {
                    "name": "average_user_rating",
                    "description": "Average rating provided by users on the complexity and utility of the dataset."
                },
                {
                    "name": "requires_normalization",
                    "description": "Indicates whether the dataset requires normalization."
                },
                {
                    "name": "pretrained_models_count",
                    "description": "Number of pretrained models integrated into the dataset creation process"
                },
                {
                    "name": "data_update_frequency",
                    "description": "Frequency of data updates needed by the application, measured in updates per minute"
                },
                {
                    "name": "algorithm_depth",
                    "description": "Depth of the trees or layers in the algorithm"
                },
                {
                    "name": "data_diversity_score",
                    "description": "Numerical measure of the diversity of the training dataset"
                },
                {
                    "name": "feature_extraction_technique",
                    "description": "Type of feature extraction applied in the model"
                },
                {
                    "name": "includes_time_series",
                    "description": "Indicates if the dataset includes time-series data"
                },
                {
                    "name": "data_noise_level",
                    "description": "Amount of noise in the input data, rated on a scale"
                },
                {
                    "name": "training_cycles",
                    "description": "Number of cycles to train the model"
                },
                {
                    "name": "incorporates_AI_enhancements",
                    "description": "Indicates if the model incorporates artificial intelligence enhancements"
                },
                {
                    "name": "spans_multiple_time_zones",
                    "description": "Indicates if the data collection covers multiple time zones"
                },
                {
                    "name": "uses_feature_scaling",
                    "description": "Indicates whether feature scaling is applied to the input features"
                },
                {
                    "name": "response_urgency",
                    "description": "Categorical urgency of responding to data input, ranging from low to critical"
                },
                {
                    "name": "algorithm_type",
                    "description": "Type of machine learning algorithm used"
                },
                {
                    "name": "has_special_characters_in_name",
                    "description": "Indicates if the dataset name includes special characters (e.g., @, #, $)."
                },
                {
                    "name": "input_sensitivity",
                    "description": "Measure of how much output changes in response to changes in input"
                },
                {
                    "name": "percentage_of_binary_features",
                    "description": "The percentage of features in the dataset that are binary."
                },
                {
                    "name": "adaptive_learning",
                    "description": "Indicates if the model can adaptively learn from incoming data in real-time"
                },
                {
                    "name": "algorithm_upgrade_threshold",
                    "description": "Minimum threshold of improvements needed for considering an algorithm upgrade"
                },
                {
                    "name": "computation_load_ratio",
                    "description": "Ratio of system's computation power to model's computation demand"
                },
                {
                    "name": "has_missing_values",
                    "description": "Indicates whether the dataset has missing values."
                },
                {
                    "name": "includes_image_data",
                    "description": "Indicates if the dataset includes image data"
                },
                {
                    "name": "model_accuracy",
                    "description": "Accuracy of the machine learning model"
                },
                {
                    "name": "performance_stability",
                    "description": "Measure of how stable the model's performance is over different datasets"
                },
                {
                    "name": "data_coverage_percentage",
                    "description": "Percentage of total possible data points actually present in the dataset."
                },
                {
                    "name": "requires_complex_features",
                    "description": "Indicates if the dataset requires complex feature types like polynomial or interaction features to model."
                },
                {
                    "name": "model_complexity",
                    "description": "Categorical measure of model complexity (low, medium, high)"
                },
                {
                    "name": "data_size",
                    "description": "Size of the dataset used"
                },
                {
                    "name": "computational_complexity_percentile",
                    "description": "Percentile ranking of models based on computational complexity"
                },
                {
                    "name": "uses_dropout",
                    "description": "Indicates whether dropout regularization is used in the model"
                },
                {
                    "name": "dataset_size",
                    "description": "Represents the total number of records in the dataset."
                },
                {
                    "name": "data_type_diversity",
                    "description": "Counts the number of unique data types used in the dataset"
                }
            ]
        }
    ],
    "queries": [
        {
            "qid": 381,
            "question": "What is the relationship between the diversity of data types, the size of the dataset, and the need for normalization in machine learning datasets?",
            "question_type": "general",
            "difficulty": 1
        }
    ]
}