{
    "@context": {
        "@language": "en",
        "@vocab": "https://schema.org/",
        "arrayShape": "cr:arrayShape",
        "citeAs": "cr:citeAs",
        "column": "cr:column",
        "conformsTo": "dct:conformsTo",
        "cr": "http://mlcommons.org/croissant/",
        "data": {
            "@id": "cr:data",
            "@type": "@json"
        },
        "dataBiases": "cr:dataBiases",
        "dataCollection": "cr:dataCollection",
        "dataType": {
            "@id": "cr:dataType",
            "@type": "@vocab"
        },
        "dct": "http://purl.org/dc/terms/",
        "extract": "cr:extract",
        "field": "cr:field",
        "fileProperty": "cr:fileProperty",
        "fileObject": "cr:fileObject",
        "fileSet": "cr:fileSet",
        "format": "cr:format",
        "includes": "cr:includes",
        "isArray": "cr:isArray",
        "isLiveDataset": "cr:isLiveDataset",
        "jsonPath": "cr:jsonPath",
        "key": "cr:key",
        "md5": "cr:md5",
        "parentField": "cr:parentField",
        "path": "cr:path",
        "personalSensitiveInformation": "cr:personalSensitiveInformation",
        "recordSet": "cr:recordSet",
        "references": "cr:references",
        "regex": "cr:regex",
        "repeated": "cr:repeated",
        "replace": "cr:replace",
        "sc": "https://schema.org/",
        "separator": "cr:separator",
        "source": "cr:source",
        "subField": "cr:subField",
        "transform": "cr:transform"
    },
    "@type": "sc:Dataset",
    "distribution": [
        {
            "@type": "cr:FileObject",
            "@id": "repo",
            "name": "repo",
            "description": "The Hugging Face git repository.",
            "contentUrl": "https://huggingface.co/datasets/colabfit/perov-5_polymorph_split/tree/refs%2Fconvert%2Fparquet",
            "encodingFormat": "git+https",
            "sha256": "https://github.com/mlcommons/croissant/issues/80"
        },
        {
            "@type": "cr:FileSet",
            "@id": "parquet-files-for-config-default",
            "containedIn": {
                "@id": "repo"
            },
            "encodingFormat": "application/x-parquet",
            "includes": "default/*/*.parquet"
        }
    ],
    "recordSet": [
        {
            "@type": "cr:RecordSet",
            "dataType": "cr:Split",
            "key": {
                "@id": "default_splits/split_name"
            },
            "@id": "default_splits",
            "name": "default_splits",
            "description": "Splits for the default config.",
            "field": [
                {
                    "@type": "cr:Field",
                    "@id": "default_splits/split_name",
                    "dataType": "sc:Text"
                }
            ],
            "data": [
                {
                    "default_splits/split_name": "train"
                },
                {
                    "default_splits/split_name": "test"
                },
                {
                    "default_splits/split_name": "val"
                }
            ]
        },
        {
            "@type": "cr:RecordSet",
            "@id": "default",
            "description": "colabfit/perov-5_polymorph_split - 'default' subset\n\nAdditional information:\n- 3 splits: train, test, val",
            "field": [
                {
                    "@type": "cr:Field",
                    "@id": "default/split",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "fileProperty": "fullpath"
                        },
                        "transform": {
                            "regex": "default/(?:partial-)?(train|test|val)/.+parquet$"
                        }
                    },
                    "references": {
                        "field": {
                            "@id": "default_splits/split_name"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/id",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "id"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/hash",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "hash"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/last_modified",
                    "dataType": "sc:Date",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "last_modified"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/dataset_ids",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "dataset_ids"
                        }
                    },
                    "isArray": true,
                    "arrayShape": "-1"
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/chemical_formula_hill",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "chemical_formula_hill"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/chemical_formula_reduced",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "chemical_formula_reduced"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/chemical_formula_anonymous",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "chemical_formula_anonymous"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/elements",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "elements"
                        }
                    },
                    "isArray": true,
                    "arrayShape": "-1"
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/elements_ratios",
                    "dataType": "cr:Float64",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "elements_ratios"
                        }
                    },
                    "isArray": true,
                    "arrayShape": "-1"
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/atomic_numbers",
                    "dataType": "cr:Int32",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "atomic_numbers"
                        }
                    },
                    "isArray": true,
                    "arrayShape": "-1"
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/nsites",
                    "dataType": "cr:Int32",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "nsites"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/nelements",
                    "dataType": "cr:Int32",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "nelements"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/nperiodic_dimensions",
                    "dataType": "cr:Int32",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "nperiodic_dimensions"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/cell",
                    "dataType": "cr:Float64",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "cell"
                        }
                    },
                    "isArray": true,
                    "arrayShape": "-1,-1"
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/dimension_types",
                    "dataType": "cr:Int32",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "dimension_types"
                        }
                    },
                    "isArray": true,
                    "arrayShape": "-1"
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/pbc",
                    "dataType": "sc:Boolean",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "pbc"
                        }
                    },
                    "isArray": true,
                    "arrayShape": "-1"
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/names",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "names"
                        }
                    },
                    "isArray": true,
                    "arrayShape": "-1"
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/labels",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "labels"
                        }
                    },
                    "isArray": true,
                    "arrayShape": "-1"
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/metadata_id",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "metadata_id"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/metadata_path",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "metadata_path"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/metadata_size",
                    "dataType": "cr:Int32",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "metadata_size"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/structure_hash",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "structure_hash"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "default/positions_00",
                    "dataType": "cr:Float64",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-default"
                        },
                        "extract": {
                            "column": "positions_00"
                        }
                    },
                    "isArray": true,
                    "arrayShape": "-1,-1"
                }
            ]
        }
    ],
    "conformsTo": "http://mlcommons.org/croissant/1.1",
    "name": "perov-5_polymorph_split",
    "description": "\n\t\n\t\t\n\t\tDataset Name\n\t\n\nPerov-5 Polymorph Split  \n\n\t\n\t\t\n\t\tDescription\n\t\n\nA new split of the perov-5 dataset which contains 18,928 perovskite structures within which there are only 9,646 unique compositions (9,282 compositions show up twice in the dataset, while 364 compositions show up once). The new splits ensure that polymorph pairs are grouped within the training, validation, and test sets.The following dataset split has been cultivated from the perov-5 (Castelli 2011, doi:… See the full description on the dataset page: https://huggingface.co/datasets/colabfit/perov-5_polymorph_split.",
    "alternateName": [
        "colabfit/perov-5_polymorph_split",
        "Perov-5 Polymorph Split"
    ],
    "creator": {
        "@type": "Organization",
        "name": "ColabFit",
        "url": "https://huggingface.co/colabfit"
    },
    "keywords": [
        "cc-by-4.0",
        "10K - 100K",
        "parquet",
        "Tabular",
        "Text",
        "Datasets",
        "pandas",
        "Croissant",
        "Polars",
        "🇺🇸 Region: US",
        "generative modeling",
        "materials discovery",
        "DFT"
    ],
    "license": "https://choosealicense.com/licenses/cc-by-4.0/",
    "url": "https://huggingface.co/datasets/colabfit/perov-5_polymorph_split"
}