{"@context": {"@language": "en", "@vocab": "https://schema.org/", "arrayShape": "cr:arrayShape", "citeAs": "cr:citeAs", "column": "cr:column", "conformsTo": "dct:conformsTo", "cr": "http://mlcommons.org/croissant/", "data": {"@id": "cr:data", "@type": "@json"}, "dataBiases": "cr:dataBiases", "dataCollection": "cr:dataCollection", "dataType": {"@id": "cr:dataType", "@type": "@vocab"}, "dct": "http://purl.org/dc/terms/", "extract": "cr:extract", "field": "cr:field", "fileProperty": "cr:fileProperty", "fileObject": "cr:fileObject", "fileSet": "cr:fileSet", "format": "cr:format", "includes": "cr:includes", "isArray": "cr:isArray", "isLiveDataset": "cr:isLiveDataset", "jsonPath": "cr:jsonPath", "key": "cr:key", "md5": "cr:md5", "parentField": "cr:parentField", "path": "cr:path", "personalSensitiveInformation": "cr:personalSensitiveInformation", "recordSet": "cr:recordSet", "references": "cr:references", "regex": "cr:regex", "repeated": "cr:repeated", "replace": "cr:replace", "sc": "https://schema.org/", "separator": "cr:separator", "source": "cr:source", "subField": "cr:subField", "transform": "cr:transform"}, "@type": "sc:Dataset", "distribution": [{"@type": "cr:FileObject", "@id": "repo", "name": "repo", "description": "The Hugging Face git repository.", "contentUrl": "https://huggingface.co/datasets/colabfit/Alex-MP-20_Polymorph_Split/tree/refs%2Fconvert%2Fparquet", "encodingFormat": "git+https", "sha256": "https://github.com/mlcommons/croissant/issues/80"}, {"@type": "cr:FileSet", "@id": "parquet-files-for-config-default", "containedIn": {"@id": "repo"}, "encodingFormat": "application/x-parquet", "includes": "default/*/*.parquet"}], "recordSet": [{"@type": "cr:RecordSet", "dataType": "cr:Split", "key": {"@id": "default_splits/split_name"}, "@id": "default_splits", "name": "default_splits", "description": "Splits for the default config.", "field": [{"@type": "cr:Field", "@id": "default_splits/split_name", "dataType": "sc:Text"}], "data": 
[{"default_splits/split_name": "train"}, {"default_splits/split_name": "val"}, {"default_splits/split_name": "test"}]}, {"@type": "cr:RecordSet", "@id": "default", "description": "colabfit/Alex-MP-20_Polymorph_Split - 'default' subset\n\nAdditional information:\n- 3 splits: train, val, test", "field": [{"@type": "cr:Field", "@id": "default/split", "dataType": "sc:Text", "source": {"fileSet": {"@id": "parquet-files-for-config-default"}, "extract": {"fileProperty": "fullpath"}, "transform": {"regex": "default/(?:partial-)?(train|val|test)/.+parquet$"}}, "references": {"field": {"@id": "default_splits/split_name"}}}, {"@type": "cr:Field", "@id": "default/positions", "dataType": "cr:Float64", "source": {"fileSet": {"@id": "parquet-files-for-config-default"}, "extract": {"column": "positions"}}, "isArray": true, "arrayShape": "-1"}, {"@type": "cr:Field", "@id": "default/cell", "dataType": "cr:Float64", "source": {"fileSet": {"@id": "parquet-files-for-config-default"}, "extract": {"column": "cell"}}, "isArray": true, "arrayShape": "-1"}, {"@type": "cr:Field", "@id": "default/atomic_numbers", "dataType": "cr:Int64", "source": {"fileSet": {"@id": "parquet-files-for-config-default"}, "extract": {"column": "atomic_numbers"}}, "isArray": true, "arrayShape": "-1"}, {"@type": "cr:Field", "@id": "default/pbc", "dataType": "cr:Int64", "source": {"fileSet": {"@id": "parquet-files-for-config-default"}, "extract": {"column": "pbc"}}, "isArray": true, "arrayShape": "-1"}, {"@type": "cr:Field", "@id": "default/material_id", "dataType": "sc:Text", "source": {"fileSet": {"@id": "parquet-files-for-config-default"}, "extract": {"column": "material_id"}}}, {"@type": "cr:Field", "@id": "default/reduced_formula", "dataType": "sc:Text", "source": {"fileSet": {"@id": "parquet-files-for-config-default"}, "extract": {"column": "reduced_formula"}}}, {"@type": "cr:Field", "@id": "default/space_group", "dataType": "sc:Text", "source": {"fileSet": {"@id": "parquet-files-for-config-default"}, 
"extract": {"column": "space_group"}}}, {"@type": "cr:Field", "@id": "default/chemical_system", "dataType": "sc:Text", "source": {"fileSet": {"@id": "parquet-files-for-config-default"}, "extract": {"column": "chemical_system"}}}, {"@type": "cr:Field", "@id": "default/num_sites", "dataType": "cr:Int64", "source": {"fileSet": {"@id": "parquet-files-for-config-default"}, "extract": {"column": "num_sites"}}}, {"@type": "cr:Field", "@id": "default/cif", "dataType": "sc:Text", "source": {"fileSet": {"@id": "parquet-files-for-config-default"}, "extract": {"column": "cif"}}}, {"@type": "cr:Field", "@id": "default/energy_above_hull", "dataType": "cr:Float64", "source": {"fileSet": {"@id": "parquet-files-for-config-default"}, "extract": {"column": "energy_above_hull"}}}, {"@type": "cr:Field", "@id": "default/dft_band_gap", "dataType": "cr:Float64", "source": {"fileSet": {"@id": "parquet-files-for-config-default"}, "extract": {"column": "dft_band_gap"}}}, {"@type": "cr:Field", "@id": "default/dft_bulk_modulus", "dataType": "cr:Float64", "source": {"fileSet": {"@id": "parquet-files-for-config-default"}, "extract": {"column": "dft_bulk_modulus"}}}, {"@type": "cr:Field", "@id": "default/dft_mag_density", "dataType": "cr:Float64", "source": {"fileSet": {"@id": "parquet-files-for-config-default"}, "extract": {"column": "dft_mag_density"}}}, {"@type": "cr:Field", "@id": "default/hhi_score", "dataType": "cr:Float64", "source": {"fileSet": {"@id": "parquet-files-for-config-default"}, "extract": {"column": "hhi_score"}}}, {"@type": "cr:Field", "@id": "default/ml_bulk_modulus", "dataType": "cr:Float64", "source": {"fileSet": {"@id": "parquet-files-for-config-default"}, "extract": {"column": "ml_bulk_modulus"}}}]}], "conformsTo": "http://mlcommons.org/croissant/1.1", "name": "Alex-MP-20_Polymorph_Split", "description": "\n\t\n\t\t\n\t\tDataset Name\n\t\n\nAlex-MP-20-polymorph-split  \n\n\t\n\t\t\n\t\tCitation\n\t\n\nPlease cite Martirossyan et al. 
(https://arxiv.org/abs/2509.12178) if your work utilizes this dataset.  \n\n\t\n\t\t\n\t\tDescription\n\t\n\nA new split for performing crystal structure prediction on the Alex-MP-20 dataset (https://github.com/microsoft/mattergen/tree/main/data-release/alex-mp) which contains structures from MP-20 (Jain 2013, doi: 10.1063/1.4812323) and Alexandria (Schmidt 2022, doi: 10.24435/materialscloud:m7-50). This\u2026 See the full description on the dataset page: https://huggingface.co/datasets/colabfit/Alex-MP-20_Polymorph_Split.", "alternateName": ["colabfit/Alex-MP-20_Polymorph_Split", "Alex-MP-20-polymorph-split"], "creator": {"@type": "Organization", "name": "ColabFit", "url": "https://huggingface.co/colabfit"}, "keywords": ["cc-by-4.0", "100K - 1M", "parquet", "Tabular", "Text", "Datasets", "pandas", "Croissant", "Polars", "arxiv:2509.12178", "\ud83c\uddfa\ud83c\uddf8 Region: US", "generative modeling", "materials discovery", "DFT"], "license": "https://choosealicense.com/licenses/cc-by-4.0/", "url": "https://huggingface.co/datasets/colabfit/Alex-MP-20_Polymorph_Split"}