{
    "@context": {
        "@language": "en",
        "@vocab": "https://schema.org/",
        "arrayShape": "cr:arrayShape",
        "citeAs": "cr:citeAs",
        "column": "cr:column",
        "conformsTo": "dct:conformsTo",
        "cr": "http://mlcommons.org/croissant/",
        "data": {
            "@id": "cr:data",
            "@type": "@json"
        },
        "dataBiases": "cr:dataBiases",
        "dataCollection": "cr:dataCollection",
        "dataType": {
            "@id": "cr:dataType",
            "@type": "@vocab"
        },
        "dct": "http://purl.org/dc/terms/",
        "extract": "cr:extract",
        "field": "cr:field",
        "fileProperty": "cr:fileProperty",
        "fileObject": "cr:fileObject",
        "fileSet": "cr:fileSet",
        "format": "cr:format",
        "includes": "cr:includes",
        "isArray": "cr:isArray",
        "isLiveDataset": "cr:isLiveDataset",
        "jsonPath": "cr:jsonPath",
        "key": "cr:key",
        "md5": "cr:md5",
        "parentField": "cr:parentField",
        "path": "cr:path",
        "personalSensitiveInformation": "cr:personalSensitiveInformation",
        "recordSet": "cr:recordSet",
        "references": "cr:references",
        "regex": "cr:regex",
        "repeated": "cr:repeated",
        "replace": "cr:replace",
        "sc": "https://schema.org/",
        "separator": "cr:separator",
        "source": "cr:source",
        "subField": "cr:subField",
        "transform": "cr:transform"
    },
    "@type": "sc:Dataset",
    "distribution": [
        {
            "@type": "cr:FileObject",
            "@id": "repo",
            "name": "repo",
            "description": "The Hugging Face git repository.",
            "contentUrl": "https://huggingface.co/datasets/grandiflorum/LithoSim/tree/refs%2Fconvert%2Fparquet",
            "encodingFormat": "git+https",
            "sha256": "https://github.com/mlcommons/croissant/issues/80"
        },
        {
            "@type": "cr:FileSet",
            "@id": "parquet-files-for-config-metal",
            "containedIn": {
                "@id": "repo"
            },
            "encodingFormat": "application/x-parquet",
            "includes": "metal/*/*.parquet"
        },
        {
            "@type": "cr:FileSet",
            "@id": "parquet-files-for-config-ood",
            "containedIn": {
                "@id": "repo"
            },
            "encodingFormat": "application/x-parquet",
            "includes": "ood/*/*.parquet"
        },
        {
            "@type": "cr:FileSet",
            "@id": "parquet-files-for-config-opc_metal",
            "containedIn": {
                "@id": "repo"
            },
            "encodingFormat": "application/x-parquet",
            "includes": "opc_metal/*/*.parquet"
        },
        {
            "@type": "cr:FileSet",
            "@id": "parquet-files-for-config-opc_via",
            "containedIn": {
                "@id": "repo"
            },
            "encodingFormat": "application/x-parquet",
            "includes": "opc_via/*/*.parquet"
        },
        {
            "@type": "cr:FileSet",
            "@id": "parquet-files-for-config-via",
            "containedIn": {
                "@id": "repo"
            },
            "encodingFormat": "application/x-parquet",
            "includes": "via/*/*.parquet"
        }
    ],
    "recordSet": [
        {
            "@type": "cr:RecordSet",
            "dataType": "cr:Split",
            "key": {
                "@id": "metal_splits/split_name"
            },
            "@id": "metal_splits",
            "name": "metal_splits",
            "description": "Splits for the metal config.",
            "field": [
                {
                    "@type": "cr:Field",
                    "@id": "metal_splits/split_name",
                    "dataType": "sc:Text"
                }
            ],
            "data": [
                {
                    "metal_splits/split_name": "train"
                },
                {
                    "metal_splits/split_name": "test"
                }
            ]
        },
        {
            "@type": "cr:RecordSet",
            "@id": "metal",
            "description": "grandiflorum/LithoSim - 'metal' subset\n\nAdditional information:\n- 2 splits: train, test",
            "field": [
                {
                    "@type": "cr:Field",
                    "@id": "metal/split",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-metal"
                        },
                        "extract": {
                            "fileProperty": "fullpath"
                        },
                        "transform": {
                            "regex": "metal/(?:partial-)?(train|test)/.+parquet$"
                        }
                    },
                    "references": {
                        "field": {
                            "@id": "metal_splits/split_name"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "metal/src_path",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-metal"
                        },
                        "extract": {
                            "column": "src_path"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "metal/mask_path",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-metal"
                        },
                        "extract": {
                            "column": "mask_path"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "metal/dose",
                    "dataType": "cr:Float64",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-metal"
                        },
                        "extract": {
                            "column": "dose"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "metal/defocus",
                    "dataType": "cr:Int64",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-metal"
                        },
                        "extract": {
                            "column": "defocus"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "metal/RI_path",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-metal"
                        },
                        "extract": {
                            "column": "RI_path"
                        }
                    }
                }
            ]
        },
        {
            "@type": "cr:RecordSet",
            "dataType": "cr:Split",
            "key": {
                "@id": "ood_splits/split_name"
            },
            "@id": "ood_splits",
            "name": "ood_splits",
            "description": "Splits for the ood config.",
            "field": [
                {
                    "@type": "cr:Field",
                    "@id": "ood_splits/split_name",
                    "dataType": "sc:Text"
                }
            ],
            "data": [
                {
                    "ood_splits/split_name": "test"
                }
            ]
        },
        {
            "@type": "cr:RecordSet",
            "@id": "ood",
            "description": "grandiflorum/LithoSim - 'ood' subset",
            "field": [
                {
                    "@type": "cr:Field",
                    "@id": "ood/split",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-ood"
                        },
                        "extract": {
                            "fileProperty": "fullpath"
                        },
                        "transform": {
                            "regex": "ood/(?:partial-)?(test)/.+parquet$"
                        }
                    },
                    "references": {
                        "field": {
                            "@id": "ood_splits/split_name"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "ood/src_path",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-ood"
                        },
                        "extract": {
                            "column": "src_path"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "ood/mask_path",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-ood"
                        },
                        "extract": {
                            "column": "mask_path"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "ood/dose",
                    "dataType": "cr:Float64",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-ood"
                        },
                        "extract": {
                            "column": "dose"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "ood/defocus",
                    "dataType": "cr:Int64",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-ood"
                        },
                        "extract": {
                            "column": "defocus"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "ood/RI_path",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-ood"
                        },
                        "extract": {
                            "column": "RI_path"
                        }
                    }
                }
            ]
        },
        {
            "@type": "cr:RecordSet",
            "dataType": "cr:Split",
            "key": {
                "@id": "opc_metal_splits/split_name"
            },
            "@id": "opc_metal_splits",
            "name": "opc_metal_splits",
            "description": "Splits for the opc_metal config.",
            "field": [
                {
                    "@type": "cr:Field",
                    "@id": "opc_metal_splits/split_name",
                    "dataType": "sc:Text"
                }
            ],
            "data": [
                {
                    "opc_metal_splits/split_name": "train"
                },
                {
                    "opc_metal_splits/split_name": "test"
                }
            ]
        },
        {
            "@type": "cr:RecordSet",
            "@id": "opc_metal",
            "description": "grandiflorum/LithoSim - 'opc_metal' subset\n\nAdditional information:\n- 2 splits: train, test",
            "field": [
                {
                    "@type": "cr:Field",
                    "@id": "opc_metal/split",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-opc_metal"
                        },
                        "extract": {
                            "fileProperty": "fullpath"
                        },
                        "transform": {
                            "regex": "opc_metal/(?:partial-)?(train|test)/.+parquet$"
                        }
                    },
                    "references": {
                        "field": {
                            "@id": "opc_metal_splits/split_name"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "opc_metal/src_path",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-opc_metal"
                        },
                        "extract": {
                            "column": "src_path"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "opc_metal/mask_path",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-opc_metal"
                        },
                        "extract": {
                            "column": "mask_path"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "opc_metal/dose",
                    "dataType": "cr:Float64",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-opc_metal"
                        },
                        "extract": {
                            "column": "dose"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "opc_metal/defocus",
                    "dataType": "cr:Int64",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-opc_metal"
                        },
                        "extract": {
                            "column": "defocus"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "opc_metal/RI_path",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-opc_metal"
                        },
                        "extract": {
                            "column": "RI_path"
                        }
                    }
                }
            ]
        },
        {
            "@type": "cr:RecordSet",
            "dataType": "cr:Split",
            "key": {
                "@id": "opc_via_splits/split_name"
            },
            "@id": "opc_via_splits",
            "name": "opc_via_splits",
            "description": "Splits for the opc_via config.",
            "field": [
                {
                    "@type": "cr:Field",
                    "@id": "opc_via_splits/split_name",
                    "dataType": "sc:Text"
                }
            ],
            "data": [
                {
                    "opc_via_splits/split_name": "train"
                },
                {
                    "opc_via_splits/split_name": "test"
                }
            ]
        },
        {
            "@type": "cr:RecordSet",
            "@id": "opc_via",
            "description": "grandiflorum/LithoSim - 'opc_via' subset\n\nAdditional information:\n- 2 splits: train, test",
            "field": [
                {
                    "@type": "cr:Field",
                    "@id": "opc_via/split",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-opc_via"
                        },
                        "extract": {
                            "fileProperty": "fullpath"
                        },
                        "transform": {
                            "regex": "opc_via/(?:partial-)?(train|test)/.+parquet$"
                        }
                    },
                    "references": {
                        "field": {
                            "@id": "opc_via_splits/split_name"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "opc_via/src_path",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-opc_via"
                        },
                        "extract": {
                            "column": "src_path"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "opc_via/mask_path",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-opc_via"
                        },
                        "extract": {
                            "column": "mask_path"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "opc_via/dose",
                    "dataType": "cr:Float64",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-opc_via"
                        },
                        "extract": {
                            "column": "dose"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "opc_via/defocus",
                    "dataType": "cr:Int64",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-opc_via"
                        },
                        "extract": {
                            "column": "defocus"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "opc_via/RI_path",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-opc_via"
                        },
                        "extract": {
                            "column": "RI_path"
                        }
                    }
                }
            ]
        },
        {
            "@type": "cr:RecordSet",
            "dataType": "cr:Split",
            "key": {
                "@id": "via_splits/split_name"
            },
            "@id": "via_splits",
            "name": "via_splits",
            "description": "Splits for the via config.",
            "field": [
                {
                    "@type": "cr:Field",
                    "@id": "via_splits/split_name",
                    "dataType": "sc:Text"
                }
            ],
            "data": [
                {
                    "via_splits/split_name": "train"
                },
                {
                    "via_splits/split_name": "test"
                }
            ]
        },
        {
            "@type": "cr:RecordSet",
            "@id": "via",
            "description": "grandiflorum/LithoSim - 'via' subset\n\nAdditional information:\n- 2 splits: train, test",
            "field": [
                {
                    "@type": "cr:Field",
                    "@id": "via/split",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-via"
                        },
                        "extract": {
                            "fileProperty": "fullpath"
                        },
                        "transform": {
                            "regex": "via/(?:partial-)?(train|test)/.+parquet$"
                        }
                    },
                    "references": {
                        "field": {
                            "@id": "via_splits/split_name"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "via/src_path",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-via"
                        },
                        "extract": {
                            "column": "src_path"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "via/mask_path",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-via"
                        },
                        "extract": {
                            "column": "mask_path"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "via/dose",
                    "dataType": "cr:Float64",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-via"
                        },
                        "extract": {
                            "column": "dose"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "via/defocus",
                    "dataType": "cr:Int64",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-via"
                        },
                        "extract": {
                            "column": "defocus"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "via/RI_path",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "parquet-files-for-config-via"
                        },
                        "extract": {
                            "column": "RI_path"
                        }
                    }
                }
            ]
        }
    ],
    "conformsTo": "http://mlcommons.org/croissant/1.1",
    "name": "LithoSim",
    "description": "The benchmark of \"LithoSim: A Large, Holistic Lithography Simulation Benchmark for AI-Driven Semiconductor Manufacturing\"\nThe corresponding GitHub repo can be found at https://github.com/dw-hongquan/LithoSim\n\n\t\n\t\t\n\t\tData Construction\n\t\n\n\n4 in-distributed dataset (OPC_Metal/Metal/OPC_Via/Via).\n1 out-of-distribution (OOD) dataset.\nEach main dataset has a train_val and a test folder with compressed data file.\nEach set of data contains a source_simple.src description of the source, a layout.png, a… See the full description on the dataset page: https://huggingface.co/datasets/grandiflorum/LithoSim.",
    "alternateName": [
        "grandiflorum/LithoSim",
        "litho"
    ],
    "creator": {
        "@type": "Person",
        "name": "Hongquan He",
        "url": "https://huggingface.co/grandiflorum"
    },
    "keywords": [
        "feature-extraction",
        "English",
        "apache-2.0",
        "1M - 10M",
        "csv",
        "Tabular",
        "Text",
        "Datasets",
        "pandas",
        "Croissant",
        "Polars",
        "🇺🇸 Region: US",
        "code"
    ],
    "license": "https://choosealicense.com/licenses/apache-2.0/",
    "url": "https://huggingface.co/datasets/grandiflorum/LithoSim"
}