{"penn_treebank": {"description": "This is the Penn Treebank Project: Release 2 CDROM, featuring a million words of 1989 Wall Street Journal material. This corpus has been annotated for part-of-speech (POS) information. In addition, over half of it has been annotated for skeletal syntactic structure.\n", "citation": "@article{marcus-etal-1993-building,\n    title = \"Building a Large Annotated Corpus of {E}nglish: The {P}enn {T}reebank\",\n    author = \"Marcus, Mitchell P.  and\n      Santorini, Beatrice  and\n      Marcinkiewicz, Mary Ann\",\n    journal = \"Computational Linguistics\",\n    volume = \"19\",\n    number = \"2\",\n    year = \"1993\",\n    url = \"https://www.aclweb.org/anthology/J93-2004\",\n    pages = \"313--330\",\n}\n", "homepage": "https://catalog.ldc.upenn.edu/LDC99T42", "license": "LDC User Agreement for Non-Members", "features": {"sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "builder_name": "ptb_text_only", "config_name": "penn_treebank", "version": {"version_str": "1.1.0", "description": null, "major": 1, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 5143706, "num_examples": 42068, "dataset_name": "ptb_text_only"}, "test": {"name": "test", "num_bytes": 453710, "num_examples": 3761, "dataset_name": "ptb_text_only"}, "validation": {"name": "validation", "num_bytes": 403156, "num_examples": 3370, "dataset_name": "ptb_text_only"}}, "download_checksums": {"https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt": {"num_bytes": 5101618, "checksum": "fcea919f6cf83f35d4d00c6cbf08040d13d4155226340912e2fef9c9c4102cbf"}, "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.valid.txt": {"num_bytes": 399782, "checksum": "c9fe6985fe0d4ccb578183407d7668fc6066c20700cb4cf87d8ff1cc34df1bf2"}, "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt": {"num_bytes": 449945, "checksum": "dd65dff31e70846b2a6030a87482edcd5d199130cdcfa1f3dccbb033728deee0"}}, "download_size": 5951345, "post_processing_size": null, "dataset_size": 6000572, "size_in_bytes": 11951917}}