{"@context":{"@language":"en","@vocab":"https://schema.org/","arrayShape":"cr:arrayShape","citeAs":"cr:citeAs","column":"cr:column","conformsTo":"dct:conformsTo","cr":"http://mlcommons.org/croissant/","data":{"@id":"cr:data","@type":"@json"},"dataBiases":"cr:dataBiases","dataCollection":"cr:dataCollection","dataType":{"@id":"cr:dataType","@type":"@vocab"},"dct":"http://purl.org/dc/terms/","extract":"cr:extract","field":"cr:field","fileProperty":"cr:fileProperty","fileObject":"cr:fileObject","fileSet":"cr:fileSet","format":"cr:format","includes":"cr:includes","isArray":"cr:isArray","isLiveDataset":"cr:isLiveDataset","jsonPath":"cr:jsonPath","key":"cr:key","md5":"cr:md5","parentField":"cr:parentField","path":"cr:path","personalSensitiveInformation":"cr:personalSensitiveInformation","recordSet":"cr:recordSet","references":"cr:references","regex":"cr:regex","repeated":"cr:repeated","replace":"cr:replace","sc":"https://schema.org/","separator":"cr:separator","source":"cr:source","subField":"cr:subField","transform":"cr:transform"},"@type":"sc:Dataset","distribution":[{"@type":"cr:FileObject","@id":"repo","name":"repo","description":"The Hugging Face git repository.","contentUrl":"https://huggingface.co/datasets/chychiu/VivaBench/tree/refs%2Fconvert%2Fparquet","encodingFormat":"git+https","sha256":"https://github.com/mlcommons/croissant/issues/80"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-benchmark_pubmed","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"benchmark_pubmed/*/*.parquet"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-generated_raw","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"generated_raw/*/*.parquet"}],"recordSet":[{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"benchmark_pubmed_splits/split_name"},"@id":"benchmark_pubmed_splits","name":"benchmark_pubmed_splits","description":"Splits for the benchmark_pubmed 
config.","field":[{"@type":"cr:Field","@id":"benchmark_pubmed_splits/split_name","dataType":"sc:Text"}],"data":[{"benchmark_pubmed_splits/split_name":"test"}]},{"@type":"cr:RecordSet","@id":"benchmark_pubmed","description":"chychiu/VivaBench - 'benchmark_pubmed' subset","field":[{"@type":"cr:Field","@id":"benchmark_pubmed/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-benchmark_pubmed"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"benchmark_pubmed/(?:partial-)?(test)/.+parquet$"}},"references":{"field":{"@id":"benchmark_pubmed_splits/split_name"}}},{"@type":"cr:Field","@id":"benchmark_pubmed/uid","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-benchmark_pubmed"},"extract":{"column":"uid"}}},{"@type":"cr:Field","@id":"benchmark_pubmed/source","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-benchmark_pubmed"},"extract":{"column":"source"}}},{"@type":"cr:Field","@id":"benchmark_pubmed/vignette","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-benchmark_pubmed"},"extract":{"column":"vignette"}}},{"@type":"cr:Field","@id":"benchmark_pubmed/specialty_group","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-benchmark_pubmed"},"extract":{"column":"specialty_group"}}},{"@type":"cr:Field","@id":"benchmark_pubmed/diagnosis","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-benchmark_pubmed"},"extract":{"column":"diagnosis"}}},{"@type":"cr:Field","@id":"benchmark_pubmed/differentials","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-benchmark_pubmed"},"extract":{"column":"differentials"}}},{"@type":"cr:Field","@id":"benchmark_pubmed/clinicalcase","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-benchmark_pubmed"},"extract":{"column":"clinicalcase"}}}]},{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"generated_raw_splits/split_name"},"@id":"genera
ted_raw_splits","name":"generated_raw_splits","description":"Splits for the generated_raw config.","field":[{"@type":"cr:Field","@id":"generated_raw_splits/split_name","dataType":"sc:Text"}],"data":[{"generated_raw_splits/split_name":"test"}]},{"@type":"cr:RecordSet","@id":"generated_raw","description":"chychiu/VivaBench - 'generated_raw' subset","field":[{"@type":"cr:Field","@id":"generated_raw/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-generated_raw"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"generated_raw/(?:partial-)?(test)/.+parquet$"}},"references":{"field":{"@id":"generated_raw_splits/split_name"}}},{"@type":"cr:Field","@id":"generated_raw/uid","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-generated_raw"},"extract":{"column":"uid"}}},{"@type":"cr:Field","@id":"generated_raw/source","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-generated_raw"},"extract":{"column":"source"}}},{"@type":"cr:Field","@id":"generated_raw/vignette","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-generated_raw"},"extract":{"column":"vignette"}}},{"@type":"cr:Field","@id":"generated_raw/specialty_group","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-generated_raw"},"extract":{"column":"specialty_group"}}},{"@type":"cr:Field","@id":"generated_raw/diagnosis","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-generated_raw"},"extract":{"column":"diagnosis"}}},{"@type":"cr:Field","@id":"generated_raw/differentials","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-generated_raw"},"extract":{"column":"differentials"}}},{"@type":"cr:Field","@id":"generated_raw/clinicalcase","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-generated_raw"},"extract":{"column":"clinicalcase"}}}]}],"conformsTo":"http://mlcommons.org/croissant/1.1","name":"VivaBench","description"
:"VivaBench: Simulating Viva Voce Examinations to Evaluate Clinical Reasoning in LLMs\n\nThis repository is the official implementation of VivaBench—“Simulating Viva Voce Examinations to Evaluate Clinical Reasoning in Large Language Models.”\nVivaBench is a multi-turn benchmark of 1,152 physician-curated clinical vignettes that simulates a viva voce (oral) exam: agents must iteratively gather H&P findings and order investigations to arrive at a diagnosis.\n\n📋 Requirements… See the full description on the dataset page: https://huggingface.co/datasets/chychiu/VivaBench.","alternateName":["chychiu/VivaBench","VivaBench"],"creator":{"@type":"Person","name":"C Chiu","url":"https://huggingface.co/chychiu"},"keywords":["question-answering","English","cc-by-4.0","1K - 10K","csv","Text","Datasets","pandas","Croissant","Polars","🇺🇸 Region: US","medical"],"license":"https://choosealicense.com/licenses/cc-by-4.0/","url":"https://huggingface.co/datasets/chychiu/VivaBench"}