{"@context":{"@language":"en","@vocab":"https://schema.org/","arrayShape":"cr:arrayShape","citeAs":"cr:citeAs","column":"cr:column","conformsTo":"dct:conformsTo","cr":"http://mlcommons.org/croissant/","data":{"@id":"cr:data","@type":"@json"},"dataBiases":"cr:dataBiases","dataCollection":"cr:dataCollection","dataType":{"@id":"cr:dataType","@type":"@vocab"},"dct":"http://purl.org/dc/terms/","extract":"cr:extract","field":"cr:field","fileProperty":"cr:fileProperty","fileObject":"cr:fileObject","fileSet":"cr:fileSet","format":"cr:format","includes":"cr:includes","isArray":"cr:isArray","isLiveDataset":"cr:isLiveDataset","jsonPath":"cr:jsonPath","key":"cr:key","md5":"cr:md5","parentField":"cr:parentField","path":"cr:path","personalSensitiveInformation":"cr:personalSensitiveInformation","recordSet":"cr:recordSet","references":"cr:references","regex":"cr:regex","repeated":"cr:repeated","replace":"cr:replace","sc":"https://schema.org/","separator":"cr:separator","source":"cr:source","subField":"cr:subField","transform":"cr:transform"},"@type":"sc:Dataset","distribution":[{"@type":"cr:FileObject","@id":"repo","name":"repo","description":"The Hugging Face git repository.","contentUrl":"https://huggingface.co/datasets/aladinDJ/tulutalk-annotated/tree/refs%2Fconvert%2Fparquet","encodingFormat":"git+https","sha256":"https://github.com/mlcommons/croissant/issues/80"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-default","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"default/*/*.parquet"}],"recordSet":[{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"default_splits/split_name"},"@id":"default_splits","name":"default_splits","description":"Splits for the default config.","field":[{"@type":"cr:Field","@id":"default_splits/split_name","dataType":"sc:Text"}],"data":[{"default_splits/split_name":"train"}]},{"@type":"cr:RecordSet","@id":"default","description":"aladinDJ/tulutalk-annotated - 'default' 
subset","field":[{"@type":"cr:Field","@id":"default/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"default/(?:partial-)?(train)/.+parquet$"}},"references":{"field":{"@id":"default_splits/split_name"}}},{"@type":"cr:Field","@id":"default/conversation_id","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"conversation_id"}}},{"@type":"cr:Field","@id":"default/source","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"source"}}},{"@type":"cr:Field","@id":"default/tulu_id","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"tulu_id"}}},{"@type":"cr:Field","@id":"default/instruction","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"instruction"}}},{"@type":"cr:Field","@id":"default/response","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"response"}}},{"@type":"cr:Field","@id":"default/messages","subField":[{"@type":"cr:Field","@id":"default/messages/content","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"messages"},"transform":{"jsonPath":"content"}}},{"@type":"cr:Field","@id":"default/messages/role","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"messages"},"transform":{"jsonPath":"role"}}}],"isArray":true,"arrayShape":"-1"},{"@type":"cr:Field","@id":"default/intent","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"intent"}}},{"@type":"cr:Field","@id":"default/knowledge","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"knowledge"}}},{"@type":"cr:Field","@id":"default/difficul
ty","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"difficulty"}}},{"@type":"cr:Field","@id":"default/difficulty_generator","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"difficulty_generator"}}},{"@type":"cr:Field","@id":"default/input_quality","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"input_quality"}}},{"@type":"cr:Field","@id":"default/quality_explanation","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"quality_explanation"}}},{"@type":"cr:Field","@id":"default/quality_generator","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"quality_generator"}}},{"@type":"cr:Field","@id":"default/task_category","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"task_category"}}},{"@type":"cr:Field","@id":"default/other_task_category","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"other_task_category"}},"isArray":true,"arrayShape":"-1"},{"@type":"cr:Field","@id":"default/task_category_generator","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"task_category_generator"}}},{"@type":"cr:Field","@id":"default/Turn","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"Turn"}}},{"@type":"cr:Field","@id":"default/mt_instruct_reward","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"mt_instruct_reward"}}},{"@type":"cr:Field","@id":"default/mt_reward_explanation","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"mt_reward_explanation"}}},{"@type":"cr:Field","@id":"default/st_
instruct_reward","dataType":"cr:Float64","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"st_instruct_reward"}}},{"@type":"cr:Field","@id":"default/st_reward_model","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"st_reward_model"}}},{"@type":"cr:Field","@id":"default/llama_guard_2","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"llama_guard_2"}}},{"@type":"cr:Field","@id":"default/language","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"language"}}}]}],"conformsTo":"http://mlcommons.org/croissant/1.1","name":"tulutalk-annotated","description":"🐪💬 TuluTalk: Magpie-Annotated Tülu + SmolTalk Mixture\n\n🌟 Overview\n\nTuluTalk is a lean, high-quality post-training dataset created by merging and filtering two flagship open corpora — Tülu-3 SFT-Mix and SmolTalk — using the Magpie Annotation Framework. Through quality-aware and task-aware curation, TuluTalk achieves 14 % fewer samples than Tülu and 23 % fewer than SmolTalk, yet matches or exceeds their downstream performance across reasoning, math, and coding benchmarks.… See the full description on the dataset page: https://huggingface.co/datasets/aladinDJ/tulutalk-annotated.","alternateName":["aladinDJ/tulutalk-annotated"],"creator":{"@type":"Person","name":"Aladin D.","url":"https://huggingface.co/aladinDJ"},"keywords":["100K - 1M","parquet","Tabular","Text","Datasets","Dask","Croissant","Polars","arxiv:2506.06522","🇺🇸 Region: US"],"url":"https://huggingface.co/datasets/aladinDJ/tulutalk-annotated"}