{
  "@context": {
    "@language": "en",
    "@vocab": "https://schema.org/",
    "arrayShape": "cr:arrayShape",
    "citeAs": "cr:citeAs",
    "column": "cr:column",
    "conformsTo": "dct:conformsTo",
    "cr": "http://mlcommons.org/croissant/",
    "data": {
      "@id": "cr:data",
      "@type": "@json"
    },
    "dataBiases": "cr:dataBiases",
    "dataCollection": "cr:dataCollection",
    "dataType": {
      "@id": "cr:dataType",
      "@type": "@vocab"
    },
    "dct": "http://purl.org/dc/terms/",
    "extract": "cr:extract",
    "field": "cr:field",
    "fileProperty": "cr:fileProperty",
    "fileObject": "cr:fileObject",
    "fileSet": "cr:fileSet",
    "format": "cr:format",
    "includes": "cr:includes",
    "isArray": "cr:isArray",
    "isLiveDataset": "cr:isLiveDataset",
    "jsonPath": "cr:jsonPath",
    "key": "cr:key",
    "md5": "cr:md5",
    "parentField": "cr:parentField",
    "path": "cr:path",
    "personalSensitiveInformation": "cr:personalSensitiveInformation",
    "recordSet": "cr:recordSet",
    "references": "cr:references",
    "regex": "cr:regex",
    "repeated": "cr:repeated",
    "replace": "cr:replace",
    "sc": "https://schema.org/",
    "separator": "cr:separator",
    "source": "cr:source",
    "subField": "cr:subField",
    "transform": "cr:transform"
  },
  "@type": "sc:Dataset",
  "distribution": [
    {
      "@type": "cr:FileObject",
      "@id": "repo",
      "name": "repo",
      "description": "The Hugging Face git repository.",
      "contentUrl": "https://huggingface.co/datasets/google/svq/tree/refs%2Fconvert%2Fparquet",
      "encodingFormat": "git+https",
      "sha256": "https://github.com/mlcommons/croissant/issues/80"
    },
    {
      "@type": "cr:FileSet",
      "@id": "parquet-files-for-config-audio",
      "containedIn": {
        "@id": "repo"
      },
      "encodingFormat": "application/x-parquet",
      "includes": "audio/*/*.parquet"
    },
    {
      "@type": "cr:FileSet",
      "@id": "parquet-files-for-config-audio_en_us_clean",
      "containedIn": {
        "@id": "repo"
      },
      "encodingFormat": "application/x-parquet",
      "includes": "audio_en_us_clean/*/*.parquet"
    },
    {
      "@type": "cr:FileSet",
      "@id": "parquet-files-for-config-document_retrieval_cross_lang",
      "containedIn": {
        "@id": "repo"
      },
      "encodingFormat": "application/x-parquet",
      "includes": "document_retrieval_cross_lang/*/*.parquet"
    },
    {
      "@type": "cr:FileSet",
      "@id": "parquet-files-for-config-document_retrieval_in_lang",
      "containedIn": {
        "@id": "repo"
      },
      "encodingFormat": "application/x-parquet",
      "includes": "document_retrieval_in_lang/*/*.parquet"
    },
    {
      "@type": "cr:FileSet",
      "@id": "parquet-files-for-config-span_reasoning_cross_lang",
      "containedIn": {
        "@id": "repo"
      },
      "encodingFormat": "application/x-parquet",
      "includes": "span_reasoning_cross_lang/*/*.parquet"
    },
    {
      "@type": "cr:FileSet",
      "@id": "parquet-files-for-config-span_reasoning_in_lang",
      "containedIn": {
        "@id": "repo"
      },
      "encodingFormat": "application/x-parquet",
      "includes": "span_reasoning_in_lang/*/*.parquet"
    },
    {
      "@type": "cr:FileSet",
      "@id": "parquet-files-for-config-span_retrieval_cross_lang",
      "containedIn": {
        "@id": "repo"
      },
      "encodingFormat": "application/x-parquet",
      "includes": "span_retrieval_cross_lang/*/*.parquet"
    },
    {
      "@type": "cr:FileSet",
      "@id": "parquet-files-for-config-span_retrieval_in_lang",
      "containedIn": {
        "@id": "repo"
      },
      "encodingFormat": "application/x-parquet",
      "includes": "span_retrieval_in_lang/*/*.parquet"
    }
  ],
  "recordSet": [
    {
      "@type": "cr:RecordSet",
      "dataType": "cr:Split",
      "key": {
        "@id": "audio_splits/split_name"
      },
      "@id": "audio_splits",
      "name": "audio_splits",
      "description": "Splits for the audio config.",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "audio_splits/split_name",
          "dataType": "sc:Text"
        }
      ],
      "data": [
        {
          "audio_splits/split_name": "test"
        }
      ]
    },
    {
      "@type": "cr:RecordSet",
      "@id": "audio",
      "description": "google/svq - 'audio' subset (first 5GB)",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "audio/split",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio"
            },
            "extract": {
              "fileProperty": "fullpath"
            },
            "transform": {
              "regex": "audio/(?:partial-)?(test)/.+parquet$"
            }
          },
          "references": {
            "field": {
              "@id": "audio_splits/split_name"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "audio/utt_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio"
            },
            "extract": {
              "column": "utt_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "audio/waveform",
          "dataType": "sc:AudioObject",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio"
            },
            "extract": {
              "column": "waveform"
            },
            "sampling_rate": 16000
          }
        },
        {
          "@type": "cr:Field",
          "@id": "audio/locale",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio"
            },
            "extract": {
              "column": "locale"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "audio/speaker_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio"
            },
            "extract": {
              "column": "speaker_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "audio/speaker_age",
          "dataType": "cr:Int32",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio"
            },
            "extract": {
              "column": "speaker_age"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "audio/speaker_gender",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio"
            },
            "extract": {
              "column": "speaker_gender"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "audio/environment",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio"
            },
            "extract": {
              "column": "environment"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "audio/text",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio"
            },
            "extract": {
              "column": "text"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "audio/topk_salient_terms",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio"
            },
            "extract": {
              "column": "topk_salient_terms"
            }
          },
          "isArray": true,
          "arrayShape": "-1"
        },
        {
          "@type": "cr:Field",
          "@id": "audio/topk_salient_terms_timestamps",
          "dataType": "cr:Float32",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio"
            },
            "extract": {
              "column": "topk_salient_terms_timestamps"
            }
          },
          "isArray": true,
          "arrayShape": "-1,-1"
        }
      ]
    },
    {
      "@type": "cr:RecordSet",
      "dataType": "cr:Split",
      "key": {
        "@id": "audio_en_us_clean_splits/split_name"
      },
      "@id": "audio_en_us_clean_splits",
      "name": "audio_en_us_clean_splits",
      "description": "Splits for the audio_en_us_clean config.",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "audio_en_us_clean_splits/split_name",
          "dataType": "sc:Text"
        }
      ],
      "data": [
        {
          "audio_en_us_clean_splits/split_name": "test"
        }
      ]
    },
    {
      "@type": "cr:RecordSet",
      "@id": "audio_en_us_clean",
      "description": "google/svq - 'audio_en_us_clean' subset (first 5GB)",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "audio_en_us_clean/split",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio_en_us_clean"
            },
            "extract": {
              "fileProperty": "fullpath"
            },
            "transform": {
              "regex": "audio_en_us_clean/(?:partial-)?(test)/.+parquet$"
            }
          },
          "references": {
            "field": {
              "@id": "audio_en_us_clean_splits/split_name"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "audio_en_us_clean/utt_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio_en_us_clean"
            },
            "extract": {
              "column": "utt_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "audio_en_us_clean/waveform",
          "dataType": "sc:AudioObject",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio_en_us_clean"
            },
            "extract": {
              "column": "waveform"
            },
            "sampling_rate": 16000
          }
        },
        {
          "@type": "cr:Field",
          "@id": "audio_en_us_clean/locale",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio_en_us_clean"
            },
            "extract": {
              "column": "locale"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "audio_en_us_clean/speaker_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio_en_us_clean"
            },
            "extract": {
              "column": "speaker_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "audio_en_us_clean/speaker_age",
          "dataType": "cr:Int32",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio_en_us_clean"
            },
            "extract": {
              "column": "speaker_age"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "audio_en_us_clean/speaker_gender",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio_en_us_clean"
            },
            "extract": {
              "column": "speaker_gender"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "audio_en_us_clean/environment",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio_en_us_clean"
            },
            "extract": {
              "column": "environment"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "audio_en_us_clean/text",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio_en_us_clean"
            },
            "extract": {
              "column": "text"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "audio_en_us_clean/topk_salient_terms",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio_en_us_clean"
            },
            "extract": {
              "column": "topk_salient_terms"
            }
          },
          "isArray": true,
          "arrayShape": "-1"
        },
        {
          "@type": "cr:Field",
          "@id": "audio_en_us_clean/topk_salient_terms_timestamps",
          "dataType": "cr:Float32",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-audio_en_us_clean"
            },
            "extract": {
              "column": "topk_salient_terms_timestamps"
            }
          },
          "isArray": true,
          "arrayShape": "-1,-1"
        }
      ]
    },
    {
      "@type": "cr:RecordSet",
      "dataType": "cr:Split",
      "key": {
        "@id": "document_retrieval_cross_lang_splits/split_name"
      },
      "@id": "document_retrieval_cross_lang_splits",
      "name": "document_retrieval_cross_lang_splits",
      "description": "Splits for the document_retrieval_cross_lang config.",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_cross_lang_splits/split_name",
          "dataType": "sc:Text"
        }
      ],
      "data": [
        {
          "document_retrieval_cross_lang_splits/split_name": "test"
        }
      ]
    },
    {
      "@type": "cr:RecordSet",
      "@id": "document_retrieval_cross_lang",
      "description": "google/svq - 'document_retrieval_cross_lang' subset (first 5GB)",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_cross_lang/split",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_cross_lang"
            },
            "extract": {
              "fileProperty": "fullpath"
            },
            "transform": {
              "regex": "document_retrieval_cross_lang/(?:partial-)?(test)/.+parquet$"
            }
          },
          "references": {
            "field": {
              "@id": "document_retrieval_cross_lang_splits/split_name"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_cross_lang/text",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_cross_lang"
            },
            "extract": {
              "column": "text"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_cross_lang/speaker_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_cross_lang"
            },
            "extract": {
              "column": "speaker_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_cross_lang/speaker_gender",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_cross_lang"
            },
            "extract": {
              "column": "speaker_gender"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_cross_lang/speaker_age",
          "dataType": "cr:Int64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_cross_lang"
            },
            "extract": {
              "column": "speaker_age"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_cross_lang/environment",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_cross_lang"
            },
            "extract": {
              "column": "environment"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_cross_lang/locale",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_cross_lang"
            },
            "extract": {
              "column": "locale"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_cross_lang/passage_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_cross_lang"
            },
            "extract": {
              "column": "passage_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_cross_lang/utt_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_cross_lang"
            },
            "extract": {
              "column": "utt_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_cross_lang/page_title",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_cross_lang"
            },
            "extract": {
              "column": "page_title"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_cross_lang/passage_text",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_cross_lang"
            },
            "extract": {
              "column": "passage_text"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_cross_lang/span",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_cross_lang"
            },
            "extract": {
              "column": "span"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_cross_lang/page_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_cross_lang"
            },
            "extract": {
              "column": "page_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_cross_lang/task",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_cross_lang"
            },
            "extract": {
              "column": "task"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_cross_lang/index",
          "dataType": "cr:Int64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_cross_lang"
            },
            "extract": {
              "column": "index"
            }
          }
        }
      ]
    },
    {
      "@type": "cr:RecordSet",
      "dataType": "cr:Split",
      "key": {
        "@id": "document_retrieval_in_lang_splits/split_name"
      },
      "@id": "document_retrieval_in_lang_splits",
      "name": "document_retrieval_in_lang_splits",
      "description": "Splits for the document_retrieval_in_lang config.",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_in_lang_splits/split_name",
          "dataType": "sc:Text"
        }
      ],
      "data": [
        {
          "document_retrieval_in_lang_splits/split_name": "test"
        }
      ]
    },
    {
      "@type": "cr:RecordSet",
      "@id": "document_retrieval_in_lang",
      "description": "google/svq - 'document_retrieval_in_lang' subset (first 5GB)",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_in_lang/split",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_in_lang"
            },
            "extract": {
              "fileProperty": "fullpath"
            },
            "transform": {
              "regex": "document_retrieval_in_lang/(?:partial-)?(test)/.+parquet$"
            }
          },
          "references": {
            "field": {
              "@id": "document_retrieval_in_lang_splits/split_name"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_in_lang/text",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_in_lang"
            },
            "extract": {
              "column": "text"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_in_lang/speaker_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_in_lang"
            },
            "extract": {
              "column": "speaker_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_in_lang/speaker_gender",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_in_lang"
            },
            "extract": {
              "column": "speaker_gender"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_in_lang/speaker_age",
          "dataType": "cr:Int64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_in_lang"
            },
            "extract": {
              "column": "speaker_age"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_in_lang/environment",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_in_lang"
            },
            "extract": {
              "column": "environment"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_in_lang/locale",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_in_lang"
            },
            "extract": {
              "column": "locale"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_in_lang/passage_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_in_lang"
            },
            "extract": {
              "column": "passage_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_in_lang/utt_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_in_lang"
            },
            "extract": {
              "column": "utt_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_in_lang/page_title",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_in_lang"
            },
            "extract": {
              "column": "page_title"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_in_lang/passage_text",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_in_lang"
            },
            "extract": {
              "column": "passage_text"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_in_lang/span",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_in_lang"
            },
            "extract": {
              "column": "span"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_in_lang/page_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_in_lang"
            },
            "extract": {
              "column": "page_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_in_lang/task",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_in_lang"
            },
            "extract": {
              "column": "task"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "document_retrieval_in_lang/index",
          "dataType": "cr:Int64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-document_retrieval_in_lang"
            },
            "extract": {
              "column": "index"
            }
          }
        }
      ]
    },
    {
      "@type": "cr:RecordSet",
      "dataType": "cr:Split",
      "key": {
        "@id": "span_reasoning_cross_lang_splits/split_name"
      },
      "@id": "span_reasoning_cross_lang_splits",
      "name": "span_reasoning_cross_lang_splits",
      "description": "Splits for the span_reasoning_cross_lang config.",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_cross_lang_splits/split_name",
          "dataType": "sc:Text"
        }
      ],
      "data": [
        {
          "span_reasoning_cross_lang_splits/split_name": "test"
        }
      ]
    },
    {
      "@type": "cr:RecordSet",
      "@id": "span_reasoning_cross_lang",
      "description": "google/svq - 'span_reasoning_cross_lang' subset (first 5GB)",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_cross_lang/split",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_cross_lang"
            },
            "extract": {
              "fileProperty": "fullpath"
            },
            "transform": {
              "regex": "span_reasoning_cross_lang/(?:partial-)?(test)/.+parquet$"
            }
          },
          "references": {
            "field": {
              "@id": "span_reasoning_cross_lang_splits/split_name"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_cross_lang/text",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_cross_lang"
            },
            "extract": {
              "column": "text"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_cross_lang/speaker_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_cross_lang"
            },
            "extract": {
              "column": "speaker_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_cross_lang/speaker_gender",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_cross_lang"
            },
            "extract": {
              "column": "speaker_gender"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_cross_lang/speaker_age",
          "dataType": "cr:Int64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_cross_lang"
            },
            "extract": {
              "column": "speaker_age"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_cross_lang/environment",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_cross_lang"
            },
            "extract": {
              "column": "environment"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_cross_lang/locale",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_cross_lang"
            },
            "extract": {
              "column": "locale"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_cross_lang/passage_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_cross_lang"
            },
            "extract": {
              "column": "passage_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_cross_lang/utt_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_cross_lang"
            },
            "extract": {
              "column": "utt_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_cross_lang/page_title",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_cross_lang"
            },
            "extract": {
              "column": "page_title"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_cross_lang/passage_text",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_cross_lang"
            },
            "extract": {
              "column": "passage_text"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_cross_lang/span",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_cross_lang"
            },
            "extract": {
              "column": "span"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_cross_lang/page_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_cross_lang"
            },
            "extract": {
              "column": "page_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_cross_lang/task",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_cross_lang"
            },
            "extract": {
              "column": "task"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_cross_lang/index",
          "dataType": "cr:Int64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_cross_lang"
            },
            "extract": {
              "column": "index"
            }
          }
        }
      ]
    },
    {
      "@type": "cr:RecordSet",
      "dataType": "cr:Split",
      "key": {
        "@id": "span_reasoning_in_lang_splits/split_name"
      },
      "@id": "span_reasoning_in_lang_splits",
      "name": "span_reasoning_in_lang_splits",
      "description": "Splits for the span_reasoning_in_lang config.",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_in_lang_splits/split_name",
          "dataType": "sc:Text"
        }
      ],
      "data": [
        {
          "span_reasoning_in_lang_splits/split_name": "test"
        }
      ]
    },
    {
      "@type": "cr:RecordSet",
      "@id": "span_reasoning_in_lang",
      "description": "google/svq - 'span_reasoning_in_lang' subset (first 5GB)",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_in_lang/split",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_in_lang"
            },
            "extract": {
              "fileProperty": "fullpath"
            },
            "transform": {
              "regex": "span_reasoning_in_lang/(?:partial-)?(test)/.+parquet$"
            }
          },
          "references": {
            "field": {
              "@id": "span_reasoning_in_lang_splits/split_name"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_in_lang/text",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_in_lang"
            },
            "extract": {
              "column": "text"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_in_lang/speaker_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_in_lang"
            },
            "extract": {
              "column": "speaker_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_in_lang/speaker_gender",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_in_lang"
            },
            "extract": {
              "column": "speaker_gender"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_in_lang/speaker_age",
          "dataType": "cr:Int64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_in_lang"
            },
            "extract": {
              "column": "speaker_age"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_in_lang/environment",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_in_lang"
            },
            "extract": {
              "column": "environment"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_in_lang/locale",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_in_lang"
            },
            "extract": {
              "column": "locale"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_in_lang/passage_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_in_lang"
            },
            "extract": {
              "column": "passage_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_in_lang/utt_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_in_lang"
            },
            "extract": {
              "column": "utt_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_in_lang/page_title",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_in_lang"
            },
            "extract": {
              "column": "page_title"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_in_lang/passage_text",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_in_lang"
            },
            "extract": {
              "column": "passage_text"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_in_lang/span",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_in_lang"
            },
            "extract": {
              "column": "span"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_in_lang/page_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_in_lang"
            },
            "extract": {
              "column": "page_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_in_lang/task",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_in_lang"
            },
            "extract": {
              "column": "task"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_reasoning_in_lang/index",
          "dataType": "cr:Int64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_reasoning_in_lang"
            },
            "extract": {
              "column": "index"
            }
          }
        }
      ]
    },
    {
      "@type": "cr:RecordSet",
      "dataType": "cr:Split",
      "key": {
        "@id": "span_retrieval_cross_lang_splits/split_name"
      },
      "@id": "span_retrieval_cross_lang_splits",
      "name": "span_retrieval_cross_lang_splits",
      "description": "Splits for the span_retrieval_cross_lang config.",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_cross_lang_splits/split_name",
          "dataType": "sc:Text"
        }
      ],
      "data": [
        {
          "span_retrieval_cross_lang_splits/split_name": "test"
        }
      ]
    },
    {
      "@type": "cr:RecordSet",
      "@id": "span_retrieval_cross_lang",
      "description": "google/svq - 'span_retrieval_cross_lang' subset (first 5GB)",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_cross_lang/split",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_cross_lang"
            },
            "extract": {
              "fileProperty": "fullpath"
            },
            "transform": {
              "regex": "span_retrieval_cross_lang/(?:partial-)?(test)/.+parquet$"
            }
          },
          "references": {
            "field": {
              "@id": "span_retrieval_cross_lang_splits/split_name"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_cross_lang/text",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_cross_lang"
            },
            "extract": {
              "column": "text"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_cross_lang/speaker_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_cross_lang"
            },
            "extract": {
              "column": "speaker_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_cross_lang/speaker_gender",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_cross_lang"
            },
            "extract": {
              "column": "speaker_gender"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_cross_lang/speaker_age",
          "dataType": "cr:Int64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_cross_lang"
            },
            "extract": {
              "column": "speaker_age"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_cross_lang/environment",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_cross_lang"
            },
            "extract": {
              "column": "environment"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_cross_lang/locale",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_cross_lang"
            },
            "extract": {
              "column": "locale"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_cross_lang/passage_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_cross_lang"
            },
            "extract": {
              "column": "passage_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_cross_lang/utt_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_cross_lang"
            },
            "extract": {
              "column": "utt_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_cross_lang/page_title",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_cross_lang"
            },
            "extract": {
              "column": "page_title"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_cross_lang/passage_text",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_cross_lang"
            },
            "extract": {
              "column": "passage_text"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_cross_lang/span",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_cross_lang"
            },
            "extract": {
              "column": "span"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_cross_lang/page_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_cross_lang"
            },
            "extract": {
              "column": "page_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_cross_lang/task",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_cross_lang"
            },
            "extract": {
              "column": "task"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_cross_lang/index",
          "dataType": "cr:Int64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_cross_lang"
            },
            "extract": {
              "column": "index"
            }
          }
        }
      ]
    },
    {
      "@type": "cr:RecordSet",
      "dataType": "cr:Split",
      "key": {
        "@id": "span_retrieval_in_lang_splits/split_name"
      },
      "@id": "span_retrieval_in_lang_splits",
      "name": "span_retrieval_in_lang_splits",
      "description": "Splits for the span_retrieval_in_lang config.",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_in_lang_splits/split_name",
          "dataType": "sc:Text"
        }
      ],
      "data": [
        {
          "span_retrieval_in_lang_splits/split_name": "test"
        }
      ]
    },
    {
      "@type": "cr:RecordSet",
      "@id": "span_retrieval_in_lang",
      "description": "google/svq - 'span_retrieval_in_lang' subset (first 5GB)",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_in_lang/split",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_in_lang"
            },
            "extract": {
              "fileProperty": "fullpath"
            },
            "transform": {
              "regex": "span_retrieval_in_lang/(?:partial-)?(test)/.+parquet$"
            }
          },
          "references": {
            "field": {
              "@id": "span_retrieval_in_lang_splits/split_name"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_in_lang/text",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_in_lang"
            },
            "extract": {
              "column": "text"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_in_lang/speaker_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_in_lang"
            },
            "extract": {
              "column": "speaker_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_in_lang/speaker_gender",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_in_lang"
            },
            "extract": {
              "column": "speaker_gender"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_in_lang/speaker_age",
          "dataType": "cr:Int64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_in_lang"
            },
            "extract": {
              "column": "speaker_age"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_in_lang/environment",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_in_lang"
            },
            "extract": {
              "column": "environment"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_in_lang/locale",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_in_lang"
            },
            "extract": {
              "column": "locale"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_in_lang/passage_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_in_lang"
            },
            "extract": {
              "column": "passage_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_in_lang/utt_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_in_lang"
            },
            "extract": {
              "column": "utt_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_in_lang/page_title",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_in_lang"
            },
            "extract": {
              "column": "page_title"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_in_lang/passage_text",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_in_lang"
            },
            "extract": {
              "column": "passage_text"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_in_lang/span",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_in_lang"
            },
            "extract": {
              "column": "span"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_in_lang/page_id",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_in_lang"
            },
            "extract": {
              "column": "page_id"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_in_lang/task",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_in_lang"
            },
            "extract": {
              "column": "task"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "span_retrieval_in_lang/index",
          "dataType": "cr:Int64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-span_retrieval_in_lang"
            },
            "extract": {
              "column": "index"
            }
          }
        }
      ]
    }
  ],
  "conformsTo": "http://mlcommons.org/croissant/1.1",
  "name": "svq",
  "description": "\n\t\n\t\t\n\t\tSimple Voice Questions\n\t\n\nSimple Voice Questions (SVQ) is a set of short audio questions recorded in 26 locales across 17 languages under multiple audio conditions.\n\n\t\n\t\t\n\t\tData Collection\n\t\n\nSpeakers were presented with recording instructions specifying the recording environment and text query to be recorded.\nThey recorded using their own phones or tablets under four conditions:\n\nclean: Record in quiet environment\nbackground speech noise: Record while audio from sources like podcasts… See the full description on the dataset page: https://huggingface.co/datasets/google/svq.",
  "alternateName": [
    "google/svq",
    "Simple Voice Questions"
  ],
  "creator": {
    "@type": "Organization",
    "name": "Google",
    "url": "https://huggingface.co/google"
  },
  "keywords": [
    "question-answering",
    "automatic-speech-recognition",
    "Arabic",
    "Bengali",
    "English",
    "Finnish",
    "Gujarati",
    "Hindi",
    "Indonesian",
    "Japanese",
    "Kannada",
    "Korean",
    "Malayalam",
    "Marathi",
    "Russian",
    "Swahili",
    "Tamil",
    "Telugu",
    "Urdu",
    "cc-by-4.0",
    "100K - 1M",
    "parquet",
    "Audio",
    "Tabular",
    "Text",
    "Datasets",
    "Dask",
    "Croissant",
    "Polars",
    "🇺🇸 Region: US"
  ],
  "license": "https://choosealicense.com/licenses/cc-by-4.0/",
  "url": "https://huggingface.co/datasets/google/svq"
}
