# ------------------public chinese data------------------
# Each list item describes one training manifest with the keys `hours`,
# `manifest` (path to a .jsonl.gz file), `weights` and `source`.
# NOTE(review): the exact meaning of `weights` and `source` is defined by the
# consumer of this file, which is not visible here - confirm before editing.
- hours: 10005
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/disturb/wenetspeech_denoise/wenetspeech_denoise_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 1000
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/open_source/aishell2/aishell2_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 150
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/open_source/aishell1/aishell1_cuts.jsonl.gz
  weights: 1
  source: patch
# Unlike the neighbouring entries, this one uses `source: wav` and carries an
# extra `note` key; its manifest lives under code_switch/ rather than chinese/.
- hours: 555
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/code_switch/talcs/tal_csasr_train_set.jsonl.gz
  weights: 1
  source: wav
  note: code-switch

# ---------------------- hunyuan boss data (disabled: all entries below are commented out) -------------------
# - hours: 2
#   manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/boss/mark_all/mark_all_cuts.jsonl.gz
#   weights: 1
#   source: patch
# - hours: 7
#   manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/boss/martin_all/martin_all_cuts.jsonl.gz
#   weights: 1
#   source: patch
# - hours: 8
#   manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/boss/allen_all/allen_all_cuts.jsonl.gz
#   weights: 1
#   source: patch
# - hours: 3
#   manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/boss/pony_all/pony_all_cuts.jsonl.gz
#   weights: 1
#   source: patch

# ---------------------- ailab private code-switch data -------------------------------------
# All manifests in this group live under manifests/code_switch/ and use
# `source: patch`.
- hours: 125
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/code_switch/ManEng_King-ASR-065/ManEng_King-ASR-065_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 2328
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/code_switch/chonglang_7000h_maneng_part1/chonglang_7000h_maneng_part1_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 199
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/code_switch/datatang_maneng_2019_200h_traindata/datatang_maneng_2019_200h_traindata_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 3985
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/code_switch/chonglang_7000h_maneng_part2/chonglang_7000h_maneng_part2_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 1000
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/code_switch/ManEng_201712/ManEng_201712_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 732
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/code_switch/datatang_maneng_2019_700h_traindata/datatang_maneng_2019_700h_traindata_cuts.jsonl.gz
  weights: 1
  source: patch

# ------------------ailab old private chinese data----------------
# Manifests under manifests/chinese/buy/ (presumably purchased corpora -
# TODO confirm). All use `source: patch`.
- hours: 863
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/buy/aishu_spontaneous_MDT2018S019_part1/aishu_spontaneous_MDT2018S019_part1_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 117
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/buy/younger_children_mobile_1611/younger_children_mobile_1611_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 714
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/buy/datatang_accent_132_662/datatang_accent_132_662_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 1476
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/buy/datatang_1505h/datatang_1505h_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 5219
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/buy/aishu_spontaneous_MDT2017S019/aishu_spontaneous_MDT2017S019_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 1980
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/buy/datatang_chinese_sopn_1980h/datatang_chinese_sopn_1980h_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 3015
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/buy/shichen_kouyin_3000h/shichen_kouyin_3000h_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 236
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/buy/xiaoshuo_ximalay_2022/xiaoshuo_ximalay_2022_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 2475
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/buy/aishu_spontaneous_201911_2500h/aishu_spontaneous_201911_2500h_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 2095
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/buy/datatang_chinese_sopn_1200h/datatang_chinese_sopn_1200h_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 187
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/buy/chinese_children_1611/chinese_children_1611_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 11993
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/buy/data_split/split_all_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 1045
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/buy/shicen_spon_1180_202007/shicen_spon_1180_202007_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 997
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/buy/datatang_accent_1000h/datatang_accent_1000h_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 461
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/buy/spontaneous_datatang/spontaneous_datatang_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 1927
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/buy/spon_King-ASR-113/spon_King-ASR-113_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 419
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/buy/spontaneous_demao/spontaneous_demao_cuts.jsonl.gz
  weights: 1
  source: patch
# Manifests under manifests/chinese/oteam/ (infosec_corpus_*, wxzl_corpus_*,
# sppd_corpus_*, teg_ailab_corpus_*). All use `source: patch`.
- hours: 40
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/oteam/infosec_corpus_4/infosec_corpus_4_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 767
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/oteam/wxzl_corpus_2/wxzl_corpus_2_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 997
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/oteam/sppd_corpus_1/sppd_corpus_1_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 517
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/oteam/wxzl_corpus_6/wxzl_corpus_6_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 247
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/oteam/infosec_corpus_3/infosec_corpus_3_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 999
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/oteam/sppd_corpus_3/sppd_corpus_3_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 5500
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/oteam/teg_ailab_corpus_3/teg_ailab_corpus_3_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 2067
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/oteam/wxzl_corpus_4/wxzl_corpus_4_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 698
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/oteam/wxzl_corpus_7/wxzl_corpus_7_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 3961
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/oteam/infosec_corpus_2/infosec_corpus_2_cuts.jsonl.gz
  weights: 1
  source: patch
# `source: patch` restored here: this entry was missing its `source` key,
# while its siblings sppd_corpus_1 and sppd_corpus_3 both use `patch`.
- hours: 999
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/oteam/sppd_corpus_2/sppd_corpus_2_cuts.jsonl.gz
  weights: 1
  source: patch
# More manifests under manifests/chinese/oteam/; all use `source: patch`.
- hours: 1523
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/oteam/wxzl_corpus_5/wxzl_corpus_5_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 1476
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/oteam/teg_ailab_corpus_2/teg_ailab_corpus_2_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 141
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/oteam/wxzl_corpus_3/wxzl_corpus_3_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 1000
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/oteam/teg_ailab_corpus_1/teg_ailab_corpus_1_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 731
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/oteam/infosec_corpus_1/infosec_corpus_1_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 1178
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/oteam/wxzl_corpus_1/wxzl_corpus_1_cuts.jsonl.gz
  weights: 1
  source: patch
# Manifests under manifests/chinese/online/; all use `source: patch`.
# NOTE(review): `aishu_yinxinag_20181207` is spelled `yinxinag` in its path
# while other entries use `yinxiang`; presumably this matches the on-disk
# directory name - verify before "correcting" it.
# NOTE(review): both `htrs_yinxiang_0523` and `htrs_yinxiang_0523_vad` are
# listed; confirm they are not overlapping copies of the same audio.
- hours: 426
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/htrs_yinxiang_0611/htrs_yinxiang_0611_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 22
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/500h_label_mig_musicbox_20180316/500h_label_mig_musicbox_20180316_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 1016
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/tencent_ad_202104_part1/tencent_ad_202104_part1_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 443
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/htrs_yinxiang_0817/htrs_yinxiang_0817_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 349
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/wangzheshouban_20181206/wangzheshouban_20181206_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 173
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/htrs_tv_20181128/htrs_tv_20181128_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 1798
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/qq_data_202012_part1/qq_data_202012_part1_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 283
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/1000h_label_yinxiang_0326/1000h_label_yinxiang_0326_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 219
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/htrs_yinxiang_0530/htrs_yinxiang_0530_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 141
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/wangzheshouban_20181130/wangzheshouban_20181130_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 350
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/htrs_yinxiang_0522/htrs_yinxiang_0522_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 3
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/wangzheshouban_20181030/wangzheshouban_20181030_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 2267
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/tencent_ad_2020_part2/tencent_ad_2020_part2_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 216
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/hepingjingying_202108/hepingjingying_202108_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 183
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/qq_data_201906_3/qq_data_201906_3_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 222
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/htrs_yinxiang_0716/htrs_yinxiang_0716_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 267
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/aishu_yinxinag_20181207/aishu_yinxinag_20181207_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 594
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/tencent_ad_202104_part2/tencent_ad_202104_part2_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 33
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/htrs_tv_20181203/htrs_tv_20181203_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 194
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/qq_data_201906_2/qq_data_201906_2_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 60
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/500h_label_mig_musicbox_20180329/500h_label_mig_musicbox_20180329_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 6328
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/qq_data_202101/qq_data_202101_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 349
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/htrs_tv_20181211/htrs_tv_20181211_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 100
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/wangzheshouban_20181220/wangzheshouban_20181220_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 67
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/wangzheshouban_1120/wangzheshouban_1120_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 14
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/500h_label_mig_musicbox_20180312/500h_label_mig_musicbox_20180312_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 112
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/htrs_yinxiang_0523_vad/htrs_yinxiang_0523_vad_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 165
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/wangzheshouban_20181116/wangzheshouban_20181116_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 27
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/wangzheshouban_20181227/wangzheshouban_20181227_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 262
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/htrs_yinxiang_0725/htrs_yinxiang_0725_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 129
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/htrs_yinxiang_0517/htrs_yinxiang_0517_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 20
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/500h_label_mig_musicbox_20180305/500h_label_mig_musicbox_20180305_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 6
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/htrs_tv_20181123/htrs_tv_20181123_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 273
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/qq_data_201906_1/qq_data_201906_1_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 2403
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/qq_data_202012_part2/qq_data_202012_part2_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 1052
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/tencent_ad_2020_part1/tencent_ad_2020_part1_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 29
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/wangzheshouban_20190103/wangzheshouban_20190103_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 359
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/htrs_yinxiang_0523/htrs_yinxiang_0523_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 422
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/online/htrs_yinxiang_0827/htrs_yinxiang_0827_cuts.jsonl.gz
  weights: 1
  source: patch
# Manifests under manifests/chinese/disturb/.
- hours: 1837
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/disturb/chongliang_net_data_2000h_20200710/chongliang_net_data_2000h_20200710_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 1792
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/disturb/chongliang_net_data_2000h_202003/chongliang_net_data_2000h_202003_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 551
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/disturb/zhongkouyin_ten_city_201210/zhongkouyin_ten_city_201210_cuts.jsonl.gz
  weights: 1
  source: patch
# Manifests under manifests/chinese/precise/.
- hours: 2921
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/precise/qq_data/qq_data_add_patch_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 2525
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/precise/spon_talk/spon_talk_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 3671
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/precise/normal_reading/normal_reading_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 7118
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/precise/collection_reading/collection_reading_cuts.jsonl.gz
  weights: 1
  source: patch
# NOTE(review): this manifest is under chinese/open_source/ although it sits
# in the "old private chinese data" section. The first entry of this file
# (disturb/wenetspeech_denoise) has the same `hours` value (10005); confirm
# that including both wenetspeech variants is intentional.
- hours: 10005
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/open_source/wenetspeech/wenetspeech_cuts.jsonl.gz
  weights: 1
  source: patch
# Manifests under manifests/chinese/net_data/; all use `source: patch`.
- hours: 751
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/net_data/net_video_191107_1/net_video_191107_1_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 1289
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/net_data/net_video_xinxi_202006/net_video_xinxi_202006_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 89
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/net_data/net_video_191223/net_video_191223_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 163
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/net_data/net_video_kuaishou_20190610/net_video_kuaishou_20190610_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 370
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/net_data/htrs_wangyi_xueyuan_20180704/htrs_wangyi_xueyuan_20180704_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 179
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/net_data/net_video_ocr_1111/net_video_ocr_1111_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 118
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/net_data/biaobei_xueyuan_0401/biaobei_xueyuan_0401_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 83
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/net_data/net_video_ocr_1029/net_video_ocr_1029_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 918
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/net_data/net_video_xinxi_202007/net_video_xinxi_202007_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 1566
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/net_data/net_video_xinxi_20191221/net_video_xinxi_20191221_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 913
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/net_data/3000h_label_xueyuan_0524/3000h_label_xueyuan_0524_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 876
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/net_data/net_video_191107_2/net_video_191107_2_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 163
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/net_data/datatang_xueyuan_0402/datatang_xueyuan_0402_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 751
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/net_data/net_video_xinxi_202004/net_video_xinxi_202004_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 42
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/net_data/net_video_liyongle_20191118/net_video_liyongle_20191118_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 815
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/net_data/net_video_from_auszhang_202003/net_video_from_auszhang_202003_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 83
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/net_data/net_video_ocr_1105/net_video_ocr_1105_cuts.jsonl.gz
  weights: 1
  source: patch
- hours: 1239
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/net_data/htrs_xueyuan_201901/htrs_xueyuan_201901_cuts.jsonl.gz
  weights: 1
  source: patch
# ---------------------------- ailab new private chinese data ---------------------------------
# Both manifests live in chinese/buy/buy_20240331_zhibo/ and use
# `source: wav fbank` (the older entries above use `source: patch`).
# NOTE(review): the `22k` / `16k` in the file names presumably denote the
# audio sample rate - confirm against the data.
- hours: 2000
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/buy/buy_20240331_zhibo/zhibo_22k_2000h.jsonl.gz
  weights: 1
  source: wav fbank
- hours: 5000
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/buy/buy_20240331_zhibo/zhibo_16k_5000h.jsonl.gz
  weights: 1
  source: wav fbank
# ---------------------------- yuanbao private data --------------------------------
# Manifests under manifests/chinese/yuanbao/; both use `source: wav fbank`.
- hours: 40
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/yuanbao/yuanbao_20241102_1st_part1.jsonl.gz
  weights: 1
  source: wav fbank
- hours: 352
  manifest: /apdcephfs_cq12/share_302080740/data/asr_train_data/manifests/chinese/yuanbao/yuanbao_20241204_2st_3st.jsonl.gz
  weights: 1
  source: wav fbank
