# Text datasets: short dataset name -> "<owner>/<name>" repository id.
TEXT_DATASETS = {
    # Gutenberg eBooks written in English
    "gutenberg_english": "sedthh/gutenberg_english",
    # Gutenberg eBooks written in languages other than English
    "gutenberg_multilang": "sedthh/gutenberg_multilang",
    # Dialogues and transcripts from TV shows and movies
    "tv_dialogue": "sedthh/tv_dialogue",
    # Dialogues and transcripts from TV shows and movies, sourced from ForeverDreaming
    "fd_dialogue": "sedthh/fd_dialogue",
    # Texts of Thai classical literature
    "tlcv2.0_oa": "pythainlp/tlcv2.0_oa",
    # Persian news from Isna
    "fa-isna-news": "pourmand1376/isna-news",
    # Texts from the Farsi Wikipedia
    "fa-wikipedia": "pourmand1376/fa-wikipedia",
}

# Instruction datasets: short dataset name -> "<owner>/<name>" repository id.
# The values look like hosted dataset repository ids (presumably Hugging Face
# Hub ids -- confirm against the code that loads them).
# The keys are the lookup names; they are not always derived mechanically from
# the repository name (e.g. "recipes" -> "dctanner/oa_recipes").
INSTRUCTION_DATASETS = {
    "humaneval_mbpp_codegen_qa": "OllieStanley/humaneval-mbpp-codegen-qa",
    "humaneval_mbpp_testgen_qa": "OllieStanley/humaneval-mbpp-testgen-qa",
    "grade_school_math_instructions": "qwedsacf/grade-school-math-instructions",
    "recipes": "dctanner/oa_recipes",
    "ubuntu_dialogue_qa": "sedthh/ubuntu_dialogue_qa",
    "cmu_wiki_qa": "sedthh/cmu_wiki_qa",
    "youtube_subs_howto100M": "totuta/youtube_subs_howto100M",
    "iapp_wiki_qa_squad": "wannaphong/iapp_wiki_qa_squad_oa",
    "zhihu-kol": "wangrui6/zhihu-kol",
    "tell_a_joke": "mikegarts/oa_tell_a_joke_20000",
    "oa_wiki_qa_bart_10000row": "michaelthwan/oa_wiki_qa_bart_10000row",
    "biostars_qa": "cannin/biostars_qa",
    "oa_leet10k": "ehartford/oa_leet10k",
    "LogicInference_OA": "KK04/LogicInference_OA",
    "oa_dolly_15k": "OllieStanley/oa_dolly_15k",
    "TSSB-3M": "zirui3/TSSB-3M-instructions",
    "poetry_instruction": "checkai/instruction-poems",
    "oa_stackexchange": "donfu/oa-stackexchange",
    "stable_diffusion_instructional_dataset": "MadVoyager/stable_diffusion_instructional_dataset",
    # NOTE(review): the key says "337" but the repository id says "377".
    # One of the two looks like a digit transposition -- confirm which is
    # intended before renaming, since callers may already depend on this key.
    "ru_riddles_337": "0x22almostEvil/ru-riddles-377",
    "instructional_codesearchnet_python": "Nan-Do/instructional_code-search-net-python",
    "tatoeba_mt_qna_oa": "0x22almostEvil/tatoeba-mt-qna-oa",
    "reasoning_bg_oa": "0x22almostEvil/reasoning_bg_oa",
    "reasoning_gsm_qna_oa": "0x22almostEvil/reasoning-gsm-qna-oa",
    "semantics_ws_qna_oa": "0x22almostEvil/semantics-ws-qna-oa",
}

# Safety datasets: short dataset name -> "<owner>/<name>" repository id.
# Here each key matches the repository name portion of its value.
SAFETY_DATASETS = {
    "prosocial-dialog": "allenai/prosocial-dialog",
    "prosocial-confessions": "shahules786/prosocial-confessions",
}

# Multi-turn dialogue datasets. This mapping currently has no entries;
# presumably it follows the same name -> repository id shape as the other
# *_DATASETS mappings in this module (confirm before adding entries).
MULTI_TURN_DIALOG_DATASETS = {}
