# Lookup table from FLEURS locale codes (keys, e.g. "af_za") to FLORES-style
# "<language>_<Script>" codes (values, e.g. "afr_Latn").
# Where the locale has no exact counterpart among the values used here, the
# trailing comment names the variety it is mapped to.
FLEURS_TO_FLORES = {
    "af_za": "afr_Latn",
    "am_et": "amh_Ethi",
    "ar_eg": "arb_Arab",
    "as_in": "asm_Beng",
    "ast_es": "ast_Latn",
    "az_az": "azj_Latn",
    "be_by": "bel_Cyrl",
    "bg_bg": "bul_Cyrl",
    "bn_in": "ben_Beng",
    "bs_ba": "bos_Latn",
    "ca_es": "cat_Latn",
    "ceb_ph": "ceb_Latn",
    "ckb_iq": "ckb_Arab",
    "cmn_hans_cn": "zho_Hans",
    "cs_cz": "ces_Latn",
    "cy_gb": "cym_Latn",
    "da_dk": "dan_Latn",
    "de_de": "deu_Latn",
    "el_gr": "ell_Grek",
    "en_us": "eng_Latn",
    "es_419": "spa_Latn",
    "et_ee": "est_Latn",
    "fa_ir": "pes_Arab",
    "fi_fi": "fin_Latn",
    "fil_ph": "tgl_Latn",
    "fr_fr": "fra_Latn",
    "ga_ie": "gle_Latn",
    "gl_es": "glg_Latn",
    "gu_in": "guj_Gujr",
    "ha_ng": "hau_Latn",
    "he_il": "heb_Hebr",
    "hi_in": "hin_Deva",
    "hr_hr": "hrv_Latn",
    "hu_hu": "hun_Latn",
    "hy_am": "hye_Armn",
    "id_id": "ind_Latn",
    "ig_ng": "ibo_Latn",
    "is_is": "isl_Latn",
    "it_it": "ita_Latn",
    "ja_jp": "jpn_Jpan",
    "jv_id": "jav_Latn",
    "ka_ge": "kat_Geor",
    "kam_ke": "kam_Latn",
    "kea_cv": "kea_Latn",
    "kk_kz": "kaz_Cyrl",
    "km_kh": "khm_Khmr",
    "kn_in": "kan_Knda",
    "ko_kr": "kor_Hang",
    "ky_kg": "kir_Cyrl",
    "lb_lu": "ltz_Latn",
    "lg_ug": "lug_Latn",
    "ln_cd": "lin_Latn",
    "lo_la": "lao_Laoo",
    "lt_lt": "lit_Latn",
    "luo_ke": "luo_Latn",
    "lv_lv": "lvs_Latn",
    "mi_nz": "mri_Latn",
    "mk_mk": "mkd_Cyrl",
    "ml_in": "mal_Mlym",
    "mn_mn": "khk_Cyrl",
    "mr_in": "mar_Deva",
    "ms_my": "zsm_Latn",
    "mt_mt": "mlt_Latn",
    "my_mm": "mya_Mymr",
    "nb_no": "nob_Latn",
    "ne_np": "npi_Deva",
    "nl_nl": "nld_Latn",
    "nso_za": "nso_Latn",
    "ny_mw": "nya_Latn",
    "oc_fr": "oci_Latn",
    "or_in": "ory_Orya",
    "pa_in": "pan_Guru",
    "pl_pl": "pol_Latn",
    "ps_af": "pbt_Arab",
    "pt_br": "por_Latn",
    "ro_ro": "ron_Latn",
    "ru_ru": "rus_Cyrl",
    "sd_in": "snd_Arab",
    "sk_sk": "slk_Latn",
    "sl_si": "slv_Latn",
    "sn_zw": "sna_Latn",
    "so_so": "som_Latn",
    "sr_rs": "srp_Cyrl",
    "sv_se": "swe_Latn",
    "sw_ke": "swh_Latn",
    "ta_in": "tam_Taml",
    "te_in": "tel_Telu",
    "tg_tj": "tgk_Cyrl",
    "th_th": "tha_Thai",
    "tr_tr": "tur_Latn",
    "uk_ua": "ukr_Cyrl",
    "umb_ao": "umb_Latn",
    "ur_pk": "urd_Arab",
    "uz_uz": "uzn_Latn",
    "vi_vn": "vie_Latn",
    "wo_sn": "wol_Latn",
    "xh_za": "xho_Latn",
    "yo_ng": "yor_Latn",
    # NOTE(review): Cantonese is mapped to zho_Hant here; if the FLORES
    # release in use also defines yue_Hant, confirm this choice is intended.
    "yue_hant_hk": "zho_Hant",
    "zu_za": "zul_Latn",
    "ff_sn": "fuv_Latn",  # Fula (Senegal locale) mapped to Nigerian Fulfulde (fuv)
    "om_et": "gaz_Latn",  # Oromo (Ethiopia locale) mapped to West Central Oromo (gaz)
}


# FLEURS locales that Whisper has no language code of its own for.
# For every entry, FLEURS2WHISPER falls back to a different stand-in language
# (e.g. "kam_ke" -> "sw"), so results for these locales are not native-language
# results. Entries are kept in alphabetical order.
whisper_unsupported = [
    "ast_es",
    "ceb_ph",
    "ckb_iq",
    "ff_sn",
    # Whisper only offers Tagalog ("tl"); strictly speaking Tagalog is not Filipino
    "fil_ph",
    "ga_ie",
    "ig_ng",
    "kam_ke",
    "kea_cv",
    "ky_kg",
    "lg_ug",
    "luo_ke",
    "nso_za",
    # The next three were missing although FLEURS2WHISPER maps them to
    # stand-ins as well: Chichewa -> "sw", Oromo -> "so", Odia -> "bn".
    "ny_mw",
    "om_et",
    "or_in",
    "umb_ao",
    "wo_sn",
    "xh_za",
    "zu_za",
]


# Lookup table from FLEURS locale codes (keys) to Whisper language codes (values).
# The trailing comment names the FLEURS language; where it says "mapped to",
# the value is a stand-in language rather than the locale's own language.
FLEURS2WHISPER = {
    "af_za": "af",  # Afrikaans
    "am_et": "am",  # Amharic
    "ar_eg": "ar",  # Arabic
    "as_in": "as",  # Assamese
    "ast_es": "es",  # Asturian, mapped to Spanish (closest)
    "az_az": "az",  # Azerbaijani
    "be_by": "be",  # Belarusian
    "bg_bg": "bg",  # Bulgarian
    "bn_in": "bn",  # Bengali
    "bs_ba": "bs",  # Bosnian
    "ca_es": "ca",  # Catalan
    "ceb_ph": "tl",  # Cebuano, mapped to Tagalog (closest)
    "ckb_iq": "ar",  # Central Kurdish, mapped to Arabic (closest available)
    "cmn_hans_cn": "zh",  # Mandarin Chinese (simplified)
    "cs_cz": "cs",  # Czech
    "cy_gb": "cy",  # Welsh
    "da_dk": "da",  # Danish
    "de_de": "de",  # German
    "el_gr": "el",  # Greek
    "en_us": "en",  # English
    "es_419": "es",  # Latin American Spanish, mapped to general Spanish
    "et_ee": "et",  # Estonian
    "fa_ir": "fa",  # Persian
    "ff_sn": "fr",  # Fula, mapped to French (spoken in Senegal)
    "fi_fi": "fi",  # Finnish
    "fil_ph": "tl",  # Filipino, mapped to Tagalog
    "fr_fr": "fr",  # French
    "ga_ie": "cy",  # Irish, mapped to Welsh (closest available)
    "gl_es": "gl",  # Galician
    "gu_in": "gu",  # Gujarati
    "ha_ng": "ha",  # Hausa
    "he_il": "he",  # Hebrew
    "hi_in": "hi",  # Hindi
    "hr_hr": "hr",  # Croatian
    "hu_hu": "hu",  # Hungarian
    "hy_am": "hy",  # Armenian
    "id_id": "id",  # Indonesian
    "ig_ng": "yo",  # Igbo, mapped to Yoruba (closest available)
    "is_is": "is",  # Icelandic
    "it_it": "it",  # Italian
    "ja_jp": "ja",  # Japanese
    "jv_id": "jw",  # Javanese
    "ka_ge": "ka",  # Georgian
    "kam_ke": "sw",  # Kamba, mapped to Swahili (closest)
    "kea_cv": "pt",  # Cape Verdean Creole, mapped to Portuguese
    "kk_kz": "kk",  # Kazakh
    "km_kh": "km",  # Khmer
    "kn_in": "kn",  # Kannada
    "ko_kr": "ko",  # Korean
    "ky_kg": "kk",  # Kyrgyz, mapped to Kazakh (closest)
    "lb_lu": "lb",  # Luxembourgish
    "lg_ug": "sw",  # Ganda, mapped to Swahili (closest)
    "ln_cd": "ln",  # Lingala
    "lo_la": "lo",  # Lao
    "lt_lt": "lt",  # Lithuanian
    "luo_ke": "sw",  # Luo, mapped to Swahili
    "lv_lv": "lv",  # Latvian
    "mi_nz": "mi",  # Maori
    "mk_mk": "mk",  # Macedonian
    "ml_in": "ml",  # Malayalam
    "mn_mn": "mn",  # Mongolian
    "mr_in": "mr",  # Marathi
    "ms_my": "ms",  # Malay
    "mt_mt": "mt",  # Maltese
    "my_mm": "my",  # Burmese
    "nb_no": "no",  # Norwegian Bokmal
    "ne_np": "ne",  # Nepali
    "nl_nl": "nl",  # Dutch
    "nso_za": "sw",  # Northern Sotho, mapped to Swahili (closest available)
    "ny_mw": "sw",  # Chichewa, mapped to Swahili (closest)
    "oc_fr": "oc",  # Occitan
    "om_et": "so",  # Oromo, mapped to Somali (closest)
    "or_in": "bn",  # Odia, mapped to Bengali
    "pa_in": "pa",  # Punjabi
    "pl_pl": "pl",  # Polish
    "ps_af": "ps",  # Pashto
    "pt_br": "pt",  # Portuguese
    "ro_ro": "ro",  # Romanian
    "ru_ru": "ru",  # Russian
    "sd_in": "sd",  # Sindhi
    "sk_sk": "sk",  # Slovak
    "sl_si": "sl",  # Slovenian
    "sn_zw": "sn",  # Shona
    "so_so": "so",  # Somali
    "sr_rs": "sr",  # Serbian
    "sv_se": "sv",  # Swedish
    "sw_ke": "sw",  # Swahili
    "ta_in": "ta",  # Tamil
    "te_in": "te",  # Telugu
    "tg_tj": "tg",  # Tajik
    "th_th": "th",  # Thai
    "tr_tr": "tr",  # Turkish
    "uk_ua": "uk",  # Ukrainian
    "umb_ao": "pt",  # Umbundu, mapped to Portuguese (Angola)
    "ur_pk": "ur",  # Urdu
    "uz_uz": "uz",  # Uzbek
    "vi_vn": "vi",  # Vietnamese
    "wo_sn": "fr",  # Wolof, mapped to French (Senegal)
    "xh_za": "sw",  # Xhosa, mapped to Swahili (closest available)
    "yo_ng": "yo",  # Yoruba
    "yue_hant_hk": "yue",  # Cantonese (traditional script)
    "zu_za": "sw",  # Zulu, mapped to Swahili (closest available)
}

# FLEURS locales treated as unsupported by SeamlessM4T.
# For every entry, FLEURSSEAMLESSM4T substitutes a different, related language
# (e.g. "oc_fr" -> "cat"). Entries are kept in alphabetical order.
seamless_unsupported = [
    "ast_es",
    "fil_ph",
    "ha_ng",
    "kam_ke",
    "kea_cv",
    # Was missing: FLEURSSEAMLESSM4T maps Luxembourgish to German ("deu")
    # because it is not supported in decoding, like the other entries here.
    "lb_lu",
    "ln_cd",
    "mi_nz",
    "nso_za",
    "oc_fr",
    "umb_ao",
    "wo_sn",
    "xh_za",
]

# Lookup table from FLEURS locale codes (keys) to the three-letter language
# codes passed to SeamlessM4T (values).
# Entries with a trailing comment are not one-to-one: the value is either a
# specific variety of the language or a stand-in for a language that is not
# available, as the comment explains.
FLEURSSEAMLESSM4T = {
    "af_za": "afr",
    "am_et": "amh",
    "ar_eg": "arz",  # Mapping Egyptian Arabic as closest
    "as_in": "asm",
    "ast_es": "spa",  # Asturian not supported in decoding; mapped to Spanish as the phonetically closest language available
    "az_az": "azj",
    "be_by": "bel",
    "bg_bg": "bul",
    "bn_in": "ben",
    "bs_ba": "bos",
    "ca_es": "cat",
    "ceb_ph": "ceb",
    "ckb_iq": "ckb",
    "cmn_hans_cn": "cmn",
    "cs_cz": "ces",
    "cy_gb": "cym",
    "da_dk": "dan",
    "de_de": "deu",
    "el_gr": "ell",
    "en_us": "eng",
    "es_419": "spa",  # General Spanish
    "et_ee": "est",
    "fa_ir": "pes",  # Mapping to Western Persian
    "ff_sn": "fuv",  # Fulah, Mapping to Nigerian Fulfulde as closest
    "fi_fi": "fin",
    "fil_ph": "tgl",  # Tagalog for Filipino
    "fr_fr": "fra",
    "ga_ie": "gle",
    "gl_es": "glg",
    "gu_in": "guj",
    "ha_ng": "fuv",  # Hausa isn't listed in SeamlessM4T, map to a similar language like Nigerian Fulfulde (fuv) as closest
    "he_il": "heb",
    "hi_in": "hin",
    "hr_hr": "hrv",
    "hu_hu": "hun",
    "hy_am": "hye",
    "id_id": "ind",
    "ig_ng": "ibo",
    "is_is": "isl",
    "it_it": "ita",
    "ja_jp": "jpn",
    "jv_id": "jav",
    "ka_ge": "kat",
    "kam_ke": "swh",  # Kamba not supported in decoding, mapping to Swahili as closest East African language available
    "kea_cv": "por",  # Kabuverdianu (Cape Verde) not supported in decoding; mapped to Portuguese, since it is a creole with strong Portuguese influence
    "kk_kz": "kaz",
    "km_kh": "khm",
    "kn_in": "kan",
    "ko_kr": "kor",
    "ky_kg": "kir",
    "lb_lu": "deu",  # Luxembourgish not supported in decoding, mapping to German as closest language available
    "lg_ug": "lug",
    "ln_cd": "lug",  # Lingala not in SeamlessM4T; mapped to Ganda (lug) as a close alternative
    "lo_la": "lao",
    "lt_lt": "lit",
    "luo_ke": "luo",
    "lv_lv": "lvs",
    "mi_nz": "jav",  # Maori not available in SeamlessM4T; Javanese used as a somewhat close stand-in
    "mk_mk": "mkd",
    "ml_in": "mal",
    "mn_mn": "khk",  # Mapping Halh Mongolian as closest
    "mr_in": "mar",
    "ms_my": "zlm",  # Standard Malay needs to be mapped to colloquial malay (seems to be misreported on SeamlessM4T README)
    "mt_mt": "mlt",
    "my_mm": "mya",
    "nb_no": "nob",
    "ne_np": "npi",
    "nl_nl": "nld",
    "nso_za": "zul",  # Northern Sotho not available in SeamlessM4T; mapped to Zulu as closest available
    "ny_mw": "nya",
    "oc_fr": "cat",  # Occitan not available in decoding; mapped to Catalan, its closest relative (the two are close to dialectal variants of each other)
    "om_et": "gaz",  # West Central Oromo
    "or_in": "ory",
    "pa_in": "pan",
    "pl_pl": "pol",
    "ps_af": "pbt",  # Southern Pashto
    "pt_br": "por",
    "ro_ro": "ron",
    "ru_ru": "rus",
    "sd_in": "snd",
    "sk_sk": "slk",
    "sl_si": "slv",
    "sn_zw": "sna",
    "so_so": "som",
    "sr_rs": "srp",
    "sv_se": "swe",
    "sw_ke": "swh",
    "ta_in": "tam",
    "te_in": "tel",
    "tg_tj": "tgk",
    "th_th": "tha",
    "tr_tr": "tur",
    "uk_ua": "ukr",
    "umb_ao": "swh",  # Umbundu not listed; mapped to Swahili as a stand-in
    "ur_pk": "urd",
    "uz_uz": "uzn",
    "vi_vn": "vie",
    "wo_sn": "fuv",  # Wolof mapped to Fulfulde
    "xh_za": "zul",  # Xhosa not available in decoding, mapping to closest phonetic relative Zulu ('zul')
    "yo_ng": "yor",
    "yue_hant_hk": "yue",
    "zu_za": "zul",
}


# FLORES-code form of the locales that Whisper has no language code of its
# own for: each entry is the FLEURS_TO_FLORES value of a FLEURS locale that
# FLEURS2WHISPER maps to a stand-in language. Entries follow the alphabetical
# order of the corresponding FLEURS locale codes.
whisper_unsupported_flores = [
    "ast_Latn",
    "ceb_Latn",
    "ckb_Arab",
    "fuv_Latn",
    # Whisper only offers Tagalog; strictly speaking Tagalog is not Filipino
    "tgl_Latn",
    "gle_Latn",
    "ibo_Latn",
    "kam_Latn",
    "kea_Latn",
    "kir_Cyrl",
    "lug_Latn",
    "luo_Latn",
    "nso_Latn",
    # The next three were missing although FLEURS2WHISPER maps their locales
    # (ny_mw, om_et, or_in) to stand-ins as well: "sw", "so" and "bn".
    "nya_Latn",
    "gaz_Latn",
    "ory_Orya",
    "umb_Latn",
    "wol_Latn",
    "xho_Latn",
    "zul_Latn",
]

# FLORES-code form of the locales treated as unsupported by SeamlessM4T: each
# entry is the FLEURS_TO_FLORES value of a FLEURS locale for which
# FLEURSSEAMLESSM4T substitutes a different, related language. Entries follow
# the alphabetical order of the corresponding FLEURS locale codes.
seamless_unsupported_flores = [
    "ast_Latn",
    "tgl_Latn",
    "hau_Latn",
    "kam_Latn",
    "kea_Latn",
    # Was missing: FLEURSSEAMLESSM4T maps Luxembourgish (lb_lu) to German
    # because it is not supported in decoding, like the other entries here.
    "ltz_Latn",
    "lin_Latn",
    "mri_Latn",
    "nso_Latn",
    "oci_Latn",
    "umb_Latn",
    "wol_Latn",
    "xho_Latn",
]
