Whisper Languages See: https://github.com/openai/whisper/blob/main/whisper/tokenizer.py LANGUAGES = { "en": "english", "zh": "chinese", "de": "german", "es": "spanish", "ru": "russian", "ko": "korean", "fr": "french", "ja": "japanese", "pt": "portuguese", "tr": "turkish", "pl": "polish", "ca": "catalan", "nl": "dutch", "ar": "arabic", "sv": "swedish", "it": "italian", "id": "indonesian", "hi": "hindi", "fi": "finnish", "vi": "vietnamese", "he": "hebrew", "uk": "ukrainian", "el": "greek", "ms": "malay", "cs": "czech", "ro": "romanian", "da": "danish", "hu": "hungarian", "ta": "tamil", "no": "norwegian", "th": "thai", "ur": "urdu", "hr": "croatian", "bg": "bulgarian", "lt": "lithuanian", "la": "latin", "mi": "maori", "ml": "malayalam", "cy": "welsh", "sk": "slovak", "te": "telugu", "fa": "persian", "lv": "latvian", "bn": "bengali", "sr": "serbian", "az": "azerbaijani", "sl": "slovenian", "kn": "kannada", "et": "estonian", "mk": "macedonian", "br": "breton", "eu": "basque", "is": "icelandic", "hy": "armenian", "ne": "nepali", "mn": "mongolian", "bs": "bosnian", "kk": "kazakh", "sq": "albanian", "sw": "swahili", "gl": "galician", "mr": "marathi", "pa": "punjabi", "si": "sinhala", "km": "khmer", "sn": "shona", "yo": "yoruba", "so": "somali", "af": "afrikaans", "oc": "occitan", "ka": "georgian", "be": "belarusian", "tg": "tajik", "sd": "sindhi", "gu": "gujarati", "am": "amharic", "yi": "yiddish", "lo": "lao", "uz": "uzbek", "fo": "faroese", "ht": "haitian creole", "ps": "pashto", "tk": "turkmen", "nn": "nynorsk", "mt": "maltese", "sa": "sanskrit", "lb": "luxembourgish", "my": "myanmar", "bo": "tibetan", "tl": "tagalog", "mg": "malagasy", "as": "assamese", "tt": "tatar", "haw": "hawaiian", "ln": "lingala", "ha": "hausa", "ba": "bashkir", "jw": "javanese", "su": "sundanese", "yue": "cantonese", } # language code lookup by name, with a few language aliases TO_LANGUAGE_CODE = { **{language: code for code, language in LANGUAGES.items()}, "burmese": "my", "valencian": "ca", 
"flemish": "nl", "haitian": "ht", "letzeburgesch": "lb", "pushto": "ps", "panjabi": "pa", "moldavian": "ro", "moldovan": "ro", "sinhalese": "si", "castilian": "es", "mandarin": "zh", } [ [ "fr", 0.9877740740776062 ], [ "en", 0.004565223585814238 ], [ "it", 0.0013161455281078815 ], [ "de", 0.0010678422404453158 ], [ "br", 0.0010424673091620207 ], [ "la", 0.0007281662547029555 ], [ "es", 0.00047290409565903246 ], [ "ja", 0.0003620732750277966 ], [ "pt", 0.00025811095838434994 ], [ "nn", 0.0002432575129205361 ], [ "pl", 0.00021074499818496406 ], [ "nl", 0.00020471173047553748 ], [ "ru", 0.0002026906149694696 ], [ "cy", 0.00018965611525345594 ], [ "oc", 0.00017241497698705643 ], [ "km", 0.00012158624303992838 ], [ "zh", 0.00011692210682667792 ], [ "jw", 0.00010178791853832081 ], [ "ro", 0.00009590199624653906 ], [ "sv", 0.00007909776468295604 ], [ "ar", 0.0000725739446352236 ], [ "tr", 0.000057220226153731346 ], [ "fi", 0.00005474982754094526 ], [ "ko", 0.00004248135883244686 ], [ "mi", 0.00003786800516536459 ], [ "sn", 0.000031534167646896094 ], [ "hu", 0.000028775264581781812 ], [ "ht", 0.000028651957109104842 ], [ "vi", 0.000028335278329905123 ], [ "da", 0.000027910642529604957 ], [ "el", 0.000023526350560132414 ], [ "fa", 0.000022600075681111775 ], [ "haw", 0.00002183002288802527 ], [ "no", 0.000019493931176839396 ], [ "cs", 0.000016285941455862485 ], [ "he", 0.00001463644457544433 ], [ "sa", 0.000012327554941293783 ], [ "th", 0.000010766526429506484 ], [ "ca", 0.000008936658559832722 ], [ "bg", 0.000007716189429629594 ], [ "ms", 0.000007680522685404867 ], [ "yo", 0.000006417556051019346 ], [ "gl", 0.000006063059572625207 ], [ "ln", 0.000006041265805833973 ], [ "si", 0.000005011619577999227 ], [ "hi", 0.000004941234692523722 ], [ "sk", 0.000004815055945073254 ], [ "uk", 0.000004419264769239817 ], [ "hy", 0.000004105790594621794 ], [ "id", 0.000003996842679043766 ], [ "tl", 0.000003896923772117589 ], [ "ml", 0.000003836657469946658 ], [ "hr", 
0.0000035950304209109163 ], [ "ta", 0.0000031487393243878614 ], [ "ur", 0.000002829937329806853 ], [ "yi", 0.0000028043848487868672 ], [ "bs", 0.000002576711949586752 ], [ "sl", 0.0000025655576791905332 ], [ "eu", 0.000002537221007514745 ], [ "sw", 0.000002027671598625602 ], [ "bn", 0.0000020110176137677627 ], [ "et", 0.0000019163769593433244 ], [ "af", 0.0000016607176576144411 ], [ "lb", 0.0000015626893627995742 ], [ "fo", 0.000001537753632874228 ], [ "mn", 0.0000012066857379977591 ], [ "sq", 0.0000010732701412052847 ], [ "te", 0.0000010657946631908999 ], [ "pa", 0.000001037814399751369 ], [ "sr", 0.000001028694782689854 ], [ "my", 9.428002840650151e-7 ], [ "bo", 8.66582468006527e-7 ], [ "ne", 8.636189363642188e-7 ], [ "lo", 7.318710117942828e-7 ], [ "be", 6.941849051145255e-7 ], [ "lv", 6.265471483857254e-7 ], [ "az", 6.14897032846784e-7 ], [ "sd", 5.913773293286795e-7 ], [ "mr", 5.441081043500162e-7 ], [ "ps", 4.3055180753981404e-7 ], [ "is", 3.4580702390485385e-7 ], [ "kk", 3.0084689228715433e-7 ], [ "kn", 1.7288991216446448e-7 ], [ "as", 1.5110647666460864e-7 ], [ "am", 1.2572900232044049e-7 ], [ "gu", 1.2536465021639742e-7 ], [ "lt", 1.1584211279114243e-7 ], [ "mt", 9.117716359696715e-8 ], [ "mk", 8.674353324522599e-8 ], [ "ka", 8.111815219535856e-8 ], [ "ha", 5.529446056584675e-8 ], [ "su", 5.323035523474573e-8 ], [ "tg", 4.645137252623499e-8 ], [ "tt", 2.9631449294242884e-8 ], [ "so", 5.109404455083677e-9 ], [ "uz", 3.229358558343165e-9 ], [ "mg", 3.0856159849435016e-9 ], [ "tk", 4.514433618596492e-11 ], [ "ba", 3.5637395812138095e-11 ] ]