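# NOTE(review): "Spaces: Runtime error / Runtime error" appears to be hosting-page
# status text captured together with this file; it is not part of the module.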
# OpenAI Whisper - Supported Languages
# Lower-case English language names. The spelling matches the keys used in
# gt_languages where both tables list the same language.
whisper_languages = [
    'afrikaans',
    'arabic',
    'armenian',
    'azerbaijani',
    'belarusian',
    'bosnian',
    'bulgarian',
    'catalan',
    'chinese',
    'croatian',
    'czech',
    'danish',
    'dutch',
    'english',
    'estonian',
    'finnish',
    'french',
    'galician',
    'german',
    'greek',
    'hebrew',
    'hindi',
    'hungarian',
    'icelandic',
    'indonesian',
    'italian',
    'japanese',
    'kannada',
    'kazakh',
    'korean',
    'latvian',
    'lithuanian',
    'macedonian',
    'malay',
    'marathi',
    'maori',
    'nepali',
    'norwegian',
    'persian',
    'polish',
    'portuguese',
    'romanian',
    'russian',
    'serbian',
    'slovak',
    'slovenian',
    'spanish',
    'swahili',
    'swedish',
    'tagalog',
    'tamil',
    'thai',
    'turkish',
    'ukrainian',
    'urdu',
    'vietnamese',
    'welsh',
]
# Google Translate - Supported Languages
# Maps a lower-case English language name to the language code passed to
# Google Translate.
# NOTE(review): Hebrew is keyed to 'iw' here (not 'he') and Javanese to 'jw';
# any table that is looked up by these codes has to use the same spelling.
gt_languages = {
    'afrikaans': 'af',
    'albanian': 'sq',
    'amharic': 'am',
    'arabic': 'ar',
    'armenian': 'hy',
    'assamese': 'as',
    'aymara': 'ay',
    'azerbaijani': 'az',
    'bambara': 'bm',
    'basque': 'eu',
    'belarusian': 'be',
    'bengali': 'bn',
    'bhojpuri': 'bho',
    'bosnian': 'bs',
    'bulgarian': 'bg',
    'catalan': 'ca',
    'cebuano': 'ceb',
    'chichewa': 'ny',
    'chinese (simplified)': 'zh-CN',
    'chinese (traditional)': 'zh-TW',
    'corsican': 'co',
    'croatian': 'hr',
    'czech': 'cs',
    'danish': 'da',
    'dhivehi': 'dv',
    'dogri': 'doi',
    'dutch': 'nl',
    'english': 'en',
    'esperanto': 'eo',
    'estonian': 'et',
    'ewe': 'ee',
    'filipino': 'tl',
    'finnish': 'fi',
    'french': 'fr',
    'frisian': 'fy',
    'galician': 'gl',
    'georgian': 'ka',
    'german': 'de',
    'greek': 'el',
    'guarani': 'gn',
    'gujarati': 'gu',
    'haitian creole': 'ht',
    'hausa': 'ha',
    'hawaiian': 'haw',
    'hebrew': 'iw',
    'hindi': 'hi',
    'hmong': 'hmn',
    'hungarian': 'hu',
    'icelandic': 'is',
    'igbo': 'ig',
    'ilocano': 'ilo',
    'indonesian': 'id',
    'irish': 'ga',
    'italian': 'it',
    'japanese': 'ja',
    'javanese': 'jw',
    'kannada': 'kn',
    'kazakh': 'kk',
    'khmer': 'km',
    'kinyarwanda': 'rw',
    'konkani': 'gom',
    'korean': 'ko',
    'krio': 'kri',
    'kurdish (kurmanji)': 'ku',
    'kurdish (sorani)': 'ckb',
    'kyrgyz': 'ky',
    'lao': 'lo',
    'latin': 'la',
    'latvian': 'lv',
    'lingala': 'ln',
    'lithuanian': 'lt',
    'luganda': 'lg',
    'luxembourgish': 'lb',
    'macedonian': 'mk',
    'maithili': 'mai',
    'malagasy': 'mg',
    'malay': 'ms',
    'malayalam': 'ml',
    'maltese': 'mt',
    'maori': 'mi',
    'marathi': 'mr',
    'meiteilon (manipuri)': 'mni-Mtei',
    'mizo': 'lus',
    'mongolian': 'mn',
    'myanmar': 'my',
    'nepali': 'ne',
    'norwegian': 'no',
    'odia (oriya)': 'or',
    'oromo': 'om',
    'pashto': 'ps',
    'persian': 'fa',
    'polish': 'pl',
    'portuguese': 'pt',
    'punjabi': 'pa',
    'quechua': 'qu',
    'romanian': 'ro',
    'russian': 'ru',
    'samoan': 'sm',
    'sanskrit': 'sa',
    'scots gaelic': 'gd',
    'sepedi': 'nso',
    'serbian': 'sr',
    'sesotho': 'st',
    'shona': 'sn',
    'sindhi': 'sd',
    'sinhala': 'si',
    'slovak': 'sk',
    'slovenian': 'sl',
    'somali': 'so',
    'spanish': 'es',
    'sundanese': 'su',
    'swahili': 'sw',
    'swedish': 'sv',
    'tajik': 'tg',
    'tamil': 'ta',
    'tatar': 'tt',
    'telugu': 'te',
    'thai': 'th',
    'tigrinya': 'ti',
    'tsonga': 'ts',
    'turkish': 'tr',
    'turkmen': 'tk',
    'twi': 'ak',
    'ukrainian': 'uk',
    'urdu': 'ur',
    'uyghur': 'ug',
    'uzbek': 'uz',
    'vietnamese': 'vi',
    'welsh': 'cy',
    'xhosa': 'xh',
    'yiddish': 'yi',
    'yoruba': 'yo',
    'zulu': 'zu',
}
# NLLB - Supported Languages
# Display names (title case, with script/variant qualifiers where a language
# is listed more than once). Order is preserved exactly as given.
nllb_languages = [
    'Acehnese (Arabic script)',
    'Acehnese (Latin script)',
    'Afrikaans',
    'Akan',
    'Amharic',
    'Armenian',
    'Assamese',
    'Asturian',
    'Awadhi',
    'Ayacucho Quechua',
    'Balinese',
    'Bambara',
    'Banjar (Arabic script)',
    'Banjar (Latin script)',
    'Bashkir',
    'Basque',
    'Belarusian',
    'Bemba',
    'Bengali',
    'Bhojpuri',
    'Bosnian',
    'Buginese',
    'Bulgarian',
    'Burmese',
    'Catalan',
    'Cebuano',
    'Central Atlas Tamazight',
    'Central Aymara',
    'Central Kanuri (Arabic script)',
    'Central Kanuri (Latin script)',
    'Central Kurdish',
    'Chhattisgarhi',
    'Chinese (Simplified)',
    'Chinese (Traditional)',
    'Chokwe',
    'Crimean Tatar',
    'Croatian',
    'Czech',
    'Danish',
    'Dari',
    'Dutch',
    'Dyula',
    'Dzongkha',
    'Eastern Panjabi',
    'Eastern Yiddish',
    'Egyptian Arabic',
    'English',
    'Esperanto',
    'Estonian',
    'Ewe',
    'Faroese',
    'Fijian',
    'Finnish',
    'Fon',
    'French',
    'Friulian',
    'Galician',
    'Ganda',
    'Georgian',
    'German',
    'Greek',
    'Guarani',
    'Gujarati',
    'Haitian Creole',
    'Halh Mongolian',
    'Hausa',
    'Hebrew',
    'Hindi',
    'Hungarian',
    'Icelandic',
    'Igbo',
    'Ilocano',
    'Indonesian',
    'Irish',
    'Italian',
    'Japanese',
    'Javanese',
    'Jingpho',
    'Kabiyè',
    'Kabuverdianu',
    'Kabyle',
    'Kamba',
    'Kannada',
    'Kashmiri (Arabic script)',
    'Kashmiri (Devanagari script)',
    'Kazakh',
    'Khmer',
    'Kikongo',
    'Kikuyu',
    'Kimbundu',
    'Kinyarwanda',
    'Korean',
    'Kyrgyz',
    'Lao',
    'Latgalian',
    'Ligurian',
    'Limburgish',
    'Lingala',
    'Lithuanian',
    'Lombard',
    'Luba-Kasai',
    'Luo',
    'Luxembourgish',
    'Macedonian',
    'Magahi',
    'Maithili',
    'Malayalam',
    'Maltese',
    'Maori',
    'Marathi',
    'Meitei (Bengali script)',
    'Mesopotamian Arabic',
    'Minangkabau (Arabic script)',
    'Minangkabau (Latin script)',
    'Mizo',
    'Modern Standard Arabic',
    'Modern Standard Arabic (Romanized)',
    'Moroccan Arabic',
    'Mossi',
    'Najdi Arabic',
    'Nepali',
    'Nigerian Fulfulde',
    'North Azerbaijani',
    'North Levantine Arabic',
    'Northern Kurdish',
    'Northern Sotho',
    'Northern Uzbek',
    'Norwegian Bokmål',
    'Norwegian Nynorsk',
    'Nuer',
    'Nyanja',
    'Occitan',
    'Odia',
    'Pangasinan',
    'Papiamento',
    'Plateau Malagasy',
    'Polish',
    'Portuguese',
    'Romanian',
    'Rundi',
    'Russian',
    'Samoan',
    'Sango',
    'Sanskrit',
    'Santali',
    'Sardinian',
    'Scottish Gaelic',
    'Serbian',
    'Shan',
    'Shona',
    'Sicilian',
    'Silesian',
    'Sindhi',
    'Sinhala',
    'Slovak',
    'Slovenian',
    'Somali',
    'South Azerbaijani',
    'South Levantine Arabic',
    'Southern Pashto',
    'Southern Sotho',
    'Southwestern Dinka',
    'Spanish',
    'Standard Latvian',
    'Standard Malay',
    'Standard Tibetan',
    'Sundanese',
    'Swahili',
    'Swati',
    'Swedish',
    "Ta'izzi-Adeni Arabic",
    'Tagalog',
    'Tajik',
    'Tamasheq (Latin script)',
    'Tamasheq (Tifinagh script)',
    'Tamil',
    'Tatar',
    'Telugu',
    'Thai',
    'Tigrinya',
    'Tok Pisin',
    'Tosk Albanian',
    'Tsonga',
    'Tswana',
    'Tumbuka',
    'Tunisian Arabic',
    'Turkish',
    'Turkmen',
    'Twi',
    'Ukrainian',
    'Umbundu',
    'Urdu',
    'Uyghur',
    'Venetian',
    'Vietnamese',
    'Waray',
    'Welsh',
    'West Central Oromo',
    'Western Persian',
    'Wolof',
    'Xhosa',
    'Yoruba',
    'Yue Chinese',
    'Zulu',
]
# Right to Left Written Languages
# Maps a language code to a display name for languages written right to left.
r2l_languages = {
    'ar': 'Arabic',
    'he': 'Hebrew',
    # gt_languages maps Hebrew to 'iw', so a lookup by that code must hit too.
    'iw': 'Hebrew',
    'fa': 'Persian (Farsi)',
    'ur': 'Urdu',
    'yi': 'Yiddish',
    'sd': 'Sindhi',
    'ps': 'Pashto',
    # Other right-to-left scripts that gt_languages also offers.
    'ug': 'Uyghur',
    'dv': 'Dhivehi',
    'ckb': 'Kurdish (Sorani)',
    # Add more language codes as needed
}
# Cross-reference NLLB display names with Google Translate codes.
#   CODE2LANG: Google Translate code -> NLLB display name
#   LANG2CODE: NLLB display name     -> Google Translate code
# Matching is by exact lower-cased name, so an NLLB entry with no identically
# named key in gt_languages (e.g. 'Tagalog', which gt_languages lists as
# 'filipino') is left out of both mappings.
CODE2LANG = {}
LANG2CODE = {}
for lang in nllb_languages:
    lang_code = gt_languages.get(lang.lower())
    if not lang_code:
        # No Google Translate entry under this name; skip it.
        continue
    CODE2LANG[lang_code] = lang
    LANG2CODE[lang] = lang_code