# whispertube_backend/languages.py
# Last commit: "Added GPU Models" (a689179)
# Languages supported by OpenAI Whisper, given as lowercase English names.
whisper_languages = [
    "afrikaans",
    "arabic",
    "armenian",
    "azerbaijani",
    "belarusian",
    "bosnian",
    "bulgarian",
    "catalan",
    "chinese",
    "croatian",
    "czech",
    "danish",
    "dutch",
    "english",
    "estonian",
    "finnish",
    "french",
    "galician",
    "german",
    "greek",
    "hebrew",
    "hindi",
    "hungarian",
    "icelandic",
    "indonesian",
    "italian",
    "japanese",
    "kannada",
    "kazakh",
    "korean",
    "latvian",
    "lithuanian",
    "macedonian",
    "malay",
    "marathi",
    "maori",
    "nepali",
    "norwegian",
    "persian",
    "polish",
    "portuguese",
    "romanian",
    "russian",
    "serbian",
    "slovak",
    "slovenian",
    "spanish",
    "swahili",
    "swedish",
    "tagalog",
    "tamil",
    "thai",
    "turkish",
    "ukrainian",
    "urdu",
    "vietnamese",
    "welsh",
]
# Languages supported by Google Translate.
# Maps the lowercase English language name to its Google Translate code.
gt_languages = {
    "afrikaans": "af",
    "albanian": "sq",
    "amharic": "am",
    "arabic": "ar",
    "armenian": "hy",
    "assamese": "as",
    "aymara": "ay",
    "azerbaijani": "az",
    "bambara": "bm",
    "basque": "eu",
    "belarusian": "be",
    "bengali": "bn",
    "bhojpuri": "bho",
    "bosnian": "bs",
    "bulgarian": "bg",
    "catalan": "ca",
    "cebuano": "ceb",
    "chichewa": "ny",
    "chinese (simplified)": "zh-CN",
    "chinese (traditional)": "zh-TW",
    "corsican": "co",
    "croatian": "hr",
    "czech": "cs",
    "danish": "da",
    "dhivehi": "dv",
    "dogri": "doi",
    "dutch": "nl",
    "english": "en",
    "esperanto": "eo",
    "estonian": "et",
    "ewe": "ee",
    "filipino": "tl",
    "finnish": "fi",
    "french": "fr",
    "frisian": "fy",
    "galician": "gl",
    "georgian": "ka",
    "german": "de",
    "greek": "el",
    "guarani": "gn",
    "gujarati": "gu",
    "haitian creole": "ht",
    "hausa": "ha",
    "hawaiian": "haw",
    "hebrew": "iw",
    "hindi": "hi",
    "hmong": "hmn",
    "hungarian": "hu",
    "icelandic": "is",
    "igbo": "ig",
    "ilocano": "ilo",
    "indonesian": "id",
    "irish": "ga",
    "italian": "it",
    "japanese": "ja",
    "javanese": "jw",
    "kannada": "kn",
    "kazakh": "kk",
    "khmer": "km",
    "kinyarwanda": "rw",
    "konkani": "gom",
    "korean": "ko",
    "krio": "kri",
    "kurdish (kurmanji)": "ku",
    "kurdish (sorani)": "ckb",
    "kyrgyz": "ky",
    "lao": "lo",
    "latin": "la",
    "latvian": "lv",
    "lingala": "ln",
    "lithuanian": "lt",
    "luganda": "lg",
    "luxembourgish": "lb",
    "macedonian": "mk",
    "maithili": "mai",
    "malagasy": "mg",
    "malay": "ms",
    "malayalam": "ml",
    "maltese": "mt",
    "maori": "mi",
    "marathi": "mr",
    "meiteilon (manipuri)": "mni-Mtei",
    "mizo": "lus",
    "mongolian": "mn",
    "myanmar": "my",
    "nepali": "ne",
    "norwegian": "no",
    "odia (oriya)": "or",
    "oromo": "om",
    "pashto": "ps",
    "persian": "fa",
    "polish": "pl",
    "portuguese": "pt",
    "punjabi": "pa",
    "quechua": "qu",
    "romanian": "ro",
    "russian": "ru",
    "samoan": "sm",
    "sanskrit": "sa",
    "scots gaelic": "gd",
    "sepedi": "nso",
    "serbian": "sr",
    "sesotho": "st",
    "shona": "sn",
    "sindhi": "sd",
    "sinhala": "si",
    "slovak": "sk",
    "slovenian": "sl",
    "somali": "so",
    "spanish": "es",
    "sundanese": "su",
    "swahili": "sw",
    "swedish": "sv",
    "tajik": "tg",
    "tamil": "ta",
    "tatar": "tt",
    "telugu": "te",
    "thai": "th",
    "tigrinya": "ti",
    "tsonga": "ts",
    "turkish": "tr",
    "turkmen": "tk",
    "twi": "ak",
    "ukrainian": "uk",
    "urdu": "ur",
    "uyghur": "ug",
    "uzbek": "uz",
    "vietnamese": "vi",
    "welsh": "cy",
    "xhosa": "xh",
    "yiddish": "yi",
    "yoruba": "yo",
    "zulu": "zu",
}
# Languages supported by NLLB, given as capitalised display names.
# Script variants are listed as separate entries.
nllb_languages = [
    "Acehnese (Arabic script)",
    "Acehnese (Latin script)",
    "Afrikaans",
    "Akan",
    "Amharic",
    "Armenian",
    "Assamese",
    "Asturian",
    "Awadhi",
    "Ayacucho Quechua",
    "Balinese",
    "Bambara",
    "Banjar (Arabic script)",
    "Banjar (Latin script)",
    "Bashkir",
    "Basque",
    "Belarusian",
    "Bemba",
    "Bengali",
    "Bhojpuri",
    "Bosnian",
    "Buginese",
    "Bulgarian",
    "Burmese",
    "Catalan",
    "Cebuano",
    "Central Atlas Tamazight",
    "Central Aymara",
    "Central Kanuri (Arabic script)",
    "Central Kanuri (Latin script)",
    "Central Kurdish",
    "Chhattisgarhi",
    "Chinese (Simplified)",
    "Chinese (Traditional)",
    "Chokwe",
    "Crimean Tatar",
    "Croatian",
    "Czech",
    "Danish",
    "Dari",
    "Dutch",
    "Dyula",
    "Dzongkha",
    "Eastern Panjabi",
    "Eastern Yiddish",
    "Egyptian Arabic",
    "English",
    "Esperanto",
    "Estonian",
    "Ewe",
    "Faroese",
    "Fijian",
    "Finnish",
    "Fon",
    "French",
    "Friulian",
    "Galician",
    "Ganda",
    "Georgian",
    "German",
    "Greek",
    "Guarani",
    "Gujarati",
    "Haitian Creole",
    "Halh Mongolian",
    "Hausa",
    "Hebrew",
    "Hindi",
    "Hungarian",
    "Icelandic",
    "Igbo",
    "Ilocano",
    "Indonesian",
    "Irish",
    "Italian",
    "Japanese",
    "Javanese",
    "Jingpho",
    "Kabiyè",
    "Kabuverdianu",
    "Kabyle",
    "Kamba",
    "Kannada",
    "Kashmiri (Arabic script)",
    "Kashmiri (Devanagari script)",
    "Kazakh",
    "Khmer",
    "Kikongo",
    "Kikuyu",
    "Kimbundu",
    "Kinyarwanda",
    "Korean",
    "Kyrgyz",
    "Lao",
    "Latgalian",
    "Ligurian",
    "Limburgish",
    "Lingala",
    "Lithuanian",
    "Lombard",
    "Luba-Kasai",
    "Luo",
    "Luxembourgish",
    "Macedonian",
    "Magahi",
    "Maithili",
    "Malayalam",
    "Maltese",
    "Maori",
    "Marathi",
    "Meitei (Bengali script)",
    "Mesopotamian Arabic",
    "Minangkabau (Arabic script)",
    "Minangkabau (Latin script)",
    "Mizo",
    "Modern Standard Arabic",
    "Modern Standard Arabic (Romanized)",
    "Moroccan Arabic",
    "Mossi",
    "Najdi Arabic",
    "Nepali",
    "Nigerian Fulfulde",
    "North Azerbaijani",
    "North Levantine Arabic",
    "Northern Kurdish",
    "Northern Sotho",
    "Northern Uzbek",
    "Norwegian Bokmål",
    "Norwegian Nynorsk",
    "Nuer",
    "Nyanja",
    "Occitan",
    "Odia",
    "Pangasinan",
    "Papiamento",
    "Plateau Malagasy",
    "Polish",
    "Portuguese",
    "Romanian",
    "Rundi",
    "Russian",
    "Samoan",
    "Sango",
    "Sanskrit",
    "Santali",
    "Sardinian",
    "Scottish Gaelic",
    "Serbian",
    "Shan",
    "Shona",
    "Sicilian",
    "Silesian",
    "Sindhi",
    "Sinhala",
    "Slovak",
    "Slovenian",
    "Somali",
    "South Azerbaijani",
    "South Levantine Arabic",
    "Southern Pashto",
    "Southern Sotho",
    "Southwestern Dinka",
    "Spanish",
    "Standard Latvian",
    "Standard Malay",
    "Standard Tibetan",
    "Sundanese",
    "Swahili",
    "Swati",
    "Swedish",
    "Ta'izzi-Adeni Arabic",
    "Tagalog",
    "Tajik",
    "Tamasheq (Latin script)",
    "Tamasheq (Tifinagh script)",
    "Tamil",
    "Tatar",
    "Telugu",
    "Thai",
    "Tigrinya",
    "Tok Pisin",
    "Tosk Albanian",
    "Tsonga",
    "Tswana",
    "Tumbuka",
    "Tunisian Arabic",
    "Turkish",
    "Turkmen",
    "Twi",
    "Ukrainian",
    "Umbundu",
    "Urdu",
    "Uyghur",
    "Venetian",
    "Vietnamese",
    "Waray",
    "Welsh",
    "West Central Oromo",
    "Western Persian",
    "Wolof",
    "Xhosa",
    "Yoruba",
    "Yue Chinese",
    "Zulu",
]
# Right-to-left written languages.
# Maps a language code to a display name; membership of a code in this dict
# marks the language as being written right to left.
r2l_languages = {
    'ar': 'Arabic',
    'he': 'Hebrew',
    # gt_languages in this file maps 'hebrew' to the legacy code 'iw', so it
    # is listed too; with only 'he' a lookup by that code misses Hebrew.
    'iw': 'Hebrew',
    'fa': 'Persian (Farsi)',
    'ur': 'Urdu',
    'yi': 'Yiddish',
    'sd': 'Sindhi',
    'ps': 'Pashto',
    # Further right-to-left languages whose codes appear in gt_languages.
    'ug': 'Uyghur',
    'dv': 'Dhivehi',
    'ckb': 'Kurdish (Sorani)',
    # Add more language codes as needed
}
# Two-way lookup between NLLB language names and Google Translate codes.
# An NLLB name is included only when its lower-cased form is a key of
# gt_languages; every other NLLB name is left out of both tables.
CODE2LANG = {}  # Google Translate code -> NLLB language name
LANG2CODE = {}  # NLLB language name -> Google Translate code
for lang in nllb_languages:
    lang_code = gt_languages.get(lang.lower())
    if not lang_code:
        # No Google Translate entry under this exact name.
        continue
    CODE2LANG[lang_code] = lang
    LANG2CODE[lang] = lang_code