JBHF committed on
Commit
fff2d46
1 Parent(s): b18b8c6

Whisper Languages.txt

Browse files
Files changed (1) hide show
  1. Whisper Languages.txt +525 -0
Whisper Languages.txt ADDED
@@ -0,0 +1,525 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Whisper Languages
2
+
3
+ SEE:
4
+ https://github.com/openai/whisper/blob/main/whisper/tokenizer.py
5
+
6
+ LANGUAGES = {
7
+ "en": "english",
8
+ "zh": "chinese",
9
+ "de": "german",
10
+ "es": "spanish",
11
+ "ru": "russian",
12
+ "ko": "korean",
13
+ "fr": "french",
14
+ "ja": "japanese",
15
+ "pt": "portuguese",
16
+ "tr": "turkish",
17
+ "pl": "polish",
18
+ "ca": "catalan",
19
+ "nl": "dutch",
20
+ "ar": "arabic",
21
+ "sv": "swedish",
22
+ "it": "italian",
23
+ "id": "indonesian",
24
+ "hi": "hindi",
25
+ "fi": "finnish",
26
+ "vi": "vietnamese",
27
+ "he": "hebrew",
28
+ "uk": "ukrainian",
29
+ "el": "greek",
30
+ "ms": "malay",
31
+ "cs": "czech",
32
+ "ro": "romanian",
33
+ "da": "danish",
34
+ "hu": "hungarian",
35
+ "ta": "tamil",
36
+ "no": "norwegian",
37
+ "th": "thai",
38
+ "ur": "urdu",
39
+ "hr": "croatian",
40
+ "bg": "bulgarian",
41
+ "lt": "lithuanian",
42
+ "la": "latin",
43
+ "mi": "maori",
44
+ "ml": "malayalam",
45
+ "cy": "welsh",
46
+ "sk": "slovak",
47
+ "te": "telugu",
48
+ "fa": "persian",
49
+ "lv": "latvian",
50
+ "bn": "bengali",
51
+ "sr": "serbian",
52
+ "az": "azerbaijani",
53
+ "sl": "slovenian",
54
+ "kn": "kannada",
55
+ "et": "estonian",
56
+ "mk": "macedonian",
57
+ "br": "breton",
58
+ "eu": "basque",
59
+ "is": "icelandic",
60
+ "hy": "armenian",
61
+ "ne": "nepali",
62
+ "mn": "mongolian",
63
+ "bs": "bosnian",
64
+ "kk": "kazakh",
65
+ "sq": "albanian",
66
+ "sw": "swahili",
67
+ "gl": "galician",
68
+ "mr": "marathi",
69
+ "pa": "punjabi",
70
+ "si": "sinhala",
71
+ "km": "khmer",
72
+ "sn": "shona",
73
+ "yo": "yoruba",
74
+ "so": "somali",
75
+ "af": "afrikaans",
76
+ "oc": "occitan",
77
+ "ka": "georgian",
78
+ "be": "belarusian",
79
+ "tg": "tajik",
80
+ "sd": "sindhi",
81
+ "gu": "gujarati",
82
+ "am": "amharic",
83
+ "yi": "yiddish",
84
+ "lo": "lao",
85
+ "uz": "uzbek",
86
+ "fo": "faroese",
87
+ "ht": "haitian creole",
88
+ "ps": "pashto",
89
+ "tk": "turkmen",
90
+ "nn": "nynorsk",
91
+ "mt": "maltese",
92
+ "sa": "sanskrit",
93
+ "lb": "luxembourgish",
94
+ "my": "myanmar",
95
+ "bo": "tibetan",
96
+ "tl": "tagalog",
97
+ "mg": "malagasy",
98
+ "as": "assamese",
99
+ "tt": "tatar",
100
+ "haw": "hawaiian",
101
+ "ln": "lingala",
102
+ "ha": "hausa",
103
+ "ba": "bashkir",
104
+ "jw": "javanese",
105
+ "su": "sundanese",
106
+ "yue": "cantonese",
107
+ }
108
+
109
+ # language code lookup by name, with a few language aliases
110
+ TO_LANGUAGE_CODE = {
111
+ **{language: code for code, language in LANGUAGES.items()},
112
+ "burmese": "my",
113
+ "valencian": "ca",
114
+ "flemish": "nl",
115
+ "haitian": "ht",
116
+ "letzeburgesch": "lb",
117
+ "pushto": "ps",
118
+ "panjabi": "pa",
119
+ "moldavian": "ro",
120
+ "moldovan": "ro",
121
+ "sinhalese": "si",
122
+ "castilian": "es",
123
+ "mandarin": "zh",
124
+ }
125
+
126
+
127
+
128
+ [
129
+ [
130
+ "fr",
131
+ 0.9877740740776062
132
+ ],
133
+ [
134
+ "en",
135
+ 0.004565223585814238
136
+ ],
137
+ [
138
+ "it",
139
+ 0.0013161455281078815
140
+ ],
141
+ [
142
+ "de",
143
+ 0.0010678422404453158
144
+ ],
145
+ [
146
+ "br",
147
+ 0.0010424673091620207
148
+ ],
149
+ [
150
+ "la",
151
+ 0.0007281662547029555
152
+ ],
153
+ [
154
+ "es",
155
+ 0.00047290409565903246
156
+ ],
157
+ [
158
+ "ja",
159
+ 0.0003620732750277966
160
+ ],
161
+ [
162
+ "pt",
163
+ 0.00025811095838434994
164
+ ],
165
+ [
166
+ "nn",
167
+ 0.0002432575129205361
168
+ ],
169
+ [
170
+ "pl",
171
+ 0.00021074499818496406
172
+ ],
173
+ [
174
+ "nl",
175
+ 0.00020471173047553748
176
+ ],
177
+ [
178
+ "ru",
179
+ 0.0002026906149694696
180
+ ],
181
+ [
182
+ "cy",
183
+ 0.00018965611525345594
184
+ ],
185
+ [
186
+ "oc",
187
+ 0.00017241497698705643
188
+ ],
189
+ [
190
+ "km",
191
+ 0.00012158624303992838
192
+ ],
193
+ [
194
+ "zh",
195
+ 0.00011692210682667792
196
+ ],
197
+ [
198
+ "jw",
199
+ 0.00010178791853832081
200
+ ],
201
+ [
202
+ "ro",
203
+ 0.00009590199624653906
204
+ ],
205
+ [
206
+ "sv",
207
+ 0.00007909776468295604
208
+ ],
209
+ [
210
+ "ar",
211
+ 0.0000725739446352236
212
+ ],
213
+ [
214
+ "tr",
215
+ 0.000057220226153731346
216
+ ],
217
+ [
218
+ "fi",
219
+ 0.00005474982754094526
220
+ ],
221
+ [
222
+ "ko",
223
+ 0.00004248135883244686
224
+ ],
225
+ [
226
+ "mi",
227
+ 0.00003786800516536459
228
+ ],
229
+ [
230
+ "sn",
231
+ 0.000031534167646896094
232
+ ],
233
+ [
234
+ "hu",
235
+ 0.000028775264581781812
236
+ ],
237
+ [
238
+ "ht",
239
+ 0.000028651957109104842
240
+ ],
241
+ [
242
+ "vi",
243
+ 0.000028335278329905123
244
+ ],
245
+ [
246
+ "da",
247
+ 0.000027910642529604957
248
+ ],
249
+ [
250
+ "el",
251
+ 0.000023526350560132414
252
+ ],
253
+ [
254
+ "fa",
255
+ 0.000022600075681111775
256
+ ],
257
+ [
258
+ "haw",
259
+ 0.00002183002288802527
260
+ ],
261
+ [
262
+ "no",
263
+ 0.000019493931176839396
264
+ ],
265
+ [
266
+ "cs",
267
+ 0.000016285941455862485
268
+ ],
269
+ [
270
+ "he",
271
+ 0.00001463644457544433
272
+ ],
273
+ [
274
+ "sa",
275
+ 0.000012327554941293783
276
+ ],
277
+ [
278
+ "th",
279
+ 0.000010766526429506484
280
+ ],
281
+ [
282
+ "ca",
283
+ 0.000008936658559832722
284
+ ],
285
+ [
286
+ "bg",
287
+ 0.000007716189429629594
288
+ ],
289
+ [
290
+ "ms",
291
+ 0.000007680522685404867
292
+ ],
293
+ [
294
+ "yo",
295
+ 0.000006417556051019346
296
+ ],
297
+ [
298
+ "gl",
299
+ 0.000006063059572625207
300
+ ],
301
+ [
302
+ "ln",
303
+ 0.000006041265805833973
304
+ ],
305
+ [
306
+ "si",
307
+ 0.000005011619577999227
308
+ ],
309
+ [
310
+ "hi",
311
+ 0.000004941234692523722
312
+ ],
313
+ [
314
+ "sk",
315
+ 0.000004815055945073254
316
+ ],
317
+ [
318
+ "uk",
319
+ 0.000004419264769239817
320
+ ],
321
+ [
322
+ "hy",
323
+ 0.000004105790594621794
324
+ ],
325
+ [
326
+ "id",
327
+ 0.000003996842679043766
328
+ ],
329
+ [
330
+ "tl",
331
+ 0.000003896923772117589
332
+ ],
333
+ [
334
+ "ml",
335
+ 0.000003836657469946658
336
+ ],
337
+ [
338
+ "hr",
339
+ 0.0000035950304209109163
340
+ ],
341
+ [
342
+ "ta",
343
+ 0.0000031487393243878614
344
+ ],
345
+ [
346
+ "ur",
347
+ 0.000002829937329806853
348
+ ],
349
+ [
350
+ "yi",
351
+ 0.0000028043848487868672
352
+ ],
353
+ [
354
+ "bs",
355
+ 0.000002576711949586752
356
+ ],
357
+ [
358
+ "sl",
359
+ 0.0000025655576791905332
360
+ ],
361
+ [
362
+ "eu",
363
+ 0.000002537221007514745
364
+ ],
365
+ [
366
+ "sw",
367
+ 0.000002027671598625602
368
+ ],
369
+ [
370
+ "bn",
371
+ 0.0000020110176137677627
372
+ ],
373
+ [
374
+ "et",
375
+ 0.0000019163769593433244
376
+ ],
377
+ [
378
+ "af",
379
+ 0.0000016607176576144411
380
+ ],
381
+ [
382
+ "lb",
383
+ 0.0000015626893627995742
384
+ ],
385
+ [
386
+ "fo",
387
+ 0.000001537753632874228
388
+ ],
389
+ [
390
+ "mn",
391
+ 0.0000012066857379977591
392
+ ],
393
+ [
394
+ "sq",
395
+ 0.0000010732701412052847
396
+ ],
397
+ [
398
+ "te",
399
+ 0.0000010657946631908999
400
+ ],
401
+ [
402
+ "pa",
403
+ 0.000001037814399751369
404
+ ],
405
+ [
406
+ "sr",
407
+ 0.000001028694782689854
408
+ ],
409
+ [
410
+ "my",
411
+ 9.428002840650151e-7
412
+ ],
413
+ [
414
+ "bo",
415
+ 8.66582468006527e-7
416
+ ],
417
+ [
418
+ "ne",
419
+ 8.636189363642188e-7
420
+ ],
421
+ [
422
+ "lo",
423
+ 7.318710117942828e-7
424
+ ],
425
+ [
426
+ "be",
427
+ 6.941849051145255e-7
428
+ ],
429
+ [
430
+ "lv",
431
+ 6.265471483857254e-7
432
+ ],
433
+ [
434
+ "az",
435
+ 6.14897032846784e-7
436
+ ],
437
+ [
438
+ "sd",
439
+ 5.913773293286795e-7
440
+ ],
441
+ [
442
+ "mr",
443
+ 5.441081043500162e-7
444
+ ],
445
+ [
446
+ "ps",
447
+ 4.3055180753981404e-7
448
+ ],
449
+ [
450
+ "is",
451
+ 3.4580702390485385e-7
452
+ ],
453
+ [
454
+ "kk",
455
+ 3.0084689228715433e-7
456
+ ],
457
+ [
458
+ "kn",
459
+ 1.7288991216446448e-7
460
+ ],
461
+ [
462
+ "as",
463
+ 1.5110647666460864e-7
464
+ ],
465
+ [
466
+ "am",
467
+ 1.2572900232044049e-7
468
+ ],
469
+ [
470
+ "gu",
471
+ 1.2536465021639742e-7
472
+ ],
473
+ [
474
+ "lt",
475
+ 1.1584211279114243e-7
476
+ ],
477
+ [
478
+ "mt",
479
+ 9.117716359696715e-8
480
+ ],
481
+ [
482
+ "mk",
483
+ 8.674353324522599e-8
484
+ ],
485
+ [
486
+ "ka",
487
+ 8.111815219535856e-8
488
+ ],
489
+ [
490
+ "ha",
491
+ 5.529446056584675e-8
492
+ ],
493
+ [
494
+ "su",
495
+ 5.323035523474573e-8
496
+ ],
497
+ [
498
+ "tg",
499
+ 4.645137252623499e-8
500
+ ],
501
+ [
502
+ "tt",
503
+ 2.9631449294242884e-8
504
+ ],
505
+ [
506
+ "so",
507
+ 5.109404455083677e-9
508
+ ],
509
+ [
510
+ "uz",
511
+ 3.229358558343165e-9
512
+ ],
513
+ [
514
+ "mg",
515
+ 3.0856159849435016e-9
516
+ ],
517
+ [
518
+ "tk",
519
+ 4.514433618596492e-11
520
+ ],
521
+ [
522
+ "ba",
523
+ 3.5637395812138095e-11
524
+ ]
525
+ ]