kamau1 committed on
Commit
bcfd8ed
1 Parent(s): ed8f351

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -1,35 +1,28 @@
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
 
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
13
  *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
  *.ot filter=lfs diff=lfs merge=lfs -text
18
  *.parquet filter=lfs diff=lfs merge=lfs -text
19
  *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
  *.pt filter=lfs diff=lfs merge=lfs -text
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
  *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
  *.wasm filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
  *.bz2 filter=lfs diff=lfs merge=lfs -text
 
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
 
11
  *.model filter=lfs diff=lfs merge=lfs -text
12
  *.msgpack filter=lfs diff=lfs merge=lfs -text
 
 
13
  *.onnx filter=lfs diff=lfs merge=lfs -text
14
  *.ot filter=lfs diff=lfs merge=lfs -text
15
  *.parquet filter=lfs diff=lfs merge=lfs -text
16
  *.pb filter=lfs diff=lfs merge=lfs -text
 
 
17
  *.pt filter=lfs diff=lfs merge=lfs -text
18
  *.pth filter=lfs diff=lfs merge=lfs -text
19
  *.rar filter=lfs diff=lfs merge=lfs -text
 
20
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
  *.tar.* filter=lfs diff=lfs merge=lfs -text
 
22
  *.tflite filter=lfs diff=lfs merge=lfs -text
23
  *.tgz filter=lfs diff=lfs merge=lfs -text
24
  *.wasm filter=lfs diff=lfs merge=lfs -text
25
  *.xz filter=lfs diff=lfs merge=lfs -text
26
  *.zip filter=lfs diff=lfs merge=lfs -text
27
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
28
  *tfevents* filter=lfs diff=lfs merge=lfs -text
main.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
"""Entry point: start the FastAPI application object ``modules.app:app`` under uvicorn."""
import subprocess

# Run uvicorn with an explicit argument list rather than a shell command
# string. The command is fixed, so no shell features are needed; skipping the
# shell avoids an extra /bin/sh process and any quoting/word-splitting
# surprises, and a missing `uvicorn` executable now raises FileNotFoundError
# instead of being reported only as a non-zero shell exit status.
# The call blocks until the server process exits.
subprocess.run(
    ["uvicorn", "modules.app:app", "--host", "0.0.0.0", "--port", "7860"]
)
modules/STT & TTS/all_langs.tsv ADDED
@@ -0,0 +1,1198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ abi Abidji
2
+ abk Abkhaz
3
+ abp Ayta, Abellen
4
+ aca Achagua
5
+ acd Gikyode
6
+ ace Aceh
7
+ acf Lesser Antillean French Creole
8
+ ach Acholi
9
+ acn Achang
10
+ acr Achi
11
+ acu Achuar-Shiwiar
12
+ ade Adele
13
+ adh Jopadhola
14
+ adj Adioukrou
15
+ adx Tibetan, Amdo
16
+ aeu Akeu
17
+ afr Afrikaans
18
+ agd Agarabi
19
+ agg Angor
20
+ agn Agutaynen
21
+ agr Awajún
22
+ agu Awakateko
23
+ agx Aghul
24
+ aha Ahanta
25
+ ahk Akha
26
+ aia Arosi
27
+ aka Akan
28
+ akb Batak Angkola
29
+ ake Akawaio
30
+ akp Siwu
31
+ alj Alangan
32
+ alp Alune
33
+ alt Altai, Southern
34
+ alz Alur
35
+ ame Yanesha’
36
+ amf Hamer-Banna
37
+ amh Amharic
38
+ ami Amis
39
+ amk Ambai
40
+ ann Obolo
41
+ any Anyin
42
+ aoz Uab Meto
43
+ apb Sa’a
44
+ apr Arop-Lokep
45
+ ara Arabic
46
+ arl Arabela
47
+ asa Asu
48
+ asg Cishingini
49
+ asm Assamese
50
+ ast Asturian
51
+ ata Pele-Ata
52
+ atb Zaiwa
53
+ atg Ivbie North-Okpela-Arhe
54
+ ati Attié
55
+ atq Aralle-Tabulahan
56
+ ava Avar
57
+ avn Avatime
58
+ avu Avokaya
59
+ awa Awadhi
60
+ awb Awa
61
+ ayo Ayoreo
62
+ ayr Aymara, Central
63
+ ayz Mai Brat
64
+ azb Azerbaijani, South
65
+ azg Amuzgo, San Pedro Amuzgos
66
+ azj-script_cyrillic Azerbaijani, North
67
+ azj-script_latin Azerbaijani, North
68
+ azz Nahuatl, Highland Puebla
69
+ bak Bashkort
70
+ bam Bamanankan
71
+ ban Bali
72
+ bao Waimaha
73
+ bas Basaa
74
+ bav Vengo
75
+ bba Baatonum
76
+ bbb Barai
77
+ bbc Batak Toba
78
+ bbo Konabéré
79
+ bcc-script_arabic Balochi, Southern
80
+ bcc-script_latin Balochi, Southern
81
+ bcl Bikol, Central
82
+ bcw Bana
83
+ bdg Bonggi
84
+ bdh Baka
85
+ bdq Bahnar
86
+ bdu Oroko
87
+ bdv Bodo Parja
88
+ beh Biali
89
+ bel Belarusian
90
+ bem Bemba
91
+ ben Bengali
92
+ bep Behoa
93
+ bex Jur Modo
94
+ bfa Bari
95
+ bfo Birifor, Malba
96
+ bfy Bagheli
97
+ bfz Pahari, Mahasu
98
+ bgc Haryanvi
99
+ bgq Bagri
100
+ bgr Chin, Bawm
101
+ bgt Bughotu
102
+ bgw Bhatri
103
+ bha Bharia
104
+ bht Bhattiyali
105
+ bhz Bada
106
+ bib Bisa
107
+ bim Bimoba
108
+ bis Bislama
109
+ biv Birifor, Southern
110
+ bjr Binumarien
111
+ bjv Bedjond
112
+ bjw Bakwé
113
+ bjz Baruga
114
+ bkd Binukid
115
+ bkv Bekwarra
116
+ blh Kuwaa
117
+ blt Tai Dam
118
+ blx Ayta, Mag-Indi
119
+ blz Balantak
120
+ bmq Bomu
121
+ bmr Muinane
122
+ bmu Somba-Siawari
123
+ bmv Bum
124
+ bng Benga
125
+ bno Bantoanon
126
+ bnp Bola
127
+ boa Bora
128
+ bod Tibetan, Central
129
+ boj Anjam
130
+ bom Berom
131
+ bor Borôro
132
+ bos Bosnian
133
+ bov Tuwuli
134
+ box Buamu
135
+ bpr Blaan, Koronadal
136
+ bps Blaan, Sarangani
137
+ bqc Boko
138
+ bqi Bakhtiâri
139
+ bqj Bandial
140
+ bqp Bisã
141
+ bre Breton
142
+ bru Bru, Eastern
143
+ bsc Oniyan
144
+ bsq Bassa
145
+ bss Akoose
146
+ btd Batak Dairi
147
+ bts Batak Simalungun
148
+ btt Bete-Bendi
149
+ btx Batak Karo
150
+ bud Ntcham
151
+ bul Bulgarian
152
+ bus Bokobaru
153
+ bvc Baelelea
154
+ bvz Bauzi
155
+ bwq Bobo Madaré, Southern
156
+ bwu Buli
157
+ byr Yipma
158
+ bzh Buang, Mapos
159
+ bzi Bisu
160
+ bzj Belize English Creole
161
+ caa Ch’orti’
162
+ cab Garifuna
163
+ cac-dialect_sanmateoixtatan Chuj
164
+ cac-dialect_sansebastiancoatan Chuj
165
+ cak-dialect_central Kaqchikel
166
+ cak-dialect_santamariadejesus Kaqchikel
167
+ cak-dialect_santodomingoxenacoj Kaqchikel
168
+ cak-dialect_southcentral Kaqchikel
169
+ cak-dialect_western Kaqchikel
170
+ cak-dialect_yepocapa Kaqchikel
171
+ cap Chipaya
172
+ car Carib
173
+ cas Tsimané
174
+ cat Catalan
175
+ cax Chiquitano
176
+ cbc Carapana
177
+ cbi Chachi
178
+ cbr Kakataibo-Kashibo
179
+ cbs Kashinawa
180
+ cbt Shawi
181
+ cbu Kandozi-Chapra
182
+ cbv Cacua
183
+ cce Chopi
184
+ cco Chinantec, Comaltepec
185
+ cdj Churahi
186
+ ceb Cebuano
187
+ ceg Chamacoco
188
+ cek Chin, Eastern Khumi
189
+ ces Czech
190
+ cfm Chin, Falam
191
+ cgc Kagayanen
192
+ che Chechen
193
+ chf Chontal, Tabasco
194
+ chv Chuvash
195
+ chz Chinantec, Ozumacín
196
+ cjo Ashéninka, Pajonal
197
+ cjp Cabécar
198
+ cjs Shor
199
+ ckb Kurdish, Central
200
+ cko Anufo
201
+ ckt Chukchi
202
+ cla Ron
203
+ cle Chinantec, Lealao
204
+ cly Chatino, Eastern Highland
205
+ cme Cerma
206
+ cmn-script_simplified Chinese, Mandarin
207
+ cmo-script_khmer Mnong, Central
208
+ cmo-script_latin Mnong, Central
209
+ cmr Mro-Khimi
210
+ cnh Chin, Hakha
211
+ cni Asháninka
212
+ cnl Chinantec, Lalana
213
+ cnt Chinantec, Tepetotutla
214
+ coe Koreguaje
215
+ cof Tsafiki
216
+ cok Cora, Santa Teresa
217
+ con Cofán
218
+ cot Caquinte
219
+ cou Wamey
220
+ cpa Chinantec, Palantla
221
+ cpb Ashéninka, Ucayali-Yurúa
222
+ cpu Ashéninka, Pichis
223
+ crh Crimean Tatar
224
+ crk-script_latin Cree, Plains
225
+ crk-script_syllabics Cree, Plains
226
+ crn Cora, El Nayar
227
+ crq Chorote, Iyo’wujwa
228
+ crs Seychelles French Creole
229
+ crt Chorote, Iyojwa’ja
230
+ csk Jola-Kasa
231
+ cso Chinantec, Sochiapam
232
+ ctd Chin, Tedim
233
+ ctg Chittagonian
234
+ cto Embera Catío
235
+ ctu Chol
236
+ cuc Chinantec, Usila
237
+ cui Cuiba
238
+ cuk Kuna, San Blas
239
+ cul Kulina
240
+ cwa Kabwa
241
+ cwe Kwere
242
+ cwt Kuwaataay
243
+ cya Chatino, Nopala
244
+ cym Welsh
245
+ daa Dangaléat
246
+ dah Gwahatike
247
+ dan Danish
248
+ dar Dargwa
249
+ dbj Ida’an
250
+ dbq Daba
251
+ ddn Dendi
252
+ ded Dedua
253
+ des Desano
254
+ deu German, Standard
255
+ dga Dagaare, Southern
256
+ dgi Dagara, Northern
257
+ dgk Dagba
258
+ dgo Dogri
259
+ dgr Tlicho
260
+ dhi Dhimal
261
+ did Didinga
262
+ dig Chidigo
263
+ dik Dinka, Southwestern
264
+ dip Dinka, Northeastern
265
+ div Maldivian
266
+ djk Aukan
267
+ dnj-dialect_blowowest Dan
268
+ dnj-dialect_gweetaawueast Dan
269
+ dnt Dani, Mid Grand Valley
270
+ dnw Dani, Western
271
+ dop Lukpa
272
+ dos Dogosé
273
+ dsh Daasanach
274
+ dso Desiya
275
+ dtp Kadazan Dusun
276
+ dts Dogon, Toro So
277
+ dug Chiduruma
278
+ dwr Dawro
279
+ dyi Sénoufo, Djimini
280
+ dyo Jola-Fonyi
281
+ dyu Jula
282
+ dzo Dzongkha
283
+ eip Lik
284
+ eka Ekajuk
285
+ ell Greek
286
+ emp Emberá, Northern
287
+ enb Markweeta
288
+ eng English
289
+ enx Enxet
290
+ epo Esperanto
291
+ ese Ese Ejja
292
+ ess Yupik, Saint Lawrence Island
293
+ est Estonian
294
+ eus Basque
295
+ evn Evenki
296
+ ewe Éwé
297
+ eza Ezaa
298
+ fal Fali, South
299
+ fao Faroese
300
+ far Fataleka
301
+ fas Persian
302
+ fij Fijian
303
+ fin Finnish
304
+ flr Fuliiru
305
+ fmu Muria, Far Western
306
+ fon Fon
307
+ fra French
308
+ frd Fordata
309
+ fry Frisian
310
+ ful Fulah
311
+ gag-script_cyrillic Gagauz
312
+ gag-script_latin Gagauz
313
+ gai Mbore
314
+ gam Kandawo
315
+ gau Gadaba, Mudhili
316
+ gbi Galela
317
+ gbk Gaddi
318
+ gbm Garhwali
319
+ gbo Grebo, Northern
320
+ gde Gude
321
+ geb Kire
322
+ gej Gen
323
+ gil Kiribati
324
+ gjn Gonja
325
+ gkn Gokana
326
+ gld Nanai
327
+ gle Irish
328
+ glg Galician
329
+ glk Gilaki
330
+ gmv Gamo
331
+ gna Kaansa
332
+ gnd Zulgo-Gemzek
333
+ gng Ngangam
334
+ gof-script_latin Gofa
335
+ gog Gogo
336
+ gor Gorontalo
337
+ gqr Gor
338
+ grc Greek, Ancient
339
+ gri Ghari
340
+ grn Guarani
341
+ grt Garo
342
+ gso Gbaya, Southwest
343
+ gub Guajajára
344
+ guc Wayuu
345
+ gud Dida, Yocoboué
346
+ guh Guahibo
347
+ guj Gujarati
348
+ guk Gumuz
349
+ gum Misak
350
+ guo Guayabero
351
+ guq Aché
352
+ guu Yanomamö
353
+ gux Gourmanchéma
354
+ gvc Wanano
355
+ gvl Gulay
356
+ gwi Gwich’in
357
+ gwr Gwere
358
+ gym Ngäbere
359
+ gyr Guarayu
360
+ had Hatam
361
+ hag Hanga
362
+ hak Chinese, Hakka
363
+ hap Hupla
364
+ hat Haitian Creole
365
+ hau Hausa
366
+ hay Haya
367
+ heb Hebrew
368
+ heh Hehe
369
+ hif Hindi, Fiji
370
+ hig Kamwe
371
+ hil Hiligaynon
372
+ hin Hindi
373
+ hlb Halbi
374
+ hlt Chin, Matu
375
+ hne Chhattisgarhi
376
+ hnn Hanunoo
377
+ hns Hindustani, Sarnami
378
+ hoc Ho
379
+ hoy Holiya
380
+ hrv Croatian
381
+ hsb Sorbian, Upper
382
+ hto Witoto, Minika
383
+ hub Wampís
384
+ hui Huli
385
+ hun Hungarian
386
+ hus-dialect_centralveracruz Huastec
387
+ hus-dialect_westernpotosino Huastec
388
+ huu Witoto, Murui
389
+ huv Huave, San Mateo del Mar
390
+ hvn Hawu
391
+ hwc Hawaii Pidgin
392
+ hye Armenian
393
+ hyw Armenian, Western
394
+ iba Iban
395
+ ibo Igbo
396
+ icr Islander English Creole
397
+ idd Ede Idaca
398
+ ifa Ifugao, Amganad
399
+ ifb Ifugao, Batad
400
+ ife Ifè
401
+ ifk Ifugao, Tuwali
402
+ ifu Ifugao, Mayoyao
403
+ ify Kallahan, Keley-i
404
+ ign Ignaciano
405
+ ikk Ika
406
+ ilb Ila
407
+ ilo Ilocano
408
+ imo Imbongu
409
+ ina Interlingua (International Auxiliary Language Association)
410
+ inb Inga
411
+ ind Indonesian
412
+ iou Tuma-Irumu
413
+ ipi Ipili
414
+ iqw Ikwo
415
+ iri Rigwe
416
+ irk Iraqw
417
+ isl Icelandic
418
+ ita Italian
419
+ itl Itelmen
420
+ itv Itawit
421
+ ixl-dialect_sangasparchajul Ixil
422
+ ixl-dialect_sanjuancotzal Ixil
423
+ ixl-dialect_santamarianebaj Ixil
424
+ izr Izere
425
+ izz Izii
426
+ jac Jakalteko
427
+ jam Jamaican English Creole
428
+ jav Javanese
429
+ jbu Jukun Takum
430
+ jen Dza
431
+ jic Tol
432
+ jiv Shuar
433
+ jmc Machame
434
+ jmd Yamdena
435
+ jpn Japanese
436
+ jun Juang
437
+ juy Juray
438
+ jvn Javanese, Suriname
439
+ kaa Karakalpak
440
+ kab Amazigh
441
+ kac Jingpho
442
+ kak Kalanguya
443
+ kam Kamba
444
+ kan Kannada
445
+ kao Xaasongaxango
446
+ kaq Capanahua
447
+ kat Georgian
448
+ kay Kamayurá
449
+ kaz Kazakh
450
+ kbo Keliko
451
+ kbp Kabiyè
452
+ kbq Kamano
453
+ kbr Kafa
454
+ kby Kanuri, Manga
455
+ kca Khanty
456
+ kcg Tyap
457
+ kdc Kutu
458
+ kde Makonde
459
+ kdh Tem
460
+ kdi Kumam
461
+ kdj Ng’akarimojong
462
+ kdl Tsikimba
463
+ kdn Kunda
464
+ kdt Kuay
465
+ kea Kabuverdianu
466
+ kek Q’eqchi’
467
+ ken Kenyang
468
+ keo Kakwa
469
+ ker Kera
470
+ key Kupia
471
+ kez Kukele
472
+ kfb Kolami, Northwestern
473
+ kff-script_telugu Koya
474
+ kfw Naga, Kharam
475
+ kfx Pahari, Kullu
476
+ khg Tibetan, Khams
477
+ khm Khmer
478
+ khq Songhay, Koyra Chiini
479
+ kia Kim
480
+ kij Kilivila
481
+ kik Gikuyu
482
+ kin Kinyarwanda
483
+ kir Kyrgyz
484
+ kjb Q’anjob’al
485
+ kje Kisar
486
+ kjg Khmu
487
+ kjh Khakas
488
+ kki Kagulu
489
+ kkj Kako
490
+ kle Kulung
491
+ klu Klao
492
+ klv Maskelynes
493
+ klw Tado
494
+ kma Konni
495
+ kmd Kalinga, Majukayang
496
+ kml Kalinga, Tanudan
497
+ kmr-script_arabic Kurdish, Northern
498
+ kmr-script_cyrillic Kurdish, Northern
499
+ kmr-script_latin Kurdish, Northern
500
+ kmu Kanite
501
+ knb Kalinga, Lubuagan
502
+ kne Kankanaey
503
+ knf Mankanya
504
+ knj Akateko
505
+ knk Kuranko
506
+ kno Kono
507
+ kog Kogi
508
+ kor Korean
509
+ kpq Korupun-Sela
510
+ kps Tehit
511
+ kpv Komi-Zyrian
512
+ kpy Koryak
513
+ kpz Kupsapiiny
514
+ kqe Kalagan
515
+ kqp Kimré
516
+ kqr Kimaragang
517
+ kqy Koorete
518
+ krc Karachay-Balkar
519
+ kri Krio
520
+ krj Kinaray-a
521
+ krl Karelian
522
+ krr Krung
523
+ krs Gbaya
524
+ kru Kurux
525
+ ksb Shambala
526
+ ksr Borong
527
+ kss Kisi, Southern
528
+ ktb Kambaata
529
+ ktj Krumen, Plapo
530
+ kub Kutep
531
+ kue Kuman
532
+ kum Kumyk
533
+ kus Kusaal
534
+ kvn Kuna, Border
535
+ kvw Wersing
536
+ kwd Kwaio
537
+ kwf Kwara’ae
538
+ kwi Awa-Cuaiquer
539
+ kxc Konso
540
+ kxf Kawyaw
541
+ kxm Khmer, Northern
542
+ kxv Kuvi
543
+ kyb Kalinga, Butbut
544
+ kyc Kyaka
545
+ kyf Kouya
546
+ kyg Keyagana
547
+ kyo Klon
548
+ kyq Kenga
549
+ kyu Kayah, Western
550
+ kyz Kayabí
551
+ kzf Kaili, Da’a
552
+ lac Lacandon
553
+ laj Lango
554
+ lam Lamba
555
+ lao Lao
556
+ las Lama
557
+ lat Latin
558
+ lav Latvian
559
+ law Lauje
560
+ lbj Ladakhi
561
+ lbw Tolaki
562
+ lcp Lawa, Western
563
+ lee Lyélé
564
+ lef Lelemi
565
+ lem Nomaande
566
+ lew Kaili, Ledo
567
+ lex Luang
568
+ lgg Lugbara
569
+ lgl Wala
570
+ lhu Lahu
571
+ lia Limba, West-Central
572
+ lid Nyindrou
573
+ lif Limbu
574
+ lin Lingala
575
+ lip Sekpele
576
+ lis Lisu
577
+ lit Lithuanian
578
+ lje Rampi
579
+ ljp Lampung Api
580
+ llg Lole
581
+ lln Lele
582
+ lme Pévé
583
+ lnd Lundayeh
584
+ lns Lamnso’
585
+ lob Lobi
586
+ lok Loko
587
+ lom Loma
588
+ lon Lomwe, Malawi
589
+ loq Lobala
590
+ lsi Lacid
591
+ lsm Saamya-Gwe
592
+ ltz Luxembourgish
593
+ luc Aringa
594
+ lug Ganda
595
+ luo Dholuo
596
+ lwo Luwo
597
+ lww Lewo
598
+ lzz Laz
599
+ maa-dialect_sanantonio Mazatec, San Jerónimo Tecóatl
600
+ maa-dialect_sanjeronimo Mazatec, San Jerónimo Tecóatl
601
+ mad Madura
602
+ mag Magahi
603
+ mah Marshallese
604
+ mai Maithili
605
+ maj Mazatec, Jalapa de Díaz
606
+ mak Makasar
607
+ mal Malayalam
608
+ mam-dialect_central Mam
609
+ mam-dialect_northern Mam
610
+ mam-dialect_southern Mam
611
+ mam-dialect_western Mam
612
+ maq Mazatec, Chiquihuitlán
613
+ mar Marathi
614
+ maw Mampruli
615
+ maz Mazahua, Central
616
+ mbb Manobo, Western Bukidnon
617
+ mbc Macushi
618
+ mbh Mangseng
619
+ mbj Nadëb
620
+ mbt Manobo, Matigsalug
621
+ mbu Mbula-Bwazza
622
+ mbz Mixtec, Amoltepec
623
+ mca Maka
624
+ mcb Matsigenka
625
+ mcd Sharanahua
626
+ mco Mixe, Coatlán
627
+ mcp Makaa
628
+ mcq Ese
629
+ mcu Mambila, Cameroon
630
+ mda Mada
631
+ mdf Moksha
632
+ mdv Mixtec, Santa Lucía Monteverde
633
+ mdy Male
634
+ med Melpa
635
+ mee Mengen
636
+ mej Meyah
637
+ men Mende
638
+ meq Merey
639
+ met Mato
640
+ mev Maan
641
+ mfe Morisyen
642
+ mfh Matal
643
+ mfi Wandala
644
+ mfk Mofu, North
645
+ mfq Moba
646
+ mfy Mayo
647
+ mfz Mabaan
648
+ mgd Moru
649
+ mge Mango
650
+ mgh Makhuwa-Meetto
651
+ mgo Meta’
652
+ mhi Ma’di
653
+ mhr Mari, Meadow
654
+ mhu Digaro-Mishmi
655
+ mhx Lhao Vo
656
+ mhy Ma’anyan
657
+ mib Mixtec, Atatlahuca
658
+ mie Mixtec, Ocotepec
659
+ mif Mofu-Gudur
660
+ mih Mixtec, Chayuco
661
+ mil Mixtec, Peñoles
662
+ mim Mixtec, Alacatlatzala
663
+ min Minangkabau
664
+ mio Mixtec, Pinotepa Nacional
665
+ mip Mixtec, Apasco-Apoala
666
+ miq Mískito
667
+ mit Mixtec, Southern Puebla
668
+ miy Mixtec, Ayutla
669
+ miz Mixtec, Coatzospan
670
+ mjl Mandeali
671
+ mjv Mannan
672
+ mkd Macedonian
673
+ mkl Mokole
674
+ mkn Malay, Kupang
675
+ mlg Malagasy
676
+ mlt Maltese
677
+ mmg Ambrym, North
678
+ mnb Muna
679
+ mnf Mundani
680
+ mnk Mandinka
681
+ mnw Mon
682
+ mnx Sougb
683
+ moa Mwan
684
+ mog Mongondow
685
+ mon Mongolian
686
+ mop Maya, Mopán
687
+ mor Moro
688
+ mos Mòoré
689
+ mox Molima
690
+ moz Mukulu
691
+ mpg Marba
692
+ mpm Mixtec, Yosondúa
693
+ mpp Migabac
694
+ mpx Misima-Panaeati
695
+ mqb Mbuko
696
+ mqf Momuna
697
+ mqj Mamasa
698
+ mqn Moronene
699
+ mri Maori
700
+ mrw Maranao
701
+ msy Aruamu
702
+ mtd Mualang
703
+ mtj Moskona
704
+ mto Mixe, Totontepec
705
+ muh Mündü
706
+ mup Malvi
707
+ mur Murle
708
+ muv Muthuvan
709
+ muy Muyang
710
+ mvp Duri
711
+ mwq Chin, Müün
712
+ mwv Mentawai
713
+ mxb Mixtec, Tezoatlán
714
+ mxq Mixe, Juquila
715
+ mxt Mixtec, Jamiltepec
716
+ mxv Mixtec, Metlatónoc
717
+ mya Burmese
718
+ myb Mbay
719
+ myk Sénoufo, Mamara
720
+ myl Moma
721
+ myv Erzya
722
+ myx Masaaba
723
+ myy Macuna
724
+ mza Mixtec, Santa María Zacatepec
725
+ mzi Mazatec, Ixcatlán
726
+ mzj Manya
727
+ mzk Mambila, Nigeria
728
+ mzm Mumuye
729
+ mzw Deg
730
+ nab Nambikuára, Southern
731
+ nag Nagamese
732
+ nan Chinese, Min Nan
733
+ nas Naasioi
734
+ naw Nawuri
735
+ nca Iyo
736
+ nch Nahuatl, Central Huasteca
737
+ ncj Nahuatl, Northern Puebla
738
+ ncl Nahuatl, Michoacán
739
+ ncu Chumburung
740
+ ndj Ndamba
741
+ ndp Kebu
742
+ ndv Ndut
743
+ ndy Lutos
744
+ ndz Ndogo
745
+ neb Toura
746
+ new Newar
747
+ nfa Dhao
748
+ nfr Nafaanra
749
+ nga Ngbaka
750
+ ngl Lomwe
751
+ ngp Ngulu
752
+ ngu Nahuatl, Guerrero
753
+ nhe Nahuatl, Eastern Huasteca
754
+ nhi Nahuatl, Zacatlán-Ahuacatlán-Tepetzintla
755
+ nhu Noone
756
+ nhw Nahuatl, Western Huasteca
757
+ nhx Nahuatl, Isthmus-Mecayapan
758
+ nhy Nahuatl, Northern Oaxaca
759
+ nia Nias
760
+ nij Ngaju
761
+ nim Nilamba
762
+ nin Ninzo
763
+ nko Nkonya
764
+ nlc Nalca
765
+ nld Dutch
766
+ nlg Gela
767
+ nlk Yali, Ninia
768
+ nmz Nawdm
769
+ nnb Nande
770
+ nno Norwegian Nynorsk
771
+ nnq Ngindo
772
+ nnw Nuni, Southern
773
+ noa Woun Meu
774
+ nob Norwegian Bokmål
775
+ nod Thai, Northern
776
+ nog Nogai
777
+ not Nomatsigenga
778
+ npi Nepali
779
+ npl Nahuatl, Southeastern Puebla
780
+ npy Napu
781
+ nso Sotho, Northern
782
+ nst Naga, Tangshang
783
+ nsu Nahuatl, Sierra Negra
784
+ ntm Nateni
785
+ ntr Delo
786
+ nuj Nyole
787
+ nus Nuer
788
+ nuz Nahuatl, Tlamacazapa
789
+ nwb Nyabwa
790
+ nxq Naxi
791
+ nya Chichewa
792
+ nyf Kigiryama
793
+ nyn Nyankore
794
+ nyo Nyoro
795
+ nyy Nyakyusa-Ngonde
796
+ nzi Nzema
797
+ obo Manobo, Obo
798
+ oci Occitan
799
+ ojb-script_latin Ojibwa, Northwestern
800
+ ojb-script_syllabics Ojibwa, Northwestern
801
+ oku Oku
802
+ old Mochi
803
+ omw Tairora, South
804
+ onb Lingao
805
+ ood Tohono O’odham
806
+ orm Oromo
807
+ ory Odia
808
+ oss Ossetic
809
+ ote Otomi, Mezquital
810
+ otq Otomi, Querétaro
811
+ ozm Koonzime
812
+ pab Parecís
813
+ pad Paumarí
814
+ pag Pangasinan
815
+ pam Kapampangan
816
+ pan Punjabi, Eastern
817
+ pao Paiute, Northern
818
+ pap Papiamentu
819
+ pau Palauan
820
+ pbb Nasa
821
+ pbc Patamona
822
+ pbi Parkwa
823
+ pce Palaung, Ruching
824
+ pcm Pidgin, Nigerian
825
+ peg Pengo
826
+ pez Penan, Eastern
827
+ pib Yine
828
+ pil Yom
829
+ pir Piratapuyo
830
+ pis Pijin
831
+ pjt Pitjantjatjara
832
+ pkb Kipfokomo
833
+ pls Popoloca, San Marcos Tlacoyalco
834
+ plw Palawano, Brooke’s Point
835
+ pmf Pamona
836
+ pny Pinyin
837
+ poh-dialect_eastern Poqomchi’
838
+ poh-dialect_western Poqomchi’
839
+ poi Popoluca, Highland
840
+ pol Polish
841
+ por Portuguese
842
+ poy Pogolo
843
+ ppk Uma
844
+ pps Popoloca, San Luís Temalacayuca
845
+ prf Paranan
846
+ prk Wa, Parauk
847
+ prt Prai
848
+ pse Malay, Central
849
+ pss Kaulong
850
+ ptu Bambam
851
+ pui Puinave
852
+ pus Pushto
853
+ pwg Gapapaiwa
854
+ pww Karen, Pwo Northern
855
+ pxm Mixe, Quetzaltepec
856
+ qub Quechua, Huallaga
857
+ quc-dialect_central K’iche’
858
+ quc-dialect_east K’iche’
859
+ quc-dialect_north K’iche’
860
+ quf Quechua, Lambayeque
861
+ quh Quechua, South Bolivian
862
+ qul Quechua, North Bolivian
863
+ quw Quichua, Tena Lowland
864
+ quy Quechua, Ayacucho
865
+ quz Quechua, Cusco
866
+ qvc Quechua, Cajamarca
867
+ qve Quechua, Eastern Apurímac
868
+ qvh Quechua, Huamalíes-Dos de Mayo Huánuco
869
+ qvm Quechua, Margos-Yarowilca-Lauricocha
870
+ qvn Quechua, North Junín
871
+ qvo Quichua, Napo
872
+ qvs Quechua, San Martín
873
+ qvw Quechua, Huaylla Wanca
874
+ qvz Quichua, Northern Pastaza
875
+ qwh Quechua, Huaylas Ancash
876
+ qxh Quechua, Panao
877
+ qxl Quichua, Salasaca Highland
878
+ qxn Quechua, Northern Conchucos Ancash
879
+ qxo Quechua, Southern Conchucos
880
+ qxr Quichua, Cañar Highland
881
+ rah Rabha
882
+ rai Ramoaaina
883
+ rap Rapa Nui
884
+ rav Sampang
885
+ raw Rawang
886
+ rej Rejang
887
+ rel Rendille
888
+ rgu Rikou
889
+ rhg Rohingya
890
+ rif-script_arabic Tarifit
891
+ rif-script_latin Tarifit
892
+ ril Riang Lang
893
+ rim Nyaturu
894
+ rjs Rajbanshi
895
+ rkt Rangpuri
896
+ rmc-script_cyrillic Romani, Carpathian
897
+ rmc-script_latin Romani, Carpathian
898
+ rmo Romani, Sinte
899
+ rmy-script_cyrillic Romani, Vlax
900
+ rmy-script_latin Romani, Vlax
901
+ rng Ronga
902
+ rnl Ranglong
903
+ roh-dialect_sursilv Romansh
904
+ roh-dialect_vallader Romansh
905
+ rol Romblomanon
906
+ ron Romanian
907
+ rop Kriol
908
+ rro Waima
909
+ rub Gungu
910
+ ruf Luguru
911
+ rug Roviana
912
+ run Rundi
913
+ rus Russian
914
+ sab Buglere
915
+ sag Sango
916
+ sah Yakut
917
+ saj Sahu
918
+ saq Samburu
919
+ sas Sasak
920
+ sat Santhali
921
+ sba Ngambay
922
+ sbd Samo, Southern
923
+ sbl Sambal, Botolan
924
+ sbp Sangu
925
+ sch Sakachep
926
+ sck Sadri
927
+ sda Toraja-Sa’dan
928
+ sea Semai
929
+ seh Sena
930
+ ses Songhay, Koyraboro Senni
931
+ sey Paicoca
932
+ sgb Ayta, Mag-antsi
933
+ sgj Surgujia
934
+ sgw Sebat Bet Gurage
935
+ shi Tachelhit
936
+ shk Shilluk
937
+ shn Shan
938
+ sho Shanga
939
+ shp Shipibo-Conibo
940
+ sid Sidamo
941
+ sig Paasaal
942
+ sil Sisaala, Tumulung
943
+ sja Epena
944
+ sjm Mapun
945
+ sld Sissala
946
+ slk Slovak
947
+ slu Selaru
948
+ slv Slovene
949
+ sml Sama, Central
950
+ smo Samoan
951
+ sna Shona
952
+ snd Sindhi
953
+ sne Bidayuh, Bau
954
+ snn Siona
955
+ snp Siane
956
+ snw Selee
957
+ som Somali
958
+ soy Miyobe
959
+ spa Spanish
960
+ spp Sénoufo, Supyire
961
+ spy Sabaot
962
+ sqi Albanian
963
+ sri Siriano
964
+ srm Saramaccan
965
+ srn Sranan Tongo
966
+ srp-script_cyrillic Serbian
967
+ srp-script_latin Serbian
968
+ srx Sirmauri
969
+ stn Owa
970
+ stp Tepehuan, Southeastern
971
+ suc Subanon, Western
972
+ suk Sukuma
973
+ sun Sunda
974
+ sur Mwaghavul
975
+ sus Susu
976
+ suv Puroik
977
+ suz Sunwar
978
+ swe Swedish
979
+ swh Swahili
980
+ sxb Suba
981
+ sxn Sangir
982
+ sya Siang
983
+ syl Sylheti
984
+ sza Semelai
985
+ tac Tarahumara, Western
986
+ taj Tamang, Eastern
987
+ tam Tamil
988
+ tao Yami
989
+ tap Taabwa
990
+ taq Tamasheq
991
+ tat Tatar
992
+ tav Tatuyo
993
+ tbc Takia
994
+ tbg Tairora, North
995
+ tbk Tagbanwa, Calamian
996
+ tbl Tboli
997
+ tby Tabaru
998
+ tbz Ditammari
999
+ tca Ticuna
1000
+ tcc Datooga
1001
+ tcs Torres Strait Creole
1002
+ tcz Chin, Thado
1003
+ tdj Tajio
1004
+ ted Krumen, Tepo
1005
+ tee Tepehua, Huehuetla
1006
+ tel Telugu
1007
+ tem Themne
1008
+ teo Ateso
1009
+ ter Terêna
1010
+ tes Tengger
1011
+ tew Tewa
1012
+ tex Tennet
1013
+ tfr Teribe
1014
+ tgj Tagin
1015
+ tgk Tajik
1016
+ tgl Tagalog
1017
+ tgo Sudest
1018
+ tgp Tangoa
1019
+ tha Thai
1020
+ thk Kitharaka
1021
+ thl Tharu, Dangaura
1022
+ tih Murut, Timugon
1023
+ tik Tikar
1024
+ tir Tigrigna
1025
+ tkr Tsakhur
1026
+ tlb Tobelo
1027
+ tlj Talinga-Bwisi
1028
+ tly Talysh
1029
+ tmc Tumak
1030
+ tmf Toba-Maskoy
1031
+ tna Tacana
1032
+ tng Tobanga
1033
+ tnk Kwamera
1034
+ tnn Tanna, North
1035
+ tnp Whitesands
1036
+ tnr Ménik
1037
+ tnt Tontemboan
1038
+ tob Toba
1039
+ toc Totonac, Coyutla
1040
+ toh Tonga
1041
+ tom Tombulu
1042
+ tos Totonac, Highland
1043
+ tpi Tok Pisin
1044
+ tpm Tampulma
1045
+ tpp Tepehua, Pisaflores
1046
+ tpt Tepehua, Tlachichilco
1047
+ trc Triqui, Copala
1048
+ tri Trió
1049
+ trn Trinitario
1050
+ trs Triqui, Chicahuaxtla
1051
+ tso Tsonga
1052
+ tsz Purepecha
1053
+ ttc Tektiteko
1054
+ tte Bwanabwana
1055
+ ttq-script_tifinagh Tamajaq, Tawallammat
1056
+ tue Tuyuca
1057
+ tuf Tunebo, Central
1058
+ tuk-script_arabic Turkmen
1059
+ tuk-script_latin Turkmen
1060
+ tuo Tucano
1061
+ tur Turkish
1062
+ tvw Sedoa
1063
+ twb Tawbuid
1064
+ twe Teiwa
1065
+ twu Termanu
1066
+ txa Tombonuo
1067
+ txq Tii
1068
+ txu Kayapó
1069
+ tye Kyanga
1070
+ tzh-dialect_bachajon Tzeltal
1071
+ tzh-dialect_tenejapa Tzeltal
1072
+ tzj-dialect_eastern Tz’utujil
1073
+ tzj-dialect_western Tz’utujil
1074
+ tzo-dialect_chamula Tzotzil
1075
+ tzo-dialect_chenalho Tzotzil
1076
+ ubl Bikol, Buhi’non
1077
+ ubu Umbu-Ungu
1078
+ udm Udmurt
1079
+ udu Uduk
1080
+ uig-script_arabic Uyghur
1081
+ uig-script_cyrillic Uyghur
1082
+ ukr Ukrainian
1083
+ umb Umbundu
1084
+ unr Mundari
1085
+ upv Uripiv-Wala-Rano-Atchin
1086
+ ura Urarina
1087
+ urb Kaapor
1088
+ urd-script_arabic Urdu
1089
+ urd-script_devanagari Urdu
1090
+ urd-script_latin Urdu
1091
+ urk Urak Lawoi’
1092
+ urt Urat
1093
+ ury Orya
1094
+ usp Uspanteko
1095
+ uzb-script_cyrillic Uzbek
1096
+ uzb-script_latin Uzbek
1097
+ vag Vagla
1098
+ vid Vidunda
1099
+ vie Vietnamese
1100
+ vif Vili
1101
+ vmw Makhuwa
1102
+ vmy Mazatec, Ayautla
1103
+ vot Vod
1104
+ vun Vunjo
1105
+ vut Vute
1106
+ wal-script_ethiopic Wolaytta
1107
+ wal-script_latin Wolaytta
1108
+ wap Wapishana
1109
+ war Waray-Waray
1110
+ waw Waiwai
1111
+ way Wayana
1112
+ wba Warao
1113
+ wlo Wolio
1114
+ wlx Wali
1115
+ wmw Mwani
1116
+ wob Wè Northern
1117
+ wol Wolof
1118
+ wsg Gondi, Adilabad
1119
+ wwa Waama
1120
+ xal Kalmyk-Oirat
1121
+ xdy Malayic Dayak
1122
+ xed Hdi
1123
+ xer Xerénte
1124
+ xho Xhosa
1125
+ xmm Malay, Manado
1126
+ xnj Chingoni
1127
+ xnr Kangri
1128
+ xog Soga
1129
+ xon Konkomba
1130
+ xrb Karaboro, Eastern
1131
+ xsb Sambal
1132
+ xsm Kasem
1133
+ xsr Sherpa
1134
+ xsu Sanumá
1135
+ xta Mixtec, Alcozauca
1136
+ xtd Mixtec, Diuxi-Tilantongo
1137
+ xte Ketengban
1138
+ xtm Mixtec, Magdalena Peñasco
1139
+ xtn Mixtec, Northern Tlaxiaco
1140
+ xua Kurumba, Alu
1141
+ xuo Kuo
1142
+ yaa Yaminahua
1143
+ yad Yagua
1144
+ yal Yalunka
1145
+ yam Yamba
1146
+ yao Yao
1147
+ yas Nugunu
1148
+ yat Yambeta
1149
+ yaz Lokaa
1150
+ yba Yala
1151
+ ybb Yemba
1152
+ ycl Lolopo
1153
+ ycn Yucuna
1154
+ yea Ravula
1155
+ yka Yakan
1156
+ yli Yali, Angguruk
1157
+ yor Yoruba
1158
+ yre Yaouré
1159
+ yua Maya, Yucatec
1160
+ yue-script_traditional Chinese, Yue
1161
+ yuz Yuracare
1162
+ yva Yawa
1163
+ zaa Zapotec, Sierra de Juárez
1164
+ zab Zapotec, Western Tlacolula Valley
1165
+ zac Zapotec, Ocotlán
1166
+ zad Zapotec, Cajonos
1167
+ zae Zapotec, Yareni
1168
+ zai Zapotec, Isthmus
1169
+ zam Zapotec, Miahuatlán
1170
+ zao Zapotec, Ozolotepec
1171
+ zaq Zapotec, Aloápam
1172
+ zar Zapotec, Rincón
1173
+ zas Zapotec, Santo Domingo Albarradas
1174
+ zav Zapotec, Yatzachi
1175
+ zaw Zapotec, Mitla
1176
+ zca Zapotec, Coatecas Altas
1177
+ zga Kinga
1178
+ zim Mesme
1179
+ ziw Zigula
1180
+ zlm Malay
1181
+ zmz Mbandja
1182
+ zne Zande
1183
+ zos Zoque, Francisco León
1184
+ zpc Zapotec, Choapan
1185
+ zpg Zapotec, Guevea de Humboldt
1186
+ zpi Zapotec, Santa María Quiegolani
1187
+ zpl Zapotec, Lachixío
1188
+ zpm Zapotec, Mixtepec
1189
+ zpo Zapotec, Amatlán
1190
+ zpt Zapotec, San Vicente Coatlán
1191
+ zpu Zapotec, Yalálag
1192
+ zpz Zapotec, Texmelucan
1193
+ ztq Zapotec, Quioquitani-Quierí
1194
+ zty Zapotec, Yatee
1195
+ zul Zulu
1196
+ zyb Zhuang, Yongbei
1197
+ zyp Chin, Zyphe
1198
+ zza Zaza
modules/app.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Created By Lewis Kamau Kimaru
3
+ Sema translator api backend
4
+ January 2024
5
+ Docker deployment
6
+ '''
7
+
8
+ from fastapi import FastAPI, HTTPException, Request
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+ from fastapi.responses import HTMLResponse
11
+ import gradio as gr
12
+ import ctranslate2
13
+ import sentencepiece as spm
14
+ import fasttext
15
+ import uvicorn
16
+ import pytz
17
+ from datetime import datetime
18
+ import os
19
+
20
+ app = FastAPI()
21
+
22
+ fasttext.FastText.eprint = lambda x: None
23
+
24
+ # Get time of request
25
+
26
def get_time():
    """Return the current Africa/Nairobi time as ``(full_date, curr_time)``.

    ``full_date`` has the form ``"<weekday> | YYYY-MM-DD | HH:MM:SS"`` and
    ``curr_time`` is the ``HH:MM:SS`` part on its own.
    """
    # A single timezone-aware timestamp feeds every formatted field.
    now = datetime.now(pytz.timezone('Africa/Nairobi'))

    curr_time = now.strftime('%H:%M:%S')
    pieces = (now.strftime('%A'), now.strftime('%Y-%m-%d'), curr_time)

    full_date = " | ".join(pieces)
    return full_date, curr_time
36
+
37
# Inference settings and one-time model loading (executed once, at import).
beam_size = 1  # 1 is the smallest beam (greedy search); raise it to trade speed for quality
device = "cpu"  # or "cuda"

# --- Language identification (fastText) ---
print("\nimporting Language Prediction model")
lang_model_file = "lid218e.bin"
# Model files are looked up next to this module, not in the working directory.
lang_model_full_path = os.path.join(os.path.dirname(__file__), lang_model_file)
lang_model = fasttext.load_model(lang_model_full_path)


# --- Subword tokenizer (SentencePiece) ---
print("\nimporting SentencePiece model")
sp_model_file = "spm.model"
sp_model_full_path = os.path.join(os.path.dirname(__file__), sp_model_file)
sp = spm.SentencePieceProcessor()
sp.load(sp_model_full_path)

# --- Translation model (CTranslate2) ---
print("\nimporting Translator model")
ct_model_file = "sematrans-3.3B"
ct_model_full_path = os.path.join(os.path.dirname(__file__), ct_model_file)
translator = ctranslate2.Translator(ct_model_full_path, device)

print('\nDone importing models\n')
62
+
63
+
64
def translate_detect(userinput: str, target_lang: str):
    """Detect the language of ``userinput`` and translate it into ``target_lang``.

    Args:
        userinput: Text to translate. Leading/trailing whitespace is stripped.
        target_lang: Target-language token (e.g. ``"eng_Latn"``); it is passed
            to the translator as the decoding prefix.

    Returns:
        A tuple ``(source_lang, translations)``: ``source_lang`` is the top
        fastText label with the ``__label__`` prefix removed, and
        ``translations`` is a one-element list holding the translated text.
    """
    source_sents = [userinput]
    source_sents = [sent.strip() for sent in source_sents]
    target_prefix = [[target_lang]] * len(source_sents)

    # Predict the source language.
    # fastText's predict() raises ValueError when the text contains '\n'
    # (it processes one line at a time), so flatten newlines for detection only;
    # the text that is actually translated is left untouched.
    detect_text = source_sents[0].replace("\n", " ")
    predictions = lang_model.predict(detect_text, k=1)
    source_lang = predictions[0][0].replace('__label__', '')

    # Subword the source sentences and wrap them as <source_lang> ... </s>
    source_sents_subworded = sp.encode(source_sents, out_type=str)
    source_sents_subworded = [[source_lang] + sent + ["</s>"] for sent in source_sents_subworded]

    # Translate the source sentences
    translations = translator.translate_batch(
        source_sents_subworded,
        batch_type="tokens",
        max_batch_size=2024,
        beam_size=beam_size,
        target_prefix=target_prefix,
    )
    # Keep only the best hypothesis of each result
    translations = [translation[0]['tokens'] for translation in translations]

    # Desubword the target sentences. The decoded text still starts with the
    # target-language token; drop it together with the whitespace that
    # separated it from the first real word.
    translations_desubword = sp.decode(translations)
    translations_desubword = [sent[len(target_lang):].strip() for sent in translations_desubword]

    # Return the source language and the translated text
    return source_lang, translations_desubword
93
+
94
def translate_enter(userinput: str, source_lang: str, target_lang: str):
    """Translate ``userinput`` from an explicitly given source language.

    Args:
        userinput: Text to translate. Leading/trailing whitespace is stripped.
        source_lang: Source-language token prepended to the subworded input.
        target_lang: Target-language token passed to the translator as the
            decoding prefix.

    Returns:
        The translated text as a single string.
    """
    source_sents = [userinput]
    source_sents = [sent.strip() for sent in source_sents]
    target_prefix = [[target_lang]] * len(source_sents)

    # Subword the source sentences and wrap them as <source_lang> ... </s>
    source_sents_subworded = sp.encode(source_sents, out_type=str)
    source_sents_subworded = [[source_lang] + sent + ["</s>"] for sent in source_sents_subworded]

    # Translate the source sentences
    translations = translator.translate_batch(
        source_sents_subworded,
        batch_type="tokens",
        max_batch_size=2024,
        beam_size=beam_size,
        target_prefix=target_prefix,
    )
    # Keep only the best hypothesis of each result
    translations = [translation[0]['tokens'] for translation in translations]

    # Desubword the target sentences. The decoded text still starts with the
    # target-language token; drop it together with the whitespace that
    # separated it from the first real word.
    translations_desubword = sp.decode(translations)
    translations_desubword = [sent[len(target_lang):].strip() for sent in translations_desubword]

    # Return only the translated text (the source language was supplied by the caller)
    return translations_desubword[0]
113
+
114
+
115
@app.get("/")
async def read_root():
    """Serve a minimal HTML page that frames the hosted Sema frontend."""
    # The frame URL is fully quoted: the previous markup opened the attribute
    # without a quote and closed it with one, which made the stray apostrophe
    # part of the URL (``?embedded=true'``). The viewport <meta> also belongs
    # inside <head>.
    gradio_interface = """
    <html>
      <head>
        <meta name="viewport" content="width=device-width, height=device-height, initial-scale=1.0">
        <title>Sema</title>
      </head>
      <frameset>
        <frame src="https://kamau1-semaapi-frontend.hf.space/?embedded=true">
      </frameset>
    </html>
    """
    return HTMLResponse(content=gradio_interface)
129
+
130
+
131
@app.post("/translate_detect/")
async def translate_detect_endpoint(request: Request):
    """Handle POST /translate_detect/: auto-detect the source language and translate.

    Reads ``userinput`` and ``target_lang`` from the JSON body, logs the
    request and response with Nairobi timestamps, and returns a dict with
    ``source_language`` and ``translated_text``.

    Raises:
        HTTPException: 422 when ``userinput`` or ``target_lang`` is missing or empty.
    """
    payload = await request.json()
    text = payload.get("userinput")
    target = payload.get("target_lang")

    request_stamp, _ = get_time()
    print(f"\nrequest: {request_stamp}\nTarget Language; {target}, User Input: {text}\n")

    # Both fields are mandatory; reject the request before touching the models.
    if not (text and target):
        raise HTTPException(status_code=422, detail="Both 'userinput' and 'target_lang' are required.")

    detected, translated = translate_detect(text, target)

    _, response_stamp = get_time()
    print(f"\nresponse: {response_stamp}; ... Source_language: {detected}, Translated Text: {translated}\n\n")

    # translate_detect returns a list of translations; only the first is exposed.
    response = {
        "source_language": detected,
        "translated_text": translated[0],
    }
    return response
149
+
150
+
151
@app.post("/translate_enter/")
async def translate_enter_endpoint(request: Request):
    """Handle POST /translate_enter/: translate with a caller-supplied source language.

    Reads ``userinput``, ``source_lang`` and ``target_lang`` from the JSON
    body, logs the request and response with Nairobi timestamps, and returns
    a dict with ``translated_text``.

    Raises:
        HTTPException: 422 when any of the three fields is missing or empty.
    """
    datae = await request.json()
    userinpute = datae.get("userinput")
    source_lange = datae.get("source_lang")
    target_lange = datae.get("target_lang")
    efull_date = get_time()[0]
    print(f"\nrequest: {efull_date}\nSource_language; {source_lange}, Target Language; {target_lange}, User Input: {userinpute}\n")

    # source_lang is prepended to the token sequence inside translate_enter, so
    # a missing value must be rejected here instead of reaching the translator
    # as None.
    if not userinpute or not source_lange or not target_lange:
        raise HTTPException(status_code=422, detail="'userinput', 'source_lang' and 'target_lang' are required.")

    translated_text_e = translate_enter(userinpute, source_lange, target_lange)
    ecurrent_time = get_time()[1]
    print(f"\nresponse: {ecurrent_time}; ... Translated Text: {translated_text_e}\n\n")
    return {
        "translated_text": translated_text_e,
    }
169
+
170
+
171
+ print("\nAPI starting .......\n")
modules/flores200_codes.py ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ codes_as_string = '''Acehnese (Arabic script) ace_Arab
2
+ Acehnese (Latin script) ace_Latn
3
+ Mesopotamian Arabic acm_Arab
4
+ Ta’izzi-Adeni Arabic acq_Arab
5
+ Tunisian Arabic aeb_Arab
6
+ Afrikaans afr_Latn
7
+ South Levantine Arabic ajp_Arab
8
+ Akan aka_Latn
9
+ Amharic amh_Ethi
10
+ North Levantine Arabic apc_Arab
11
+ Modern Standard Arabic arb_Arab
12
+ Modern Standard Arabic (Romanized) arb_Latn
13
+ Najdi Arabic ars_Arab
14
+ Moroccan Arabic ary_Arab
15
+ Egyptian Arabic arz_Arab
16
+ Assamese asm_Beng
17
+ Asturian ast_Latn
18
+ Awadhi awa_Deva
19
+ Central Aymara ayr_Latn
20
+ South Azerbaijani azb_Arab
21
+ North Azerbaijani azj_Latn
22
+ Bashkir bak_Cyrl
23
+ Bambara bam_Latn
24
+ Balinese ban_Latn
25
+ Belarusian bel_Cyrl
26
+ Bemba bem_Latn
27
+ Bengali ben_Beng
28
+ Bhojpuri bho_Deva
29
+ Banjar (Arabic script) bjn_Arab
30
+ Banjar (Latin script) bjn_Latn
31
+ Standard Tibetan bod_Tibt
32
+ Bosnian bos_Latn
33
+ Buginese bug_Latn
34
+ Bulgarian bul_Cyrl
35
+ Catalan cat_Latn
36
+ Cebuano ceb_Latn
37
+ Czech ces_Latn
38
+ Chokwe cjk_Latn
39
+ Central Kurdish ckb_Arab
40
+ Crimean Tatar crh_Latn
41
+ Welsh cym_Latn
42
+ Danish dan_Latn
43
+ German deu_Latn
44
+ Southwestern Dinka dik_Latn
45
+ Dyula dyu_Latn
46
+ Dzongkha dzo_Tibt
47
+ Greek ell_Grek
48
+ English eng_Latn
49
+ Esperanto epo_Latn
50
+ Estonian est_Latn
51
+ Basque eus_Latn
52
+ Ewe ewe_Latn
53
+ Faroese fao_Latn
54
+ Fijian fij_Latn
55
+ Finnish fin_Latn
56
+ Fon fon_Latn
57
+ French fra_Latn
58
+ Friulian fur_Latn
59
+ Nigerian Fulfulde fuv_Latn
60
+ Scottish Gaelic gla_Latn
61
+ Irish gle_Latn
62
+ Galician glg_Latn
63
+ Guarani grn_Latn
64
+ Gujarati guj_Gujr
65
+ Haitian Creole hat_Latn
66
+ Hausa hau_Latn
67
+ Hebrew heb_Hebr
68
+ Hindi hin_Deva
69
+ Chhattisgarhi hne_Deva
70
+ Croatian hrv_Latn
71
+ Hungarian hun_Latn
72
+ Armenian hye_Armn
73
+ Igbo ibo_Latn
74
+ Ilocano ilo_Latn
75
+ Indonesian ind_Latn
76
+ Icelandic isl_Latn
77
+ Italian ita_Latn
78
+ Javanese jav_Latn
79
+ Japanese jpn_Jpan
80
+ Kabyle kab_Latn
81
+ Jingpho kac_Latn
82
+ Kamba kam_Latn
83
+ Kannada kan_Knda
84
+ Kashmiri (Arabic script) kas_Arab
85
+ Kashmiri (Devanagari script) kas_Deva
86
+ Georgian kat_Geor
87
+ Central Kanuri (Arabic script) knc_Arab
88
+ Central Kanuri (Latin script) knc_Latn
89
+ Kazakh kaz_Cyrl
90
+ Kabiyè kbp_Latn
91
+ Kabuverdianu kea_Latn
92
+ Khmer khm_Khmr
93
+ Kikuyu kik_Latn
94
+ Kinyarwanda kin_Latn
95
+ Kyrgyz kir_Cyrl
96
+ Kimbundu kmb_Latn
97
+ Northern Kurdish kmr_Latn
98
+ Kikongo kon_Latn
99
+ Korean kor_Hang
100
+ Lao lao_Laoo
101
+ Ligurian lij_Latn
102
+ Limburgish lim_Latn
103
+ Lingala lin_Latn
104
+ Lithuanian lit_Latn
105
+ Lombard lmo_Latn
106
+ Latgalian ltg_Latn
107
+ Luxembourgish ltz_Latn
108
+ Luba-Kasai lua_Latn
109
+ Ganda lug_Latn
110
+ Luo luo_Latn
111
+ Mizo lus_Latn
112
+ Standard Latvian lvs_Latn
113
+ Magahi mag_Deva
114
+ Maithili mai_Deva
115
+ Malayalam mal_Mlym
116
+ Marathi mar_Deva
117
+ Minangkabau (Arabic script) min_Arab
118
+ Minangkabau (Latin script) min_Latn
119
+ Macedonian mkd_Cyrl
120
+ Plateau Malagasy plt_Latn
121
+ Maltese mlt_Latn
122
+ Meitei (Bengali script) mni_Beng
123
+ Halh Mongolian khk_Cyrl
124
+ Mossi mos_Latn
125
+ Maori mri_Latn
126
+ Burmese mya_Mymr
127
+ Dutch nld_Latn
128
+ Norwegian Nynorsk nno_Latn
129
+ Norwegian Bokmål nob_Latn
130
+ Nepali npi_Deva
131
+ Northern Sotho nso_Latn
132
+ Nuer nus_Latn
133
+ Nyanja nya_Latn
134
+ Occitan oci_Latn
135
+ West Central Oromo gaz_Latn
136
+ Odia ory_Orya
137
+ Pangasinan pag_Latn
138
+ Eastern Panjabi pan_Guru
139
+ Papiamento pap_Latn
140
+ Western Persian pes_Arab
141
+ Polish pol_Latn
142
+ Portuguese por_Latn
143
+ Dari prs_Arab
144
+ Southern Pashto pbt_Arab
145
+ Ayacucho Quechua quy_Latn
146
+ Romanian ron_Latn
147
+ Rundi run_Latn
148
+ Russian rus_Cyrl
149
+ Sango sag_Latn
150
+ Sanskrit san_Deva
151
+ Santali sat_Olck
152
+ Sicilian scn_Latn
153
+ Shan shn_Mymr
154
+ Sinhala sin_Sinh
155
+ Slovak slk_Latn
156
+ Slovenian slv_Latn
157
+ Samoan smo_Latn
158
+ Shona sna_Latn
159
+ Sindhi snd_Arab
160
+ Somali som_Latn
161
+ Southern Sotho sot_Latn
162
+ Spanish spa_Latn
163
+ Tosk Albanian als_Latn
164
+ Sardinian srd_Latn
165
+ Serbian srp_Cyrl
166
+ Swati ssw_Latn
167
+ Sundanese sun_Latn
168
+ Swedish swe_Latn
169
+ Swahili swh_Latn
170
+ Silesian szl_Latn
171
+ Tamil tam_Taml
172
+ Tatar tat_Cyrl
173
+ Telugu tel_Telu
174
+ Tajik tgk_Cyrl
175
+ Tagalog tgl_Latn
176
+ Thai tha_Thai
177
+ Tigrinya tir_Ethi
178
+ Tamasheq (Latin script) taq_Latn
179
+ Tamasheq (Tifinagh script) taq_Tfng
180
+ Tok Pisin tpi_Latn
181
+ Tswana tsn_Latn
182
+ Tsonga tso_Latn
183
+ Turkmen tuk_Latn
184
+ Tumbuka tum_Latn
185
+ Turkish tur_Latn
186
+ Twi twi_Latn
187
+ Central Atlas Tamazight tzm_Tfng
188
+ Uyghur uig_Arab
189
+ Ukrainian ukr_Cyrl
190
+ Umbundu umb_Latn
191
+ Urdu urd_Arab
192
+ Northern Uzbek uzn_Latn
193
+ Venetian vec_Latn
194
+ Vietnamese vie_Latn
195
+ Waray war_Latn
196
+ Wolof wol_Latn
197
+ Xhosa xho_Latn
198
+ Eastern Yiddish ydd_Hebr
199
+ Yoruba yor_Latn
200
+ Yue Chinese yue_Hant
201
+ Chinese (Simplified) zho_Hans
202
+ Chinese (Traditional) zho_Hant
203
+ Standard Malay zsm_Latn
204
+ Zulu zul_Latn'''
205
+
206
# Break the table into rows; each row is "<language name>\t<language code>".
codes_as_string = codes_as_string.split('\n')

# Lookup table: human-readable language name -> language code.
flores_codes = {}
for code in codes_as_string:
    # Exactly one tab per row is expected; anything else raises ValueError here.
    lang, lang_code = code.split('\t')
    flores_codes.update({lang: lang_code})
modules/lid218e.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ded5749a2ad79ae9ab7c9190c7c8b97ff20d54ad8b9527ffa50107238fc7f6a
3
+ size 1176355829
modules/sematrans-1.2B/config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_source_bos": false,
3
+ "add_source_eos": false,
4
+ "bos_token": "<s>",
5
+ "decoder_start_token": "</s>",
6
+ "eos_token": "</s>",
7
+ "unk_token": "<unk>"
8
+ }
modules/sematrans-1.2B/model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd041a40b02d6e3343b174e84ad75c2a058c3a60951dca6bbb4aa9334e7b9ddf
3
+ size 1381827064
modules/sematrans-1.2B/shared_vocabulary.txt ADDED
The diff for this file is too large to render. See raw diff
 
modules/sematrans-3.3B/config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_source_bos": false,
3
+ "add_source_eos": false,
4
+ "bos_token": "<s>",
5
+ "decoder_start_token": "</s>",
6
+ "eos_token": "</s>",
7
+ "unk_token": "<unk>"
8
+ }
modules/sematrans-3.3B/model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cacb7f6f36b75d73549ab9d99d0bb95270ef65dabea07204adc5648c68274ae8
3
+ size 3363404280
modules/sematrans-3.3B/shared_vocabulary.txt ADDED
The diff for this file is too large to render. See raw diff
 
modules/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14bb8dfb35c0ffdea7bc01e56cea38b9e3d5efcdcb9c251d6b40538e1aab555a
3
+ size 4852054
modules/translator_all_langs.tsv ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Acehnese-(Arabic script) ace_Arab
2
+ Acehnese-(Latin script) ace_Latn
3
+ Mesopotamian-(Arabic) acm_Arab
4
+ Ta’izzi-Adeni-(Arabic) acq_Arab
5
+ Tunisian-(Arabic) aeb_Arab
6
+ Afrikaans afr_Latn
7
+ South_Levantine_Arabic ajp_Arab
8
+ Akan aka_Latn
9
+ Amharic amh_Ethi
10
+ North_Levantine_Arabic apc_Arab
11
+ Modern_Standard_Arabic arb_Arab
12
+ Modern_Standard_Arabic_(Romanized) arb_Latn
13
+ Najdi_Arabic ars_Arab
14
+ Moroccan_Arabic ary_Arab
15
+ Egyptian_Arabic arz_Arab
16
+ Assamese asm_Beng
17
+ Asturian ast_Latn
18
+ Awadhi awa_Deva
19
+ Central_Aymara ayr_Latn
20
+ South_Azerbaijani azb_Arab
21
+ North_Azerbaijani azj_Latn
22
+ Bashkir bak_Cyrl
23
+ Bambara bam_Latn
24
+ Balinese ban_Latn
25
+ Belarusian bel_Cyrl
26
+ Bemba bem_Latn
27
+ Bengali ben_Beng
28
+ Bhojpuri bho_Deva
29
+ Banjar_(Arabic script) bjn_Arab
30
+ Banjar_(Latin script) bjn_Latn
31
+ Standard_Tibetan bod_Tibt
32
+ Bosnian bos_Latn
33
+ Buginese bug_Latn
34
+ Bulgarian bul_Cyrl
35
+ Catalan cat_Latn
36
+ Cebuano ceb_Latn
37
+ Czech ces_Latn
38
+ Chokwe cjk_Latn
39
+ Central_Kurdish ckb_Arab
40
+ Crimean_Tatar crh_Latn
41
+ Welsh cym_Latn
42
+ Danish dan_Latn
43
+ German deu_Latn
44
+ Southwestern_Dinka dik_Latn
45
+ Dyula dyu_Latn
46
+ Dzongkha dzo_Tibt
47
+ Greek ell_Grek
48
+ English eng_Latn
49
+ Esperanto epo_Latn
50
+ Estonian est_Latn
51
+ Basque eus_Latn
52
+ Ewe ewe_Latn
53
+ Faroese fao_Latn
54
+ Fijian fij_Latn
55
+ Finnish fin_Latn
56
+ Fon fon_Latn
57
+ French fra_Latn
58
+ Friulian fur_Latn
59
+ Nigerian_Fulfulde fuv_Latn
60
+ Scottish_Gaelic gla_Latn
61
+ Irish gle_Latn
62
+ Galician glg_Latn
63
+ Guarani grn_Latn
64
+ Gujarati guj_Gujr
65
+ Haitian_Creole hat_Latn
66
+ Hausa hau_Latn
67
+ Hebrew heb_Hebr
68
+ Hindi hin_Deva
69
+ Chhattisgarhi hne_Deva
70
+ Croatian hrv_Latn
71
+ Hungarian hun_Latn
72
+ Armenian hye_Armn
73
+ Igbo ibo_Latn
74
+ Ilocano ilo_Latn
75
+ Indonesian ind_Latn
76
+ Icelandic isl_Latn
77
+ Italian ita_Latn
78
+ Javanese jav_Latn
79
+ Japanese jpn_Jpan
80
+ Kabyle kab_Latn
81
+ Jingpho kac_Latn
82
+ Kamba kam_Latn
83
+ Kannada kan_Knda
84
+ Kashmiri_(Arabic script) kas_Arab
85
+ Kashmiri_(Devanagari script) kas_Deva
86
+ Georgian kat_Geor
87
+ Central_Kanuri_(Arabic script) knc_Arab
88
+ Central_Kanuri_(Latin script) knc_Latn
89
+ Kazakh kaz_Cyrl
90
+ Kabiyè kbp_Latn
91
+ Kabuverdianu kea_Latn
92
+ Khmer khm_Khmr
93
+ Kikuyu kik_Latn
94
+ Kinyarwanda kin_Latn
95
+ Kyrgyz kir_Cyrl
96
+ Kimbundu kmb_Latn
97
+ Northern_Kurdish kmr_Latn
98
+ Kikongo kon_Latn
99
+ Korean kor_Hang
100
+ Lao lao_Laoo
101
+ Ligurian lij_Latn
102
+ Limburgish lim_Latn
103
+ Lingala lin_Latn
104
+ Lithuanian lit_Latn
105
+ Lombard lmo_Latn
106
+ Latgalian ltg_Latn
107
+ Luxembourgish ltz_Latn
108
+ Luba-Kasai lua_Latn
109
+ Ganda lug_Latn
110
+ Luo luo_Latn
111
+ Mizo lus_Latn
112
+ Standard-Latvian lvs_Latn
113
+ Magahi mag_Deva
114
+ Maithili mai_Deva
115
+ Malayalam mal_Mlym
116
+ Marathi mar_Deva
117
+ Minangkabau-(Arabic script) min_Arab
118
+ Minangkabau-(Latin script) min_Latn
119
+ Macedonian mkd_Cyrl
120
+ Plateau-Malagasy plt_Latn
121
+ Maltese mlt_Latn
122
+ Meitei-(Bengali script) mni_Beng
123
+ Halh-Mongolian khk_Cyrl
124
+ Mossi mos_Latn
125
+ Maori mri_Latn
126
+ Burmese mya_Mymr
127
+ Dutch nld_Latn
128
+ Norwegian-Nynorsk nno_Latn
129
+ Norwegian-Bokmål nob_Latn
130
+ Nepali npi_Deva
131
+ Northern-Sotho nso_Latn
132
+ Nuer nus_Latn
133
+ Nyanja nya_Latn
134
+ Occitan oci_Latn
135
+ West-Central-Oromo gaz_Latn
136
+ Odia ory_Orya
137
+ Pangasinan pag_Latn
138
+ Eastern-Panjabi pan_Guru
139
+ Papiamento pap_Latn
140
+ Western-Persian pes_Arab
141
+ Polish pol_Latn
142
+ Portuguese por_Latn
143
+ Dari prs_Arab
144
+ Southern-Pashto pbt_Arab
145
+ Ayacucho-Quechua quy_Latn
146
+ Romanian ron_Latn
147
+ Rundi run_Latn
148
+ Russian rus_Cyrl
149
+ Sango sag_Latn
150
+ Sanskrit san_Deva
151
+ Santali sat_Olck
152
+ Sicilian scn_Latn
153
+ Shan shn_Mymr
154
+ Sinhala sin_Sinh
155
+ Slovak slk_Latn
156
+ Slovenian slv_Latn
157
+ Samoan smo_Latn
158
+ Shona sna_Latn
159
+ Sindhi snd_Arab
160
+ Somali som_Latn
161
+ Southern-Sotho sot_Latn
162
+ Spanish spa_Latn
163
+ Tosk-Albanian als_Latn
164
+ Sardinian srd_Latn
165
+ Serbian srp_Cyrl
166
+ Swati ssw_Latn
167
+ Sundanese sun_Latn
168
+ Swedish swe_Latn
169
+ Swahili swh_Latn
170
+ Silesian szl_Latn
171
+ Tamil tam_Taml
172
+ Tatar tat_Cyrl
173
+ Telugu tel_Telu
174
+ Tajik tgk_Cyrl
175
+ Tagalog tgl_Latn
176
+ Thai tha_Thai
177
+ Tigrinya tir_Ethi
178
+ Tamasheq-(Latin script) taq_Latn
179
+ Tamasheq-(Tifinagh script) taq_Tfng
180
+ Tok-Pisin tpi_Latn
181
+ Tswana tsn_Latn
182
+ Tsonga tso_Latn
183
+ Turkmen tuk_Latn
184
+ Tumbuka tum_Latn
185
+ Turkish tur_Latn
186
+ Twi twi_Latn
187
+ Central-Atlas-Tamazight tzm_Tfng
188
+ Uyghur uig_Arab
189
+ Ukrainian ukr_Cyrl
190
+ Umbundu umb_Latn
191
+ Urdu urd_Arab
192
+ Northern-Uzbek uzn_Latn
193
+ Venetian vec_Latn
194
+ Vietnamese vie_Latn
195
+ Waray war_Latn
196
+ Wolof wol_Latn
197
+ Xhosa xho_Latn
198
+ Eastern-Yiddish ydd_Hebr
199
+ Yoruba yor_Latn
200
+ Yue Chinese yue_Hant
201
+ Chinese-(Simplified) zho_Hans
202
+ Chinese-(Traditional) zho_Hant
203
+ Standard-Malay zsm_Latn
204
+ Zulu zul_Latn
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ huggingface_hub
2
+ ctranslate2
3
+ sentencepiece
4
+ accelerate
5
+ bitsandbytes
6
+ transformers
7
+ requests
8
+ pypi-json
9
+ pytz
10
+ fasttext
11
+ gradio
12
+ fastapi
13
+ uvicorn[standard]==0.17.*
static/index.html ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta name="viewport" content="width=device-width">
6
+ <title>Sema Translator</title>
7
+ <link rel="stylesheet" href="static/style.css" />
8
+ <py-script src="modules/flores200_codes.py"></py-script>
9
+ </head>
10
+ <body>
11
+ <div class="Header">
12
+ <h1>Sema Translator</h1>
13
+ <p>Unlock the power of global communication with Sema Translator, which seamlessly bridges language barriers. With support for over 200 languages, Sema Translator opens up a realm of possibilities for building truly global applications.</p>
14
+
15
+ </div>
16
+
17
+ <div class="LanguageDropdown" style="text-align:center">
18
+ <h2>Select a Language:</h2>
19
+ <select id="languageSelect">
20
+ {"".join(f"<option value='{code}'>{lang} - {code}</option>" for lang, code in flores_codes.items())}
21
+ </select>
22
+ </div>
23
+
24
+ <div class="instructions">
25
+ <p>Use the following Python code to access the API endpoint:</p>
26
+ <pre style="text-align: left;">
27
+ import requests
28
+
29
+ url = "{public_url}/translate_detect/"
30
+ data = {
31
+ "userinput": "rũcinĩ rwega, niwokĩra wega?",
32
+ "target_lang": "eng_Latn",
33
+ }
34
+
35
+ response = requests.post(url, json=data)
36
+ result = response.json()
37
+
38
+ print(result)
39
+
40
+ source_language = result['source_language']
41
+ print("Source Language:", source_language)
42
+
43
+ translation = result['translated_text']
44
+ print("Translated text:", translation)
45
+ </pre>
46
+ </div>
47
+
48
+ <div class="footer">
49
+ <h1>Created by Lewis Kamau Kimaru</h1>
50
+ </div>
51
+ </body>
52
+ </html>
static/style.css ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .Header {
2
+ text-align: center;
3
+ }
4
+ .LanguageDropdown {
5
+ text-align: center;
6
+ }
7
+ .instructions {
8
+ background-color: #056e33;
9
+ padding: 20px;
10
+ }
11
+ .footer {
12
+ text-align: center;
13
+ }