Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- .gitattributes +2 -9
- main.py +3 -0
- modules/STT & TTS/all_langs.tsv +1198 -0
- modules/app.py +171 -0
- modules/flores200_codes.py +211 -0
- modules/lid218e.bin +3 -0
- modules/sematrans-1.2B/config.json +8 -0
- modules/sematrans-1.2B/model.bin +3 -0
- modules/sematrans-1.2B/shared_vocabulary.txt +0 -0
- modules/sematrans-3.3B/config.json +8 -0
- modules/sematrans-3.3B/model.bin +3 -0
- modules/sematrans-3.3B/shared_vocabulary.txt +0 -0
- modules/spm.model +3 -0
- modules/translator_all_langs.tsv +204 -0
- requirements.txt +13 -0
- static/index.html +52 -0
- static/style.css +13 -0
.gitattributes
CHANGED
@@ -1,35 +1,28 @@
|
|
1 |
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
|
|
4 |
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bin.* filter=lfs diff=lfs merge=lfs -text
|
5 |
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
|
|
6 |
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
|
|
11 |
*.model filter=lfs diff=lfs merge=lfs -text
|
12 |
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
13 |
*.onnx filter=lfs diff=lfs merge=lfs -text
|
14 |
*.ot filter=lfs diff=lfs merge=lfs -text
|
15 |
*.parquet filter=lfs diff=lfs merge=lfs -text
|
16 |
*.pb filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
17 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
18 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
19 |
*.rar filter=lfs diff=lfs merge=lfs -text
|
|
|
20 |
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
21 |
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
|
|
22 |
*.tflite filter=lfs diff=lfs merge=lfs -text
|
23 |
*.tgz filter=lfs diff=lfs merge=lfs -text
|
24 |
*.wasm filter=lfs diff=lfs merge=lfs -text
|
25 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
26 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
28 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
main.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
import subprocess
|
2 |
+
|
3 |
+
subprocess.run("uvicorn modules.app:app --host 0.0.0.0 --port 7860", shell=True)
|
modules/STT & TTS/all_langs.tsv
ADDED
@@ -0,0 +1,1198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
abi Abidji
|
2 |
+
abk Abkhaz
|
3 |
+
abp Ayta, Abellen
|
4 |
+
aca Achagua
|
5 |
+
acd Gikyode
|
6 |
+
ace Aceh
|
7 |
+
acf Lesser Antillean French Creole
|
8 |
+
ach Acholi
|
9 |
+
acn Achang
|
10 |
+
acr Achi
|
11 |
+
acu Achuar-Shiwiar
|
12 |
+
ade Adele
|
13 |
+
adh Jopadhola
|
14 |
+
adj Adioukrou
|
15 |
+
adx Tibetan, Amdo
|
16 |
+
aeu Akeu
|
17 |
+
afr Afrikaans
|
18 |
+
agd Agarabi
|
19 |
+
agg Angor
|
20 |
+
agn Agutaynen
|
21 |
+
agr Awajún
|
22 |
+
agu Awakateko
|
23 |
+
agx Aghul
|
24 |
+
aha Ahanta
|
25 |
+
ahk Akha
|
26 |
+
aia Arosi
|
27 |
+
aka Akan
|
28 |
+
akb Batak Angkola
|
29 |
+
ake Akawaio
|
30 |
+
akp Siwu
|
31 |
+
alj Alangan
|
32 |
+
alp Alune
|
33 |
+
alt Altai, Southern
|
34 |
+
alz Alur
|
35 |
+
ame Yanesha’
|
36 |
+
amf Hamer-Banna
|
37 |
+
amh Amharic
|
38 |
+
ami Amis
|
39 |
+
amk Ambai
|
40 |
+
ann Obolo
|
41 |
+
any Anyin
|
42 |
+
aoz Uab Meto
|
43 |
+
apb Sa’a
|
44 |
+
apr Arop-Lokep
|
45 |
+
ara Arabic
|
46 |
+
arl Arabela
|
47 |
+
asa Asu
|
48 |
+
asg Cishingini
|
49 |
+
asm Assamese
|
50 |
+
ast Asturian
|
51 |
+
ata Pele-Ata
|
52 |
+
atb Zaiwa
|
53 |
+
atg Ivbie North-Okpela-Arhe
|
54 |
+
ati Attié
|
55 |
+
atq Aralle-Tabulahan
|
56 |
+
ava Avar
|
57 |
+
avn Avatime
|
58 |
+
avu Avokaya
|
59 |
+
awa Awadhi
|
60 |
+
awb Awa
|
61 |
+
ayo Ayoreo
|
62 |
+
ayr Aymara, Central
|
63 |
+
ayz Mai Brat
|
64 |
+
azb Azerbaijani, South
|
65 |
+
azg Amuzgo, San Pedro Amuzgos
|
66 |
+
azj-script_cyrillic Azerbaijani, North
|
67 |
+
azj-script_latin Azerbaijani, North
|
68 |
+
azz Nahuatl, Highland Puebla
|
69 |
+
bak Bashkort
|
70 |
+
bam Bamanankan
|
71 |
+
ban Bali
|
72 |
+
bao Waimaha
|
73 |
+
bas Basaa
|
74 |
+
bav Vengo
|
75 |
+
bba Baatonum
|
76 |
+
bbb Barai
|
77 |
+
bbc Batak Toba
|
78 |
+
bbo Konabéré
|
79 |
+
bcc-script_arabic Balochi, Southern
|
80 |
+
bcc-script_latin Balochi, Southern
|
81 |
+
bcl Bikol, Central
|
82 |
+
bcw Bana
|
83 |
+
bdg Bonggi
|
84 |
+
bdh Baka
|
85 |
+
bdq Bahnar
|
86 |
+
bdu Oroko
|
87 |
+
bdv Bodo Parja
|
88 |
+
beh Biali
|
89 |
+
bel Belarusian
|
90 |
+
bem Bemba
|
91 |
+
ben Bengali
|
92 |
+
bep Behoa
|
93 |
+
bex Jur Modo
|
94 |
+
bfa Bari
|
95 |
+
bfo Birifor, Malba
|
96 |
+
bfy Bagheli
|
97 |
+
bfz Pahari, Mahasu
|
98 |
+
bgc Haryanvi
|
99 |
+
bgq Bagri
|
100 |
+
bgr Chin, Bawm
|
101 |
+
bgt Bughotu
|
102 |
+
bgw Bhatri
|
103 |
+
bha Bharia
|
104 |
+
bht Bhattiyali
|
105 |
+
bhz Bada
|
106 |
+
bib Bisa
|
107 |
+
bim Bimoba
|
108 |
+
bis Bislama
|
109 |
+
biv Birifor, Southern
|
110 |
+
bjr Binumarien
|
111 |
+
bjv Bedjond
|
112 |
+
bjw Bakwé
|
113 |
+
bjz Baruga
|
114 |
+
bkd Binukid
|
115 |
+
bkv Bekwarra
|
116 |
+
blh Kuwaa
|
117 |
+
blt Tai Dam
|
118 |
+
blx Ayta, Mag-Indi
|
119 |
+
blz Balantak
|
120 |
+
bmq Bomu
|
121 |
+
bmr Muinane
|
122 |
+
bmu Somba-Siawari
|
123 |
+
bmv Bum
|
124 |
+
bng Benga
|
125 |
+
bno Bantoanon
|
126 |
+
bnp Bola
|
127 |
+
boa Bora
|
128 |
+
bod Tibetan, Central
|
129 |
+
boj Anjam
|
130 |
+
bom Berom
|
131 |
+
bor Borôro
|
132 |
+
bos Bosnian
|
133 |
+
bov Tuwuli
|
134 |
+
box Buamu
|
135 |
+
bpr Blaan, Koronadal
|
136 |
+
bps Blaan, Sarangani
|
137 |
+
bqc Boko
|
138 |
+
bqi Bakhtiâri
|
139 |
+
bqj Bandial
|
140 |
+
bqp Bisã
|
141 |
+
bre Breton
|
142 |
+
bru Bru, Eastern
|
143 |
+
bsc Oniyan
|
144 |
+
bsq Bassa
|
145 |
+
bss Akoose
|
146 |
+
btd Batak Dairi
|
147 |
+
bts Batak Simalungun
|
148 |
+
btt Bete-Bendi
|
149 |
+
btx Batak Karo
|
150 |
+
bud Ntcham
|
151 |
+
bul Bulgarian
|
152 |
+
bus Bokobaru
|
153 |
+
bvc Baelelea
|
154 |
+
bvz Bauzi
|
155 |
+
bwq Bobo Madaré, Southern
|
156 |
+
bwu Buli
|
157 |
+
byr Yipma
|
158 |
+
bzh Buang, Mapos
|
159 |
+
bzi Bisu
|
160 |
+
bzj Belize English Creole
|
161 |
+
caa Ch’orti’
|
162 |
+
cab Garifuna
|
163 |
+
cac-dialect_sanmateoixtatan Chuj
|
164 |
+
cac-dialect_sansebastiancoatan Chuj
|
165 |
+
cak-dialect_central Kaqchikel
|
166 |
+
cak-dialect_santamariadejesus Kaqchikel
|
167 |
+
cak-dialect_santodomingoxenacoj Kaqchikel
|
168 |
+
cak-dialect_southcentral Kaqchikel
|
169 |
+
cak-dialect_western Kaqchikel
|
170 |
+
cak-dialect_yepocapa Kaqchikel
|
171 |
+
cap Chipaya
|
172 |
+
car Carib
|
173 |
+
cas Tsimané
|
174 |
+
cat Catalan
|
175 |
+
cax Chiquitano
|
176 |
+
cbc Carapana
|
177 |
+
cbi Chachi
|
178 |
+
cbr Kakataibo-Kashibo
|
179 |
+
cbs Kashinawa
|
180 |
+
cbt Shawi
|
181 |
+
cbu Kandozi-Chapra
|
182 |
+
cbv Cacua
|
183 |
+
cce Chopi
|
184 |
+
cco Chinantec, Comaltepec
|
185 |
+
cdj Churahi
|
186 |
+
ceb Cebuano
|
187 |
+
ceg Chamacoco
|
188 |
+
cek Chin, Eastern Khumi
|
189 |
+
ces Czech
|
190 |
+
cfm Chin, Falam
|
191 |
+
cgc Kagayanen
|
192 |
+
che Chechen
|
193 |
+
chf Chontal, Tabasco
|
194 |
+
chv Chuvash
|
195 |
+
chz Chinantec, Ozumacín
|
196 |
+
cjo Ashéninka, Pajonal
|
197 |
+
cjp Cabécar
|
198 |
+
cjs Shor
|
199 |
+
ckb Kurdish, Central
|
200 |
+
cko Anufo
|
201 |
+
ckt Chukchi
|
202 |
+
cla Ron
|
203 |
+
cle Chinantec, Lealao
|
204 |
+
cly Chatino, Eastern Highland
|
205 |
+
cme Cerma
|
206 |
+
cmn-script_simplified Chinese, Mandarin
|
207 |
+
cmo-script_khmer Mnong, Central
|
208 |
+
cmo-script_latin Mnong, Central
|
209 |
+
cmr Mro-Khimi
|
210 |
+
cnh Chin, Hakha
|
211 |
+
cni Asháninka
|
212 |
+
cnl Chinantec, Lalana
|
213 |
+
cnt Chinantec, Tepetotutla
|
214 |
+
coe Koreguaje
|
215 |
+
cof Tsafiki
|
216 |
+
cok Cora, Santa Teresa
|
217 |
+
con Cofán
|
218 |
+
cot Caquinte
|
219 |
+
cou Wamey
|
220 |
+
cpa Chinantec, Palantla
|
221 |
+
cpb Ashéninka, Ucayali-Yurúa
|
222 |
+
cpu Ashéninka, Pichis
|
223 |
+
crh Crimean Tatar
|
224 |
+
crk-script_latin Cree, Plains
|
225 |
+
crk-script_syllabics Cree, Plains
|
226 |
+
crn Cora, El Nayar
|
227 |
+
crq Chorote, Iyo’wujwa
|
228 |
+
crs Seychelles French Creole
|
229 |
+
crt Chorote, Iyojwa’ja
|
230 |
+
csk Jola-Kasa
|
231 |
+
cso Chinantec, Sochiapam
|
232 |
+
ctd Chin, Tedim
|
233 |
+
ctg Chittagonian
|
234 |
+
cto Embera Catío
|
235 |
+
ctu Chol
|
236 |
+
cuc Chinantec, Usila
|
237 |
+
cui Cuiba
|
238 |
+
cuk Kuna, San Blas
|
239 |
+
cul Kulina
|
240 |
+
cwa Kabwa
|
241 |
+
cwe Kwere
|
242 |
+
cwt Kuwaataay
|
243 |
+
cya Chatino, Nopala
|
244 |
+
cym Welsh
|
245 |
+
daa Dangaléat
|
246 |
+
dah Gwahatike
|
247 |
+
dan Danish
|
248 |
+
dar Dargwa
|
249 |
+
dbj Ida’an
|
250 |
+
dbq Daba
|
251 |
+
ddn Dendi
|
252 |
+
ded Dedua
|
253 |
+
des Desano
|
254 |
+
deu German, Standard
|
255 |
+
dga Dagaare, Southern
|
256 |
+
dgi Dagara, Northern
|
257 |
+
dgk Dagba
|
258 |
+
dgo Dogri
|
259 |
+
dgr Tlicho
|
260 |
+
dhi Dhimal
|
261 |
+
did Didinga
|
262 |
+
dig Chidigo
|
263 |
+
dik Dinka, Southwestern
|
264 |
+
dip Dinka, Northeastern
|
265 |
+
div Maldivian
|
266 |
+
djk Aukan
|
267 |
+
dnj-dialect_blowowest Dan
|
268 |
+
dnj-dialect_gweetaawueast Dan
|
269 |
+
dnt Dani, Mid Grand Valley
|
270 |
+
dnw Dani, Western
|
271 |
+
dop Lukpa
|
272 |
+
dos Dogosé
|
273 |
+
dsh Daasanach
|
274 |
+
dso Desiya
|
275 |
+
dtp Kadazan Dusun
|
276 |
+
dts Dogon, Toro So
|
277 |
+
dug Chiduruma
|
278 |
+
dwr Dawro
|
279 |
+
dyi Sénoufo, Djimini
|
280 |
+
dyo Jola-Fonyi
|
281 |
+
dyu Jula
|
282 |
+
dzo Dzongkha
|
283 |
+
eip Lik
|
284 |
+
eka Ekajuk
|
285 |
+
ell Greek
|
286 |
+
emp Emberá, Northern
|
287 |
+
enb Markweeta
|
288 |
+
eng English
|
289 |
+
enx Enxet
|
290 |
+
epo Esperanto
|
291 |
+
ese Ese Ejja
|
292 |
+
ess Yupik, Saint Lawrence Island
|
293 |
+
est Estonian
|
294 |
+
eus Basque
|
295 |
+
evn Evenki
|
296 |
+
ewe Éwé
|
297 |
+
eza Ezaa
|
298 |
+
fal Fali, South
|
299 |
+
fao Faroese
|
300 |
+
far Fataleka
|
301 |
+
fas Persian
|
302 |
+
fij Fijian
|
303 |
+
fin Finnish
|
304 |
+
flr Fuliiru
|
305 |
+
fmu Muria, Far Western
|
306 |
+
fon Fon
|
307 |
+
fra French
|
308 |
+
frd Fordata
|
309 |
+
fry Frisian
|
310 |
+
ful Fulah
|
311 |
+
gag-script_cyrillic Gagauz
|
312 |
+
gag-script_latin Gagauz
|
313 |
+
gai Mbore
|
314 |
+
gam Kandawo
|
315 |
+
gau Gadaba, Mudhili
|
316 |
+
gbi Galela
|
317 |
+
gbk Gaddi
|
318 |
+
gbm Garhwali
|
319 |
+
gbo Grebo, Northern
|
320 |
+
gde Gude
|
321 |
+
geb Kire
|
322 |
+
gej Gen
|
323 |
+
gil Kiribati
|
324 |
+
gjn Gonja
|
325 |
+
gkn Gokana
|
326 |
+
gld Nanai
|
327 |
+
gle Irish
|
328 |
+
glg Galician
|
329 |
+
glk Gilaki
|
330 |
+
gmv Gamo
|
331 |
+
gna Kaansa
|
332 |
+
gnd Zulgo-Gemzek
|
333 |
+
gng Ngangam
|
334 |
+
gof-script_latin Gofa
|
335 |
+
gog Gogo
|
336 |
+
gor Gorontalo
|
337 |
+
gqr Gor
|
338 |
+
grc Greek, Ancient
|
339 |
+
gri Ghari
|
340 |
+
grn Guarani
|
341 |
+
grt Garo
|
342 |
+
gso Gbaya, Southwest
|
343 |
+
gub Guajajára
|
344 |
+
guc Wayuu
|
345 |
+
gud Dida, Yocoboué
|
346 |
+
guh Guahibo
|
347 |
+
guj Gujarati
|
348 |
+
guk Gumuz
|
349 |
+
gum Misak
|
350 |
+
guo Guayabero
|
351 |
+
guq Aché
|
352 |
+
guu Yanomamö
|
353 |
+
gux Gourmanchéma
|
354 |
+
gvc Wanano
|
355 |
+
gvl Gulay
|
356 |
+
gwi Gwich’in
|
357 |
+
gwr Gwere
|
358 |
+
gym Ngäbere
|
359 |
+
gyr Guarayu
|
360 |
+
had Hatam
|
361 |
+
hag Hanga
|
362 |
+
hak Chinese, Hakka
|
363 |
+
hap Hupla
|
364 |
+
hat Haitian Creole
|
365 |
+
hau Hausa
|
366 |
+
hay Haya
|
367 |
+
heb Hebrew
|
368 |
+
heh Hehe
|
369 |
+
hif Hindi, Fiji
|
370 |
+
hig Kamwe
|
371 |
+
hil Hiligaynon
|
372 |
+
hin Hindi
|
373 |
+
hlb Halbi
|
374 |
+
hlt Chin, Matu
|
375 |
+
hne Chhattisgarhi
|
376 |
+
hnn Hanunoo
|
377 |
+
hns Hindustani, Sarnami
|
378 |
+
hoc Ho
|
379 |
+
hoy Holiya
|
380 |
+
hrv Croatian
|
381 |
+
hsb Sorbian, Upper
|
382 |
+
hto Witoto, Minika
|
383 |
+
hub Wampís
|
384 |
+
hui Huli
|
385 |
+
hun Hungarian
|
386 |
+
hus-dialect_centralveracruz Huastec
|
387 |
+
hus-dialect_westernpotosino Huastec
|
388 |
+
huu Witoto, Murui
|
389 |
+
huv Huave, San Mateo del Mar
|
390 |
+
hvn Hawu
|
391 |
+
hwc Hawaii Pidgin
|
392 |
+
hye Armenian
|
393 |
+
hyw Armenian, Western
|
394 |
+
iba Iban
|
395 |
+
ibo Igbo
|
396 |
+
icr Islander English Creole
|
397 |
+
idd Ede Idaca
|
398 |
+
ifa Ifugao, Amganad
|
399 |
+
ifb Ifugao, Batad
|
400 |
+
ife Ifè
|
401 |
+
ifk Ifugao, Tuwali
|
402 |
+
ifu Ifugao, Mayoyao
|
403 |
+
ify Kallahan, Keley-i
|
404 |
+
ign Ignaciano
|
405 |
+
ikk Ika
|
406 |
+
ilb Ila
|
407 |
+
ilo Ilocano
|
408 |
+
imo Imbongu
|
409 |
+
ina Interlingua (International Auxiliary Language Association)
|
410 |
+
inb Inga
|
411 |
+
ind Indonesian
|
412 |
+
iou Tuma-Irumu
|
413 |
+
ipi Ipili
|
414 |
+
iqw Ikwo
|
415 |
+
iri Rigwe
|
416 |
+
irk Iraqw
|
417 |
+
isl Icelandic
|
418 |
+
ita Italian
|
419 |
+
itl Itelmen
|
420 |
+
itv Itawit
|
421 |
+
ixl-dialect_sangasparchajul Ixil
|
422 |
+
ixl-dialect_sanjuancotzal Ixil
|
423 |
+
ixl-dialect_santamarianebaj Ixil
|
424 |
+
izr Izere
|
425 |
+
izz Izii
|
426 |
+
jac Jakalteko
|
427 |
+
jam Jamaican English Creole
|
428 |
+
jav Javanese
|
429 |
+
jbu Jukun Takum
|
430 |
+
jen Dza
|
431 |
+
jic Tol
|
432 |
+
jiv Shuar
|
433 |
+
jmc Machame
|
434 |
+
jmd Yamdena
|
435 |
+
jpn Japanese
|
436 |
+
jun Juang
|
437 |
+
juy Juray
|
438 |
+
jvn Javanese, Suriname
|
439 |
+
kaa Karakalpak
|
440 |
+
kab Amazigh
|
441 |
+
kac Jingpho
|
442 |
+
kak Kalanguya
|
443 |
+
kam Kamba
|
444 |
+
kan Kannada
|
445 |
+
kao Xaasongaxango
|
446 |
+
kaq Capanahua
|
447 |
+
kat Georgian
|
448 |
+
kay Kamayurá
|
449 |
+
kaz Kazakh
|
450 |
+
kbo Keliko
|
451 |
+
kbp Kabiyè
|
452 |
+
kbq Kamano
|
453 |
+
kbr Kafa
|
454 |
+
kby Kanuri, Manga
|
455 |
+
kca Khanty
|
456 |
+
kcg Tyap
|
457 |
+
kdc Kutu
|
458 |
+
kde Makonde
|
459 |
+
kdh Tem
|
460 |
+
kdi Kumam
|
461 |
+
kdj Ng’akarimojong
|
462 |
+
kdl Tsikimba
|
463 |
+
kdn Kunda
|
464 |
+
kdt Kuay
|
465 |
+
kea Kabuverdianu
|
466 |
+
kek Q’eqchi’
|
467 |
+
ken Kenyang
|
468 |
+
keo Kakwa
|
469 |
+
ker Kera
|
470 |
+
key Kupia
|
471 |
+
kez Kukele
|
472 |
+
kfb Kolami, Northwestern
|
473 |
+
kff-script_telugu Koya
|
474 |
+
kfw Naga, Kharam
|
475 |
+
kfx Pahari, Kullu
|
476 |
+
khg Tibetan, Khams
|
477 |
+
khm Khmer
|
478 |
+
khq Songhay, Koyra Chiini
|
479 |
+
kia Kim
|
480 |
+
kij Kilivila
|
481 |
+
kik Gikuyu
|
482 |
+
kin Kinyarwanda
|
483 |
+
kir Kyrgyz
|
484 |
+
kjb Q’anjob’al
|
485 |
+
kje Kisar
|
486 |
+
kjg Khmu
|
487 |
+
kjh Khakas
|
488 |
+
kki Kagulu
|
489 |
+
kkj Kako
|
490 |
+
kle Kulung
|
491 |
+
klu Klao
|
492 |
+
klv Maskelynes
|
493 |
+
klw Tado
|
494 |
+
kma Konni
|
495 |
+
kmd Kalinga, Majukayang
|
496 |
+
kml Kalinga, Tanudan
|
497 |
+
kmr-script_arabic Kurdish, Northern
|
498 |
+
kmr-script_cyrillic Kurdish, Northern
|
499 |
+
kmr-script_latin Kurdish, Northern
|
500 |
+
kmu Kanite
|
501 |
+
knb Kalinga, Lubuagan
|
502 |
+
kne Kankanaey
|
503 |
+
knf Mankanya
|
504 |
+
knj Akateko
|
505 |
+
knk Kuranko
|
506 |
+
kno Kono
|
507 |
+
kog Kogi
|
508 |
+
kor Korean
|
509 |
+
kpq Korupun-Sela
|
510 |
+
kps Tehit
|
511 |
+
kpv Komi-Zyrian
|
512 |
+
kpy Koryak
|
513 |
+
kpz Kupsapiiny
|
514 |
+
kqe Kalagan
|
515 |
+
kqp Kimré
|
516 |
+
kqr Kimaragang
|
517 |
+
kqy Koorete
|
518 |
+
krc Karachay-Balkar
|
519 |
+
kri Krio
|
520 |
+
krj Kinaray-a
|
521 |
+
krl Karelian
|
522 |
+
krr Krung
|
523 |
+
krs Gbaya
|
524 |
+
kru Kurux
|
525 |
+
ksb Shambala
|
526 |
+
ksr Borong
|
527 |
+
kss Kisi, Southern
|
528 |
+
ktb Kambaata
|
529 |
+
ktj Krumen, Plapo
|
530 |
+
kub Kutep
|
531 |
+
kue Kuman
|
532 |
+
kum Kumyk
|
533 |
+
kus Kusaal
|
534 |
+
kvn Kuna, Border
|
535 |
+
kvw Wersing
|
536 |
+
kwd Kwaio
|
537 |
+
kwf Kwara’ae
|
538 |
+
kwi Awa-Cuaiquer
|
539 |
+
kxc Konso
|
540 |
+
kxf Kawyaw
|
541 |
+
kxm Khmer, Northern
|
542 |
+
kxv Kuvi
|
543 |
+
kyb Kalinga, Butbut
|
544 |
+
kyc Kyaka
|
545 |
+
kyf Kouya
|
546 |
+
kyg Keyagana
|
547 |
+
kyo Klon
|
548 |
+
kyq Kenga
|
549 |
+
kyu Kayah, Western
|
550 |
+
kyz Kayabí
|
551 |
+
kzf Kaili, Da’a
|
552 |
+
lac Lacandon
|
553 |
+
laj Lango
|
554 |
+
lam Lamba
|
555 |
+
lao Lao
|
556 |
+
las Lama
|
557 |
+
lat Latin
|
558 |
+
lav Latvian
|
559 |
+
law Lauje
|
560 |
+
lbj Ladakhi
|
561 |
+
lbw Tolaki
|
562 |
+
lcp Lawa, Western
|
563 |
+
lee Lyélé
|
564 |
+
lef Lelemi
|
565 |
+
lem Nomaande
|
566 |
+
lew Kaili, Ledo
|
567 |
+
lex Luang
|
568 |
+
lgg Lugbara
|
569 |
+
lgl Wala
|
570 |
+
lhu Lahu
|
571 |
+
lia Limba, West-Central
|
572 |
+
lid Nyindrou
|
573 |
+
lif Limbu
|
574 |
+
lin Lingala
|
575 |
+
lip Sekpele
|
576 |
+
lis Lisu
|
577 |
+
lit Lithuanian
|
578 |
+
lje Rampi
|
579 |
+
ljp Lampung Api
|
580 |
+
llg Lole
|
581 |
+
lln Lele
|
582 |
+
lme Pévé
|
583 |
+
lnd Lundayeh
|
584 |
+
lns Lamnso’
|
585 |
+
lob Lobi
|
586 |
+
lok Loko
|
587 |
+
lom Loma
|
588 |
+
lon Lomwe, Malawi
|
589 |
+
loq Lobala
|
590 |
+
lsi Lacid
|
591 |
+
lsm Saamya-Gwe
|
592 |
+
ltz Luxembourgish
|
593 |
+
luc Aringa
|
594 |
+
lug Ganda
|
595 |
+
luo Dholuo
|
596 |
+
lwo Luwo
|
597 |
+
lww Lewo
|
598 |
+
lzz Laz
|
599 |
+
maa-dialect_sanantonio Mazatec, San Jerónimo Tecóatl
|
600 |
+
maa-dialect_sanjeronimo Mazatec, San Jerónimo Tecóatl
|
601 |
+
mad Madura
|
602 |
+
mag Magahi
|
603 |
+
mah Marshallese
|
604 |
+
mai Maithili
|
605 |
+
maj Mazatec, Jalapa de Díaz
|
606 |
+
mak Makasar
|
607 |
+
mal Malayalam
|
608 |
+
mam-dialect_central Mam
|
609 |
+
mam-dialect_northern Mam
|
610 |
+
mam-dialect_southern Mam
|
611 |
+
mam-dialect_western Mam
|
612 |
+
maq Mazatec, Chiquihuitlán
|
613 |
+
mar Marathi
|
614 |
+
maw Mampruli
|
615 |
+
maz Mazahua, Central
|
616 |
+
mbb Manobo, Western Bukidnon
|
617 |
+
mbc Macushi
|
618 |
+
mbh Mangseng
|
619 |
+
mbj Nadëb
|
620 |
+
mbt Manobo, Matigsalug
|
621 |
+
mbu Mbula-Bwazza
|
622 |
+
mbz Mixtec, Amoltepec
|
623 |
+
mca Maka
|
624 |
+
mcb Matsigenka
|
625 |
+
mcd Sharanahua
|
626 |
+
mco Mixe, Coatlán
|
627 |
+
mcp Makaa
|
628 |
+
mcq Ese
|
629 |
+
mcu Mambila, Cameroon
|
630 |
+
mda Mada
|
631 |
+
mdf Moksha
|
632 |
+
mdv Mixtec, Santa Lucía Monteverde
|
633 |
+
mdy Male
|
634 |
+
med Melpa
|
635 |
+
mee Mengen
|
636 |
+
mej Meyah
|
637 |
+
men Mende
|
638 |
+
meq Merey
|
639 |
+
met Mato
|
640 |
+
mev Maan
|
641 |
+
mfe Morisyen
|
642 |
+
mfh Matal
|
643 |
+
mfi Wandala
|
644 |
+
mfk Mofu, North
|
645 |
+
mfq Moba
|
646 |
+
mfy Mayo
|
647 |
+
mfz Mabaan
|
648 |
+
mgd Moru
|
649 |
+
mge Mango
|
650 |
+
mgh Makhuwa-Meetto
|
651 |
+
mgo Meta’
|
652 |
+
mhi Ma’di
|
653 |
+
mhr Mari, Meadow
|
654 |
+
mhu Digaro-Mishmi
|
655 |
+
mhx Lhao Vo
|
656 |
+
mhy Ma’anyan
|
657 |
+
mib Mixtec, Atatlahuca
|
658 |
+
mie Mixtec, Ocotepec
|
659 |
+
mif Mofu-Gudur
|
660 |
+
mih Mixtec, Chayuco
|
661 |
+
mil Mixtec, Peñoles
|
662 |
+
mim Mixtec, Alacatlatzala
|
663 |
+
min Minangkabau
|
664 |
+
mio Mixtec, Pinotepa Nacional
|
665 |
+
mip Mixtec, Apasco-Apoala
|
666 |
+
miq Mískito
|
667 |
+
mit Mixtec, Southern Puebla
|
668 |
+
miy Mixtec, Ayutla
|
669 |
+
miz Mixtec, Coatzospan
|
670 |
+
mjl Mandeali
|
671 |
+
mjv Mannan
|
672 |
+
mkd Macedonian
|
673 |
+
mkl Mokole
|
674 |
+
mkn Malay, Kupang
|
675 |
+
mlg Malagasy
|
676 |
+
mlt Maltese
|
677 |
+
mmg Ambrym, North
|
678 |
+
mnb Muna
|
679 |
+
mnf Mundani
|
680 |
+
mnk Mandinka
|
681 |
+
mnw Mon
|
682 |
+
mnx Sougb
|
683 |
+
moa Mwan
|
684 |
+
mog Mongondow
|
685 |
+
mon Mongolian
|
686 |
+
mop Maya, Mopán
|
687 |
+
mor Moro
|
688 |
+
mos Mòoré
|
689 |
+
mox Molima
|
690 |
+
moz Mukulu
|
691 |
+
mpg Marba
|
692 |
+
mpm Mixtec, Yosondúa
|
693 |
+
mpp Migabac
|
694 |
+
mpx Misima-Panaeati
|
695 |
+
mqb Mbuko
|
696 |
+
mqf Momuna
|
697 |
+
mqj Mamasa
|
698 |
+
mqn Moronene
|
699 |
+
mri Maori
|
700 |
+
mrw Maranao
|
701 |
+
msy Aruamu
|
702 |
+
mtd Mualang
|
703 |
+
mtj Moskona
|
704 |
+
mto Mixe, Totontepec
|
705 |
+
muh Mündü
|
706 |
+
mup Malvi
|
707 |
+
mur Murle
|
708 |
+
muv Muthuvan
|
709 |
+
muy Muyang
|
710 |
+
mvp Duri
|
711 |
+
mwq Chin, Müün
|
712 |
+
mwv Mentawai
|
713 |
+
mxb Mixtec, Tezoatlán
|
714 |
+
mxq Mixe, Juquila
|
715 |
+
mxt Mixtec, Jamiltepec
|
716 |
+
mxv Mixtec, Metlatónoc
|
717 |
+
mya Burmese
|
718 |
+
myb Mbay
|
719 |
+
myk Sénoufo, Mamara
|
720 |
+
myl Moma
|
721 |
+
myv Erzya
|
722 |
+
myx Masaaba
|
723 |
+
myy Macuna
|
724 |
+
mza Mixtec, Santa María Zacatepec
|
725 |
+
mzi Mazatec, Ixcatlán
|
726 |
+
mzj Manya
|
727 |
+
mzk Mambila, Nigeria
|
728 |
+
mzm Mumuye
|
729 |
+
mzw Deg
|
730 |
+
nab Nambikuára, Southern
|
731 |
+
nag Nagamese
|
732 |
+
nan Chinese, Min Nan
|
733 |
+
nas Naasioi
|
734 |
+
naw Nawuri
|
735 |
+
nca Iyo
|
736 |
+
nch Nahuatl, Central Huasteca
|
737 |
+
ncj Nahuatl, Northern Puebla
|
738 |
+
ncl Nahuatl, Michoacán
|
739 |
+
ncu Chumburung
|
740 |
+
ndj Ndamba
|
741 |
+
ndp Kebu
|
742 |
+
ndv Ndut
|
743 |
+
ndy Lutos
|
744 |
+
ndz Ndogo
|
745 |
+
neb Toura
|
746 |
+
new Newar
|
747 |
+
nfa Dhao
|
748 |
+
nfr Nafaanra
|
749 |
+
nga Ngbaka
|
750 |
+
ngl Lomwe
|
751 |
+
ngp Ngulu
|
752 |
+
ngu Nahuatl, Guerrero
|
753 |
+
nhe Nahuatl, Eastern Huasteca
|
754 |
+
nhi Nahuatl, Zacatlán-Ahuacatlán-Tepetzintla
|
755 |
+
nhu Noone
|
756 |
+
nhw Nahuatl, Western Huasteca
|
757 |
+
nhx Nahuatl, Isthmus-Mecayapan
|
758 |
+
nhy Nahuatl, Northern Oaxaca
|
759 |
+
nia Nias
|
760 |
+
nij Ngaju
|
761 |
+
nim Nilamba
|
762 |
+
nin Ninzo
|
763 |
+
nko Nkonya
|
764 |
+
nlc Nalca
|
765 |
+
nld Dutch
|
766 |
+
nlg Gela
|
767 |
+
nlk Yali, Ninia
|
768 |
+
nmz Nawdm
|
769 |
+
nnb Nande
|
770 |
+
nno Norwegian Nynorsk
|
771 |
+
nnq Ngindo
|
772 |
+
nnw Nuni, Southern
|
773 |
+
noa Woun Meu
|
774 |
+
nob Norwegian Bokmål
|
775 |
+
nod Thai, Northern
|
776 |
+
nog Nogai
|
777 |
+
not Nomatsigenga
|
778 |
+
npi Nepali
|
779 |
+
npl Nahuatl, Southeastern Puebla
|
780 |
+
npy Napu
|
781 |
+
nso Sotho, Northern
|
782 |
+
nst Naga, Tangshang
|
783 |
+
nsu Nahuatl, Sierra Negra
|
784 |
+
ntm Nateni
|
785 |
+
ntr Delo
|
786 |
+
nuj Nyole
|
787 |
+
nus Nuer
|
788 |
+
nuz Nahuatl, Tlamacazapa
|
789 |
+
nwb Nyabwa
|
790 |
+
nxq Naxi
|
791 |
+
nya Chichewa
|
792 |
+
nyf Kigiryama
|
793 |
+
nyn Nyankore
|
794 |
+
nyo Nyoro
|
795 |
+
nyy Nyakyusa-Ngonde
|
796 |
+
nzi Nzema
|
797 |
+
obo Manobo, Obo
|
798 |
+
oci Occitan
|
799 |
+
ojb-script_latin Ojibwa, Northwestern
|
800 |
+
ojb-script_syllabics Ojibwa, Northwestern
|
801 |
+
oku Oku
|
802 |
+
old Mochi
|
803 |
+
omw Tairora, South
|
804 |
+
onb Lingao
|
805 |
+
ood Tohono O’odham
|
806 |
+
orm Oromo
|
807 |
+
ory Odia
|
808 |
+
oss Ossetic
|
809 |
+
ote Otomi, Mezquital
|
810 |
+
otq Otomi, Querétaro
|
811 |
+
ozm Koonzime
|
812 |
+
pab Parecís
|
813 |
+
pad Paumarí
|
814 |
+
pag Pangasinan
|
815 |
+
pam Kapampangan
|
816 |
+
pan Punjabi, Eastern
|
817 |
+
pao Paiute, Northern
|
818 |
+
pap Papiamentu
|
819 |
+
pau Palauan
|
820 |
+
pbb Nasa
|
821 |
+
pbc Patamona
|
822 |
+
pbi Parkwa
|
823 |
+
pce Palaung, Ruching
|
824 |
+
pcm Pidgin, Nigerian
|
825 |
+
peg Pengo
|
826 |
+
pez Penan, Eastern
|
827 |
+
pib Yine
|
828 |
+
pil Yom
|
829 |
+
pir Piratapuyo
|
830 |
+
pis Pijin
|
831 |
+
pjt Pitjantjatjara
|
832 |
+
pkb Kipfokomo
|
833 |
+
pls Popoloca, San Marcos Tlacoyalco
|
834 |
+
plw Palawano, Brooke’s Point
|
835 |
+
pmf Pamona
|
836 |
+
pny Pinyin
|
837 |
+
poh-dialect_eastern Poqomchi’
|
838 |
+
poh-dialect_western Poqomchi’
|
839 |
+
poi Popoluca, Highland
|
840 |
+
pol Polish
|
841 |
+
por Portuguese
|
842 |
+
poy Pogolo
|
843 |
+
ppk Uma
|
844 |
+
pps Popoloca, San Luís Temalacayuca
|
845 |
+
prf Paranan
|
846 |
+
prk Wa, Parauk
|
847 |
+
prt Prai
|
848 |
+
pse Malay, Central
|
849 |
+
pss Kaulong
|
850 |
+
ptu Bambam
|
851 |
+
pui Puinave
|
852 |
+
pus Pushto
|
853 |
+
pwg Gapapaiwa
|
854 |
+
pww Karen, Pwo Northern
|
855 |
+
pxm Mixe, Quetzaltepec
|
856 |
+
qub Quechua, Huallaga
|
857 |
+
quc-dialect_central K’iche’
|
858 |
+
quc-dialect_east K’iche’
|
859 |
+
quc-dialect_north K’iche’
|
860 |
+
quf Quechua, Lambayeque
|
861 |
+
quh Quechua, South Bolivian
|
862 |
+
qul Quechua, North Bolivian
|
863 |
+
quw Quichua, Tena Lowland
|
864 |
+
quy Quechua, Ayacucho
|
865 |
+
quz Quechua, Cusco
|
866 |
+
qvc Quechua, Cajamarca
|
867 |
+
qve Quechua, Eastern Apurímac
|
868 |
+
qvh Quechua, Huamalíes-Dos de Mayo Huánuco
|
869 |
+
qvm Quechua, Margos-Yarowilca-Lauricocha
|
870 |
+
qvn Quechua, North Junín
|
871 |
+
qvo Quichua, Napo
|
872 |
+
qvs Quechua, San Martín
|
873 |
+
qvw Quechua, Huaylla Wanca
|
874 |
+
qvz Quichua, Northern Pastaza
|
875 |
+
qwh Quechua, Huaylas Ancash
|
876 |
+
qxh Quechua, Panao
|
877 |
+
qxl Quichua, Salasaca Highland
|
878 |
+
qxn Quechua, Northern Conchucos Ancash
|
879 |
+
qxo Quechua, Southern Conchucos
|
880 |
+
qxr Quichua, Cañar Highland
|
881 |
+
rah Rabha
|
882 |
+
rai Ramoaaina
|
883 |
+
rap Rapa Nui
|
884 |
+
rav Sampang
|
885 |
+
raw Rawang
|
886 |
+
rej Rejang
|
887 |
+
rel Rendille
|
888 |
+
rgu Rikou
|
889 |
+
rhg Rohingya
|
890 |
+
rif-script_arabic Tarifit
|
891 |
+
rif-script_latin Tarifit
|
892 |
+
ril Riang Lang
|
893 |
+
rim Nyaturu
|
894 |
+
rjs Rajbanshi
|
895 |
+
rkt Rangpuri
|
896 |
+
rmc-script_cyrillic Romani, Carpathian
|
897 |
+
rmc-script_latin Romani, Carpathian
|
898 |
+
rmo Romani, Sinte
|
899 |
+
rmy-script_cyrillic Romani, Vlax
|
900 |
+
rmy-script_latin Romani, Vlax
|
901 |
+
rng Ronga
|
902 |
+
rnl Ranglong
|
903 |
+
roh-dialect_sursilv Romansh
|
904 |
+
roh-dialect_vallader Romansh
|
905 |
+
rol Romblomanon
|
906 |
+
ron Romanian
|
907 |
+
rop Kriol
|
908 |
+
rro Waima
|
909 |
+
rub Gungu
|
910 |
+
ruf Luguru
|
911 |
+
rug Roviana
|
912 |
+
run Rundi
|
913 |
+
rus Russian
|
914 |
+
sab Buglere
|
915 |
+
sag Sango
|
916 |
+
sah Yakut
|
917 |
+
saj Sahu
|
918 |
+
saq Samburu
|
919 |
+
sas Sasak
|
920 |
+
sat Santhali
|
921 |
+
sba Ngambay
|
922 |
+
sbd Samo, Southern
|
923 |
+
sbl Sambal, Botolan
|
924 |
+
sbp Sangu
|
925 |
+
sch Sakachep
|
926 |
+
sck Sadri
|
927 |
+
sda Toraja-Sa’dan
|
928 |
+
sea Semai
|
929 |
+
seh Sena
|
930 |
+
ses Songhay, Koyraboro Senni
|
931 |
+
sey Paicoca
|
932 |
+
sgb Ayta, Mag-antsi
|
933 |
+
sgj Surgujia
|
934 |
+
sgw Sebat Bet Gurage
|
935 |
+
shi Tachelhit
|
936 |
+
shk Shilluk
|
937 |
+
shn Shan
|
938 |
+
sho Shanga
|
939 |
+
shp Shipibo-Conibo
|
940 |
+
sid Sidamo
|
941 |
+
sig Paasaal
|
942 |
+
sil Sisaala, Tumulung
|
943 |
+
sja Epena
|
944 |
+
sjm Mapun
|
945 |
+
sld Sissala
|
946 |
+
slk Slovak
|
947 |
+
slu Selaru
|
948 |
+
slv Slovene
|
949 |
+
sml Sama, Central
|
950 |
+
smo Samoan
|
951 |
+
sna Shona
|
952 |
+
snd Sindhi
|
953 |
+
sne Bidayuh, Bau
|
954 |
+
snn Siona
|
955 |
+
snp Siane
|
956 |
+
snw Selee
|
957 |
+
som Somali
|
958 |
+
soy Miyobe
|
959 |
+
spa Spanish
|
960 |
+
spp Sénoufo, Supyire
|
961 |
+
spy Sabaot
|
962 |
+
sqi Albanian
|
963 |
+
sri Siriano
|
964 |
+
srm Saramaccan
|
965 |
+
srn Sranan Tongo
|
966 |
+
srp-script_cyrillic Serbian
|
967 |
+
srp-script_latin Serbian
|
968 |
+
srx Sirmauri
|
969 |
+
stn Owa
|
970 |
+
stp Tepehuan, Southeastern
|
971 |
+
suc Subanon, Western
|
972 |
+
suk Sukuma
|
973 |
+
sun Sunda
|
974 |
+
sur Mwaghavul
|
975 |
+
sus Susu
|
976 |
+
suv Puroik
|
977 |
+
suz Sunwar
|
978 |
+
swe Swedish
|
979 |
+
swh Swahili
|
980 |
+
sxb Suba
|
981 |
+
sxn Sangir
|
982 |
+
sya Siang
|
983 |
+
syl Sylheti
|
984 |
+
sza Semelai
|
985 |
+
tac Tarahumara, Western
|
986 |
+
taj Tamang, Eastern
|
987 |
+
tam Tamil
|
988 |
+
tao Yami
|
989 |
+
tap Taabwa
|
990 |
+
taq Tamasheq
|
991 |
+
tat Tatar
|
992 |
+
tav Tatuyo
|
993 |
+
tbc Takia
|
994 |
+
tbg Tairora, North
|
995 |
+
tbk Tagbanwa, Calamian
|
996 |
+
tbl Tboli
|
997 |
+
tby Tabaru
|
998 |
+
tbz Ditammari
|
999 |
+
tca Ticuna
|
1000 |
+
tcc Datooga
|
1001 |
+
tcs Torres Strait Creole
|
1002 |
+
tcz Chin, Thado
|
1003 |
+
tdj Tajio
|
1004 |
+
ted Krumen, Tepo
|
1005 |
+
tee Tepehua, Huehuetla
|
1006 |
+
tel Telugu
|
1007 |
+
tem Themne
|
1008 |
+
teo Ateso
|
1009 |
+
ter Terêna
|
1010 |
+
tes Tengger
|
1011 |
+
tew Tewa
|
1012 |
+
tex Tennet
|
1013 |
+
tfr Teribe
|
1014 |
+
tgj Tagin
|
1015 |
+
tgk Tajik
|
1016 |
+
tgl Tagalog
|
1017 |
+
tgo Sudest
|
1018 |
+
tgp Tangoa
|
1019 |
+
tha Thai
|
1020 |
+
thk Kitharaka
|
1021 |
+
thl Tharu, Dangaura
|
1022 |
+
tih Murut, Timugon
|
1023 |
+
tik Tikar
|
1024 |
+
tir Tigrigna
|
1025 |
+
tkr Tsakhur
|
1026 |
+
tlb Tobelo
|
1027 |
+
tlj Talinga-Bwisi
|
1028 |
+
tly Talysh
|
1029 |
+
tmc Tumak
|
1030 |
+
tmf Toba-Maskoy
|
1031 |
+
tna Tacana
|
1032 |
+
tng Tobanga
|
1033 |
+
tnk Kwamera
|
1034 |
+
tnn Tanna, North
|
1035 |
+
tnp Whitesands
|
1036 |
+
tnr Ménik
|
1037 |
+
tnt Tontemboan
|
1038 |
+
tob Toba
|
1039 |
+
toc Totonac, Coyutla
|
1040 |
+
toh Tonga
|
1041 |
+
tom Tombulu
|
1042 |
+
tos Totonac, Highland
|
1043 |
+
tpi Tok Pisin
|
1044 |
+
tpm Tampulma
|
1045 |
+
tpp Tepehua, Pisaflores
|
1046 |
+
tpt Tepehua, Tlachichilco
|
1047 |
+
trc Triqui, Copala
|
1048 |
+
tri Trió
|
1049 |
+
trn Trinitario
|
1050 |
+
trs Triqui, Chicahuaxtla
|
1051 |
+
tso Tsonga
|
1052 |
+
tsz Purepecha
|
1053 |
+
ttc Tektiteko
|
1054 |
+
tte Bwanabwana
|
1055 |
+
ttq-script_tifinagh Tamajaq, Tawallammat
|
1056 |
+
tue Tuyuca
|
1057 |
+
tuf Tunebo, Central
|
1058 |
+
tuk-script_arabic Turkmen
|
1059 |
+
tuk-script_latin Turkmen
|
1060 |
+
tuo Tucano
|
1061 |
+
tur Turkish
|
1062 |
+
tvw Sedoa
|
1063 |
+
twb Tawbuid
|
1064 |
+
twe Teiwa
|
1065 |
+
twu Termanu
|
1066 |
+
txa Tombonuo
|
1067 |
+
txq Tii
|
1068 |
+
txu Kayapó
|
1069 |
+
tye Kyanga
|
1070 |
+
tzh-dialect_bachajon Tzeltal
|
1071 |
+
tzh-dialect_tenejapa Tzeltal
|
1072 |
+
tzj-dialect_eastern Tz’utujil
|
1073 |
+
tzj-dialect_western Tz’utujil
|
1074 |
+
tzo-dialect_chamula Tzotzil
|
1075 |
+
tzo-dialect_chenalho Tzotzil
|
1076 |
+
ubl Bikol, Buhi’non
|
1077 |
+
ubu Umbu-Ungu
|
1078 |
+
udm Udmurt
|
1079 |
+
udu Uduk
|
1080 |
+
uig-script_arabic Uyghur
|
1081 |
+
uig-script_cyrillic Uyghur
|
1082 |
+
ukr Ukrainian
|
1083 |
+
umb Umbundu
|
1084 |
+
unr Mundari
|
1085 |
+
upv Uripiv-Wala-Rano-Atchin
|
1086 |
+
ura Urarina
|
1087 |
+
urb Kaapor
|
1088 |
+
urd-script_arabic Urdu
|
1089 |
+
urd-script_devanagari Urdu
|
1090 |
+
urd-script_latin Urdu
|
1091 |
+
urk Urak Lawoi’
|
1092 |
+
urt Urat
|
1093 |
+
ury Orya
|
1094 |
+
usp Uspanteko
|
1095 |
+
uzb-script_cyrillic Uzbek
|
1096 |
+
uzb-script_latin Uzbek
|
1097 |
+
vag Vagla
|
1098 |
+
vid Vidunda
|
1099 |
+
vie Vietnamese
|
1100 |
+
vif Vili
|
1101 |
+
vmw Makhuwa
|
1102 |
+
vmy Mazatec, Ayautla
|
1103 |
+
vot Vod
|
1104 |
+
vun Vunjo
|
1105 |
+
vut Vute
|
1106 |
+
wal-script_ethiopic Wolaytta
|
1107 |
+
wal-script_latin Wolaytta
|
1108 |
+
wap Wapishana
|
1109 |
+
war Waray-Waray
|
1110 |
+
waw Waiwai
|
1111 |
+
way Wayana
|
1112 |
+
wba Warao
|
1113 |
+
wlo Wolio
|
1114 |
+
wlx Wali
|
1115 |
+
wmw Mwani
|
1116 |
+
wob Wè Northern
|
1117 |
+
wol Wolof
|
1118 |
+
wsg Gondi, Adilabad
|
1119 |
+
wwa Waama
|
1120 |
+
xal Kalmyk-Oirat
|
1121 |
+
xdy Malayic Dayak
|
1122 |
+
xed Hdi
|
1123 |
+
xer Xerénte
|
1124 |
+
xho Xhosa
|
1125 |
+
xmm Malay, Manado
|
1126 |
+
xnj Chingoni
|
1127 |
+
xnr Kangri
|
1128 |
+
xog Soga
|
1129 |
+
xon Konkomba
|
1130 |
+
xrb Karaboro, Eastern
|
1131 |
+
xsb Sambal
|
1132 |
+
xsm Kasem
|
1133 |
+
xsr Sherpa
|
1134 |
+
xsu Sanumá
|
1135 |
+
xta Mixtec, Alcozauca
|
1136 |
+
xtd Mixtec, Diuxi-Tilantongo
|
1137 |
+
xte Ketengban
|
1138 |
+
xtm Mixtec, Magdalena Peñasco
|
1139 |
+
xtn Mixtec, Northern Tlaxiaco
|
1140 |
+
xua Kurumba, Alu
|
1141 |
+
xuo Kuo
|
1142 |
+
yaa Yaminahua
|
1143 |
+
yad Yagua
|
1144 |
+
yal Yalunka
|
1145 |
+
yam Yamba
|
1146 |
+
yao Yao
|
1147 |
+
yas Nugunu
|
1148 |
+
yat Yambeta
|
1149 |
+
yaz Lokaa
|
1150 |
+
yba Yala
|
1151 |
+
ybb Yemba
|
1152 |
+
ycl Lolopo
|
1153 |
+
ycn Yucuna
|
1154 |
+
yea Ravula
|
1155 |
+
yka Yakan
|
1156 |
+
yli Yali, Angguruk
|
1157 |
+
yor Yoruba
|
1158 |
+
yre Yaouré
|
1159 |
+
yua Maya, Yucatec
|
1160 |
+
yue-script_traditional Chinese, Yue
|
1161 |
+
yuz Yuracare
|
1162 |
+
yva Yawa
|
1163 |
+
zaa Zapotec, Sierra de Juárez
|
1164 |
+
zab Zapotec, Western Tlacolula Valley
|
1165 |
+
zac Zapotec, Ocotlán
|
1166 |
+
zad Zapotec, Cajonos
|
1167 |
+
zae Zapotec, Yareni
|
1168 |
+
zai Zapotec, Isthmus
|
1169 |
+
zam Zapotec, Miahuatlán
|
1170 |
+
zao Zapotec, Ozolotepec
|
1171 |
+
zaq Zapotec, Aloápam
|
1172 |
+
zar Zapotec, Rincón
|
1173 |
+
zas Zapotec, Santo Domingo Albarradas
|
1174 |
+
zav Zapotec, Yatzachi
|
1175 |
+
zaw Zapotec, Mitla
|
1176 |
+
zca Zapotec, Coatecas Altas
|
1177 |
+
zga Kinga
|
1178 |
+
zim Mesme
|
1179 |
+
ziw Zigula
|
1180 |
+
zlm Malay
|
1181 |
+
zmz Mbandja
|
1182 |
+
zne Zande
|
1183 |
+
zos Zoque, Francisco León
|
1184 |
+
zpc Zapotec, Choapan
|
1185 |
+
zpg Zapotec, Guevea de Humboldt
|
1186 |
+
zpi Zapotec, Santa María Quiegolani
|
1187 |
+
zpl Zapotec, Lachixío
|
1188 |
+
zpm Zapotec, Mixtepec
|
1189 |
+
zpo Zapotec, Amatlán
|
1190 |
+
zpt Zapotec, San Vicente Coatlán
|
1191 |
+
zpu Zapotec, Yalálag
|
1192 |
+
zpz Zapotec, Texmelucan
|
1193 |
+
ztq Zapotec, Quioquitani-Quierí
|
1194 |
+
zty Zapotec, Yatee
|
1195 |
+
zul Zulu
|
1196 |
+
zyb Zhuang, Yongbei
|
1197 |
+
zyp Chin, Zyphe
|
1198 |
+
zza Zaza
|
modules/app.py
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''
|
2 |
+
Created By Lewis Kamau Kimaru
|
3 |
+
Sema translator api backend
|
4 |
+
January 2024
|
5 |
+
Docker deployment
|
6 |
+
'''
|
7 |
+
|
8 |
+
from fastapi import FastAPI, HTTPException, Request
|
9 |
+
from fastapi.middleware.cors import CORSMiddleware
|
10 |
+
from fastapi.responses import HTMLResponse
|
11 |
+
import gradio as gr
|
12 |
+
import ctranslate2
|
13 |
+
import sentencepiece as spm
|
14 |
+
import fasttext
|
15 |
+
import uvicorn
|
16 |
+
import pytz
|
17 |
+
from datetime import datetime
|
18 |
+
import os
|
19 |
+
|
20 |
+
# FastAPI application instance; the route decorators further down register on it.
app = FastAPI()
|
21 |
+
|
22 |
+
# Replace fastText's `eprint` hook with a no-op so whatever it would print is discarded
# (presumably to hide the warning fastText emits from load_model -- TODO confirm).
fasttext.FastText.eprint = lambda x: None
|
23 |
+
|
24 |
+
# Get time of request
|
25 |
+
|
26 |
+
def get_time():
    """Return the current time in the Africa/Nairobi zone, formatted for logging.

    Returns:
        tuple[str, str]: ``(full_date, curr_time)`` where ``full_date`` is
        ``"<weekday> | YYYY-MM-DD | HH:MM:SS"`` and ``curr_time`` is the
        ``HH:MM:SS`` component on its own.
    """
    now = datetime.now(pytz.timezone('Africa/Nairobi'))

    # Format each field from the same instant so the two return values agree.
    weekday, iso_date, clock = (
        now.strftime(pattern) for pattern in ('%A', '%Y-%m-%d', '%H:%M:%S')
    )
    return " | ".join((weekday, iso_date, clock)), clock
|
36 |
+
|
37 |
+
# Load the model and tokenizer ..... only once!
|
38 |
+
# Decoding settings shared by every translation request.
beam_size = 1  # beam width passed to translate_batch; 1 is greedy search per the CTranslate2 docs
device = "cpu"  # or "cuda"

# Model artifacts are looked up in the directory that contains this module.
lang_model_file = "lid218e.bin"
sp_model_file = "spm.model"
ct_model_file = "sematrans-3.3B"

lang_model_full_path, sp_model_full_path, ct_model_full_path = (
    os.path.join(os.path.dirname(__file__), artifact)
    for artifact in (lang_model_file, sp_model_file, ct_model_file)
)

# Language prediction model (fastText)
print("\nimporting Language Prediction model")
lang_model = fasttext.load_model(lang_model_full_path)

# SentencePiece model used to split text into subword pieces and join them back
print("\nimporting SentencePiece model")
sp = spm.SentencePieceProcessor()
sp.load(sp_model_full_path)

# Translator model (CTranslate2)
print("\nimporting Translator model")
translator = ctranslate2.Translator(ct_model_full_path, device)

print('\nDone importing models\n')
|
62 |
+
|
63 |
+
|
64 |
+
def translate_detect(userinput: str, target_lang: str):
    """Detect the language of ``userinput`` and translate it into ``target_lang``.

    Args:
        userinput: Text to translate; surrounding whitespace is stripped.
        target_lang: Language token the decoder is forced to start with
            (e.g. ``"eng_Latn"``).

    Returns:
        tuple[str, list[str]]: the label predicted by the language
        identification model (with the ``__label__`` prefix removed) and a
        one-element list holding the translated text.
    """
    source_sents = [userinput.strip()]
    target_prefix = [[target_lang]] * len(source_sents)

    # Predict the source language. fastText's predict() raises ValueError on
    # text containing a newline, so line breaks are flattened for detection
    # only; the translator still receives the original text.
    predictions = lang_model.predict(source_sents[0].replace("\n", " "), k=1)
    source_lang = predictions[0][0].replace('__label__', '')

    # Subword the source sentences and wrap them as <source_lang> ... </s>
    source_sents_subworded = sp.encode(source_sents, out_type=str)
    source_sents_subworded = [[source_lang] + sent + ["</s>"] for sent in source_sents_subworded]

    # Translate the source sentences
    translations = translator.translate_batch(
        source_sents_subworded,
        batch_type="tokens",
        max_batch_size=2024,
        beam_size=beam_size,
        target_prefix=target_prefix,
    )
    translations = [translation[0]['tokens'] for translation in translations]

    # The forced prefix comes back as the first output token. Drop it at token
    # level: slicing it off the decoded string leaves a stray leading space.
    translations = [
        tokens[1:] if tokens[:1] == [target_lang] else tokens
        for tokens in translations
    ]

    # Desubword the target sentences
    translations_desubword = sp.decode(translations)

    # Return the source language and the translated text
    return source_lang, translations_desubword
|
93 |
+
|
94 |
+
def translate_enter(userinput: str, source_lang: str, target_lang: str):
    """Translate ``userinput`` from ``source_lang`` into ``target_lang``.

    Args:
        userinput: Text to translate; surrounding whitespace is stripped.
        source_lang: Language token prepended to the source token sequence.
        target_lang: Language token the decoder is forced to start with.

    Returns:
        str: the translated text.
    """
    source_sents = [userinput.strip()]
    target_prefix = [[target_lang]] * len(source_sents)

    # Subword the source sentences and wrap them as <source_lang> ... </s>
    source_sents_subworded = sp.encode(source_sents, out_type=str)
    source_sents_subworded = [[source_lang] + sent + ["</s>"] for sent in source_sents_subworded]

    # Translate the source sentences
    translations = translator.translate_batch(
        source_sents_subworded,
        batch_type="tokens",
        max_batch_size=2024,
        beam_size=beam_size,
        target_prefix=target_prefix,
    )
    translations = [translation[0]['tokens'] for translation in translations]

    # The forced prefix comes back as the first output token. Drop it at token
    # level: slicing it off the decoded string leaves a stray leading space.
    translations = [
        tokens[1:] if tokens[:1] == [target_lang] else tokens
        for tokens in translations
    ]

    # Desubword the target sentences
    translations_desubword = sp.decode(translations)

    # Only one sentence was submitted, so return its translation as a string
    return translations_desubword[0]
|
113 |
+
|
114 |
+
|
115 |
+
@app.get("/")
async def read_root():
    """Serve a static page that embeds the hosted Sema frontend in a frame.

    Returns:
        HTMLResponse: fixed HTML; nothing in it depends on the request.
    """
    # The frame URL is fully quoted: the previous markup had an opening quote
    # missing, which made the trailing apostrophe part of the URL.
    gradio_interface = """
    <html>
    <head>
    <meta name="viewport" content="width=device-width, height=device-height, initial-scale=1.0">
    <title>Sema</title>
    </head>
    <frameset>
    <frame src="https://kamau1-semaapi-frontend.hf.space/?embedded=true">
    </frameset>
    </html>
    """
    return HTMLResponse(content=gradio_interface)
|
129 |
+
|
130 |
+
|
131 |
+
@app.post("/translate_detect/")
async def translate_detect_endpoint(request: Request):
    """Translate ``userinput`` into ``target_lang``, detecting the source language.

    Reads ``userinput`` and ``target_lang`` from the JSON request body and
    logs the request and the response to stdout.

    Returns:
        dict: ``{"source_language": <str>, "translated_text": <str>}``.

    Raises:
        HTTPException: 422 when ``userinput`` or ``target_lang`` is missing
            or empty.
    """
    payload = await request.json()
    text = payload.get("userinput")
    target = payload.get("target_lang")

    received_at, _ = get_time()
    print(f"\nrequest: {received_at}\nTarget Language; {target}, User Input: {text}\n")

    if not (text and target):
        raise HTTPException(status_code=422, detail="Both 'userinput' and 'target_lang' are required.")

    detected, translated = translate_detect(text, target)

    _, finished_at = get_time()
    print(f"\nresponse: {finished_at}; ... Source_language: {detected}, Translated Text: {translated}\n\n")

    # translate_detect returns a list; the API exposes its first element.
    return {"source_language": detected, "translated_text": translated[0]}
|
149 |
+
|
150 |
+
|
151 |
+
@app.post("/translate_enter/")
async def translate_enter_endpoint(request: Request):
    """Translate ``userinput`` from an explicitly supplied source language.

    Reads ``userinput``, ``source_lang`` and ``target_lang`` from the JSON
    request body and logs the request and the response to stdout.

    Returns:
        dict: ``{"translated_text": <str>}``.

    Raises:
        HTTPException: 422 when any of the three fields is missing or empty.
    """
    datae = await request.json()
    userinpute = datae.get("userinput")
    source_lange = datae.get("source_lang")
    target_lange = datae.get("target_lang")
    efull_date = get_time()[0]
    print(f"\nrequest: {efull_date}\nSource_language; {source_lange}, Target Language; {target_lange}, User Input: {userinpute}\n")

    # translate_enter prepends source_lang to the token sequence, so a missing
    # value must be rejected here rather than reach the translator as None.
    if not userinpute or not source_lange or not target_lange:
        raise HTTPException(status_code=422, detail="'userinput', 'source_lang' and 'target_lang' are required.")

    translated_text_e = translate_enter(userinpute, source_lange, target_lange)
    ecurrent_time = get_time()[1]
    print(f"\nresponse: {ecurrent_time}; ... Translated Text: {translated_text_e}\n\n")
    return {
        "translated_text": translated_text_e,
    }
|
169 |
+
|
170 |
+
|
171 |
+
print("\nAPI starting .......\n")
|
modules/flores200_codes.py
ADDED
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
codes_as_string = '''Acehnese (Arabic script) ace_Arab
|
2 |
+
Acehnese (Latin script) ace_Latn
|
3 |
+
Mesopotamian Arabic acm_Arab
|
4 |
+
Ta’izzi-Adeni Arabic acq_Arab
|
5 |
+
Tunisian Arabic aeb_Arab
|
6 |
+
Afrikaans afr_Latn
|
7 |
+
South Levantine Arabic ajp_Arab
|
8 |
+
Akan aka_Latn
|
9 |
+
Amharic amh_Ethi
|
10 |
+
North Levantine Arabic apc_Arab
|
11 |
+
Modern Standard Arabic arb_Arab
|
12 |
+
Modern Standard Arabic (Romanized) arb_Latn
|
13 |
+
Najdi Arabic ars_Arab
|
14 |
+
Moroccan Arabic ary_Arab
|
15 |
+
Egyptian Arabic arz_Arab
|
16 |
+
Assamese asm_Beng
|
17 |
+
Asturian ast_Latn
|
18 |
+
Awadhi awa_Deva
|
19 |
+
Central Aymara ayr_Latn
|
20 |
+
South Azerbaijani azb_Arab
|
21 |
+
North Azerbaijani azj_Latn
|
22 |
+
Bashkir bak_Cyrl
|
23 |
+
Bambara bam_Latn
|
24 |
+
Balinese ban_Latn
|
25 |
+
Belarusian bel_Cyrl
|
26 |
+
Bemba bem_Latn
|
27 |
+
Bengali ben_Beng
|
28 |
+
Bhojpuri bho_Deva
|
29 |
+
Banjar (Arabic script) bjn_Arab
|
30 |
+
Banjar (Latin script) bjn_Latn
|
31 |
+
Standard Tibetan bod_Tibt
|
32 |
+
Bosnian bos_Latn
|
33 |
+
Buginese bug_Latn
|
34 |
+
Bulgarian bul_Cyrl
|
35 |
+
Catalan cat_Latn
|
36 |
+
Cebuano ceb_Latn
|
37 |
+
Czech ces_Latn
|
38 |
+
Chokwe cjk_Latn
|
39 |
+
Central Kurdish ckb_Arab
|
40 |
+
Crimean Tatar crh_Latn
|
41 |
+
Welsh cym_Latn
|
42 |
+
Danish dan_Latn
|
43 |
+
German deu_Latn
|
44 |
+
Southwestern Dinka dik_Latn
|
45 |
+
Dyula dyu_Latn
|
46 |
+
Dzongkha dzo_Tibt
|
47 |
+
Greek ell_Grek
|
48 |
+
English eng_Latn
|
49 |
+
Esperanto epo_Latn
|
50 |
+
Estonian est_Latn
|
51 |
+
Basque eus_Latn
|
52 |
+
Ewe ewe_Latn
|
53 |
+
Faroese fao_Latn
|
54 |
+
Fijian fij_Latn
|
55 |
+
Finnish fin_Latn
|
56 |
+
Fon fon_Latn
|
57 |
+
French fra_Latn
|
58 |
+
Friulian fur_Latn
|
59 |
+
Nigerian Fulfulde fuv_Latn
|
60 |
+
Scottish Gaelic gla_Latn
|
61 |
+
Irish gle_Latn
|
62 |
+
Galician glg_Latn
|
63 |
+
Guarani grn_Latn
|
64 |
+
Gujarati guj_Gujr
|
65 |
+
Haitian Creole hat_Latn
|
66 |
+
Hausa hau_Latn
|
67 |
+
Hebrew heb_Hebr
|
68 |
+
Hindi hin_Deva
|
69 |
+
Chhattisgarhi hne_Deva
|
70 |
+
Croatian hrv_Latn
|
71 |
+
Hungarian hun_Latn
|
72 |
+
Armenian hye_Armn
|
73 |
+
Igbo ibo_Latn
|
74 |
+
Ilocano ilo_Latn
|
75 |
+
Indonesian ind_Latn
|
76 |
+
Icelandic isl_Latn
|
77 |
+
Italian ita_Latn
|
78 |
+
Javanese jav_Latn
|
79 |
+
Japanese jpn_Jpan
|
80 |
+
Kabyle kab_Latn
|
81 |
+
Jingpho kac_Latn
|
82 |
+
Kamba kam_Latn
|
83 |
+
Kannada kan_Knda
|
84 |
+
Kashmiri (Arabic script) kas_Arab
|
85 |
+
Kashmiri (Devanagari script) kas_Deva
|
86 |
+
Georgian kat_Geor
|
87 |
+
Central Kanuri (Arabic script) knc_Arab
|
88 |
+
Central Kanuri (Latin script) knc_Latn
|
89 |
+
Kazakh kaz_Cyrl
|
90 |
+
Kabiyè kbp_Latn
|
91 |
+
Kabuverdianu kea_Latn
|
92 |
+
Khmer khm_Khmr
|
93 |
+
Kikuyu kik_Latn
|
94 |
+
Kinyarwanda kin_Latn
|
95 |
+
Kyrgyz kir_Cyrl
|
96 |
+
Kimbundu kmb_Latn
|
97 |
+
Northern Kurdish kmr_Latn
|
98 |
+
Kikongo kon_Latn
|
99 |
+
Korean kor_Hang
|
100 |
+
Lao lao_Laoo
|
101 |
+
Ligurian lij_Latn
|
102 |
+
Limburgish lim_Latn
|
103 |
+
Lingala lin_Latn
|
104 |
+
Lithuanian lit_Latn
|
105 |
+
Lombard lmo_Latn
|
106 |
+
Latgalian ltg_Latn
|
107 |
+
Luxembourgish ltz_Latn
|
108 |
+
Luba-Kasai lua_Latn
|
109 |
+
Ganda lug_Latn
|
110 |
+
Luo luo_Latn
|
111 |
+
Mizo lus_Latn
|
112 |
+
Standard Latvian lvs_Latn
|
113 |
+
Magahi mag_Deva
|
114 |
+
Maithili mai_Deva
|
115 |
+
Malayalam mal_Mlym
|
116 |
+
Marathi mar_Deva
|
117 |
+
Minangkabau (Arabic script) min_Arab
|
118 |
+
Minangkabau (Latin script) min_Latn
|
119 |
+
Macedonian mkd_Cyrl
|
120 |
+
Plateau Malagasy plt_Latn
|
121 |
+
Maltese mlt_Latn
|
122 |
+
Meitei (Bengali script) mni_Beng
|
123 |
+
Halh Mongolian khk_Cyrl
|
124 |
+
Mossi mos_Latn
|
125 |
+
Maori mri_Latn
|
126 |
+
Burmese mya_Mymr
|
127 |
+
Dutch nld_Latn
|
128 |
+
Norwegian Nynorsk nno_Latn
|
129 |
+
Norwegian Bokmål nob_Latn
|
130 |
+
Nepali npi_Deva
|
131 |
+
Northern Sotho nso_Latn
|
132 |
+
Nuer nus_Latn
|
133 |
+
Nyanja nya_Latn
|
134 |
+
Occitan oci_Latn
|
135 |
+
West Central Oromo gaz_Latn
|
136 |
+
Odia ory_Orya
|
137 |
+
Pangasinan pag_Latn
|
138 |
+
Eastern Panjabi pan_Guru
|
139 |
+
Papiamento pap_Latn
|
140 |
+
Western Persian pes_Arab
|
141 |
+
Polish pol_Latn
|
142 |
+
Portuguese por_Latn
|
143 |
+
Dari prs_Arab
|
144 |
+
Southern Pashto pbt_Arab
|
145 |
+
Ayacucho Quechua quy_Latn
|
146 |
+
Romanian ron_Latn
|
147 |
+
Rundi run_Latn
|
148 |
+
Russian rus_Cyrl
|
149 |
+
Sango sag_Latn
|
150 |
+
Sanskrit san_Deva
|
151 |
+
Santali sat_Olck
|
152 |
+
Sicilian scn_Latn
|
153 |
+
Shan shn_Mymr
|
154 |
+
Sinhala sin_Sinh
|
155 |
+
Slovak slk_Latn
|
156 |
+
Slovenian slv_Latn
|
157 |
+
Samoan smo_Latn
|
158 |
+
Shona sna_Latn
|
159 |
+
Sindhi snd_Arab
|
160 |
+
Somali som_Latn
|
161 |
+
Southern Sotho sot_Latn
|
162 |
+
Spanish spa_Latn
|
163 |
+
Tosk Albanian als_Latn
|
164 |
+
Sardinian srd_Latn
|
165 |
+
Serbian srp_Cyrl
|
166 |
+
Swati ssw_Latn
|
167 |
+
Sundanese sun_Latn
|
168 |
+
Swedish swe_Latn
|
169 |
+
Swahili swh_Latn
|
170 |
+
Silesian szl_Latn
|
171 |
+
Tamil tam_Taml
|
172 |
+
Tatar tat_Cyrl
|
173 |
+
Telugu tel_Telu
|
174 |
+
Tajik tgk_Cyrl
|
175 |
+
Tagalog tgl_Latn
|
176 |
+
Thai tha_Thai
|
177 |
+
Tigrinya tir_Ethi
|
178 |
+
Tamasheq (Latin script) taq_Latn
|
179 |
+
Tamasheq (Tifinagh script) taq_Tfng
|
180 |
+
Tok Pisin tpi_Latn
|
181 |
+
Tswana tsn_Latn
|
182 |
+
Tsonga tso_Latn
|
183 |
+
Turkmen tuk_Latn
|
184 |
+
Tumbuka tum_Latn
|
185 |
+
Turkish tur_Latn
|
186 |
+
Twi twi_Latn
|
187 |
+
Central Atlas Tamazight tzm_Tfng
|
188 |
+
Uyghur uig_Arab
|
189 |
+
Ukrainian ukr_Cyrl
|
190 |
+
Umbundu umb_Latn
|
191 |
+
Urdu urd_Arab
|
192 |
+
Northern Uzbek uzn_Latn
|
193 |
+
Venetian vec_Latn
|
194 |
+
Vietnamese vie_Latn
|
195 |
+
Waray war_Latn
|
196 |
+
Wolof wol_Latn
|
197 |
+
Xhosa xho_Latn
|
198 |
+
Eastern Yiddish ydd_Hebr
|
199 |
+
Yoruba yor_Latn
|
200 |
+
Yue Chinese yue_Hant
|
201 |
+
Chinese (Simplified) zho_Hans
|
202 |
+
Chinese (Traditional) zho_Hant
|
203 |
+
Standard Malay zsm_Latn
|
204 |
+
Zulu zul_Latn'''
|
205 |
+
|
206 |
+
# Rebind codes_as_string from the raw table text to a list with one entry per line.
codes_as_string = codes_as_string.split('\n')
|
207 |
+
|
208 |
+
# Maps the first column of each row (language name) to the second (language code).
flores_codes = {}
|
209 |
+
for code in codes_as_string:
|
210 |
+
# Each row must hold exactly two tab-separated fields; any other count
# makes the two-target unpacking raise ValueError.
lang, lang_code = code.split('\t')
|
211 |
+
flores_codes[lang] = lang_code
|
modules/lid218e.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ded5749a2ad79ae9ab7c9190c7c8b97ff20d54ad8b9527ffa50107238fc7f6a
|
3 |
+
size 1176355829
|
modules/sematrans-1.2B/config.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_source_bos": false,
|
3 |
+
"add_source_eos": false,
|
4 |
+
"bos_token": "<s>",
|
5 |
+
"decoder_start_token": "</s>",
|
6 |
+
"eos_token": "</s>",
|
7 |
+
"unk_token": "<unk>"
|
8 |
+
}
|
modules/sematrans-1.2B/model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd041a40b02d6e3343b174e84ad75c2a058c3a60951dca6bbb4aa9334e7b9ddf
|
3 |
+
size 1381827064
|
modules/sematrans-1.2B/shared_vocabulary.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
modules/sematrans-3.3B/config.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_source_bos": false,
|
3 |
+
"add_source_eos": false,
|
4 |
+
"bos_token": "<s>",
|
5 |
+
"decoder_start_token": "</s>",
|
6 |
+
"eos_token": "</s>",
|
7 |
+
"unk_token": "<unk>"
|
8 |
+
}
|
modules/sematrans-3.3B/model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cacb7f6f36b75d73549ab9d99d0bb95270ef65dabea07204adc5648c68274ae8
|
3 |
+
size 3363404280
|
modules/sematrans-3.3B/shared_vocabulary.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
modules/spm.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14bb8dfb35c0ffdea7bc01e56cea38b9e3d5efcdcb9c251d6b40538e1aab555a
|
3 |
+
size 4852054
|
modules/translator_all_langs.tsv
ADDED
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Acehnese-(Arabic script) ace_Arab
|
2 |
+
Acehnese-(Latin script) ace_Latn
|
3 |
+
Mesopotamian-(Arabic) acm_Arab
|
4 |
+
Ta’izzi-Adeni-(Arabic) acq_Arab
|
5 |
+
Tunisian-(Arabic) aeb_Arab
|
6 |
+
Afrikaans afr_Latn
|
7 |
+
South_Levantine_Arabic ajp_Arab
|
8 |
+
Akan aka_Latn
|
9 |
+
Amharic amh_Ethi
|
10 |
+
North_Levantine_Arabic apc_Arab
|
11 |
+
Modern_Standard_Arabic arb_Arab
|
12 |
+
Modern_Standard_Arabic_(Romanized) arb_Latn
|
13 |
+
Najdi_Arabic ars_Arab
|
14 |
+
Moroccan_Arabic ary_Arab
|
15 |
+
Egyptian_Arabic arz_Arab
|
16 |
+
Assamese asm_Beng
|
17 |
+
Asturian ast_Latn
|
18 |
+
Awadhi awa_Deva
|
19 |
+
Central_Aymara ayr_Latn
|
20 |
+
South_Azerbaijani azb_Arab
|
21 |
+
North_Azerbaijani azj_Latn
|
22 |
+
Bashkir bak_Cyrl
|
23 |
+
Bambara bam_Latn
|
24 |
+
Balinese ban_Latn
|
25 |
+
Belarusian bel_Cyrl
|
26 |
+
Bemba bem_Latn
|
27 |
+
Bengali ben_Beng
|
28 |
+
Bhojpuri bho_Deva
|
29 |
+
Banjar_(Arabic script) bjn_Arab
|
30 |
+
Banjar_(Latin script) bjn_Latn
|
31 |
+
Standard_Tibetan bod_Tibt
|
32 |
+
Bosnian bos_Latn
|
33 |
+
Buginese bug_Latn
|
34 |
+
Bulgarian bul_Cyrl
|
35 |
+
Catalan cat_Latn
|
36 |
+
Cebuano ceb_Latn
|
37 |
+
Czech ces_Latn
|
38 |
+
Chokwe cjk_Latn
|
39 |
+
Central_Kurdish ckb_Arab
|
40 |
+
Crimean_Tatar crh_Latn
|
41 |
+
Welsh cym_Latn
|
42 |
+
Danish dan_Latn
|
43 |
+
German deu_Latn
|
44 |
+
Southwestern_Dinka dik_Latn
|
45 |
+
Dyula dyu_Latn
|
46 |
+
Dzongkha dzo_Tibt
|
47 |
+
Greek ell_Grek
|
48 |
+
English eng_Latn
|
49 |
+
Esperanto epo_Latn
|
50 |
+
Estonian est_Latn
|
51 |
+
Basque eus_Latn
|
52 |
+
Ewe ewe_Latn
|
53 |
+
Faroese fao_Latn
|
54 |
+
Fijian fij_Latn
|
55 |
+
Finnish fin_Latn
|
56 |
+
Fon fon_Latn
|
57 |
+
French fra_Latn
|
58 |
+
Friulian fur_Latn
|
59 |
+
Nigerian_Fulfulde fuv_Latn
|
60 |
+
Scottish_Gaelic gla_Latn
|
61 |
+
Irish gle_Latn
|
62 |
+
Galician glg_Latn
|
63 |
+
Guarani grn_Latn
|
64 |
+
Gujarati guj_Gujr
|
65 |
+
Haitian_Creole hat_Latn
|
66 |
+
Hausa hau_Latn
|
67 |
+
Hebrew heb_Hebr
|
68 |
+
Hindi hin_Deva
|
69 |
+
Chhattisgarhi hne_Deva
|
70 |
+
Croatian hrv_Latn
|
71 |
+
Hungarian hun_Latn
|
72 |
+
Armenian hye_Armn
|
73 |
+
Igbo ibo_Latn
|
74 |
+
Ilocano ilo_Latn
|
75 |
+
Indonesian ind_Latn
|
76 |
+
Icelandic isl_Latn
|
77 |
+
Italian ita_Latn
|
78 |
+
Javanese jav_Latn
|
79 |
+
Japanese jpn_Jpan
|
80 |
+
Kabyle kab_Latn
|
81 |
+
Jingpho kac_Latn
|
82 |
+
Kamba kam_Latn
|
83 |
+
Kannada kan_Knda
|
84 |
+
Kashmiri_(Arabic script) kas_Arab
|
85 |
+
Kashmiri_(Devanagari script) kas_Deva
|
86 |
+
Georgian kat_Geor
|
87 |
+
Central_Kanuri_(Arabic script) knc_Arab
|
88 |
+
Central_Kanuri_(Latin script) knc_Latn
|
89 |
+
Kazakh kaz_Cyrl
|
90 |
+
Kabiyè kbp_Latn
|
91 |
+
Kabuverdianu kea_Latn
|
92 |
+
Khmer khm_Khmr
|
93 |
+
Kikuyu kik_Latn
|
94 |
+
Kinyarwanda kin_Latn
|
95 |
+
Kyrgyz kir_Cyrl
|
96 |
+
Kimbundu kmb_Latn
|
97 |
+
Northern_Kurdish kmr_Latn
|
98 |
+
Kikongo kon_Latn
|
99 |
+
Korean kor_Hang
|
100 |
+
Lao lao_Laoo
|
101 |
+
Ligurian lij_Latn
|
102 |
+
Limburgish lim_Latn
|
103 |
+
Lingala lin_Latn
|
104 |
+
Lithuanian lit_Latn
|
105 |
+
Lombard lmo_Latn
|
106 |
+
Latgalian ltg_Latn
|
107 |
+
Luxembourgish ltz_Latn
|
108 |
+
Luba-Kasai lua_Latn
|
109 |
+
Ganda lug_Latn
|
110 |
+
Luo luo_Latn
|
111 |
+
Mizo lus_Latn
|
112 |
+
Standard-Latvian lvs_Latn
|
113 |
+
Magahi mag_Deva
|
114 |
+
Maithili mai_Deva
|
115 |
+
Malayalam mal_Mlym
|
116 |
+
Marathi mar_Deva
|
117 |
+
Minangkabau-(Arabic script) min_Arab
|
118 |
+
Minangkabau-(Latin script) min_Latn
|
119 |
+
Macedonian mkd_Cyrl
|
120 |
+
Plateau-Malagasy plt_Latn
|
121 |
+
Maltese mlt_Latn
|
122 |
+
Meitei-(Bengali script) mni_Beng
|
123 |
+
Halh-Mongolian khk_Cyrl
|
124 |
+
Mossi mos_Latn
|
125 |
+
Maori mri_Latn
|
126 |
+
Burmese mya_Mymr
|
127 |
+
Dutch nld_Latn
|
128 |
+
Norwegian-Nynorsk nno_Latn
|
129 |
+
Norwegian-Bokmål nob_Latn
|
130 |
+
Nepali npi_Deva
|
131 |
+
Northern-Sotho nso_Latn
|
132 |
+
Nuer nus_Latn
|
133 |
+
Nyanja nya_Latn
|
134 |
+
Occitan oci_Latn
|
135 |
+
West-Central-Oromo gaz_Latn
|
136 |
+
Odia ory_Orya
|
137 |
+
Pangasinan pag_Latn
|
138 |
+
Eastern-Panjabi pan_Guru
|
139 |
+
Papiamento pap_Latn
|
140 |
+
Western-Persian pes_Arab
|
141 |
+
Polish pol_Latn
|
142 |
+
Portuguese por_Latn
|
143 |
+
Dari prs_Arab
|
144 |
+
Southern-Pashto pbt_Arab
|
145 |
+
Ayacucho-Quechua quy_Latn
|
146 |
+
Romanian ron_Latn
|
147 |
+
Rundi run_Latn
|
148 |
+
Russian rus_Cyrl
|
149 |
+
Sango sag_Latn
|
150 |
+
Sanskrit san_Deva
|
151 |
+
Santali sat_Olck
|
152 |
+
Sicilian scn_Latn
|
153 |
+
Shan shn_Mymr
|
154 |
+
Sinhala sin_Sinh
|
155 |
+
Slovak slk_Latn
|
156 |
+
Slovenian slv_Latn
|
157 |
+
Samoan smo_Latn
|
158 |
+
Shona sna_Latn
|
159 |
+
Sindhi snd_Arab
|
160 |
+
Somali som_Latn
|
161 |
+
Southern-Sotho sot_Latn
|
162 |
+
Spanish spa_Latn
|
163 |
+
Tosk-Albanian als_Latn
|
164 |
+
Sardinian srd_Latn
|
165 |
+
Serbian srp_Cyrl
|
166 |
+
Swati ssw_Latn
|
167 |
+
Sundanese sun_Latn
|
168 |
+
Swedish swe_Latn
|
169 |
+
Swahili swh_Latn
|
170 |
+
Silesian szl_Latn
|
171 |
+
Tamil tam_Taml
|
172 |
+
Tatar tat_Cyrl
|
173 |
+
Telugu tel_Telu
|
174 |
+
Tajik tgk_Cyrl
|
175 |
+
Tagalog tgl_Latn
|
176 |
+
Thai tha_Thai
|
177 |
+
Tigrinya tir_Ethi
|
178 |
+
Tamasheq-(Latin script) taq_Latn
|
179 |
+
Tamasheq-(Tifinagh script) taq_Tfng
|
180 |
+
Tok-Pisin tpi_Latn
|
181 |
+
Tswana tsn_Latn
|
182 |
+
Tsonga tso_Latn
|
183 |
+
Turkmen tuk_Latn
|
184 |
+
Tumbuka tum_Latn
|
185 |
+
Turkish tur_Latn
|
186 |
+
Twi twi_Latn
|
187 |
+
Central-Atlas-Tamazight tzm_Tfng
|
188 |
+
Uyghur uig_Arab
|
189 |
+
Ukrainian ukr_Cyrl
|
190 |
+
Umbundu umb_Latn
|
191 |
+
Urdu urd_Arab
|
192 |
+
Northern-Uzbek uzn_Latn
|
193 |
+
Venetian vec_Latn
|
194 |
+
Vietnamese vie_Latn
|
195 |
+
Waray war_Latn
|
196 |
+
Wolof wol_Latn
|
197 |
+
Xhosa xho_Latn
|
198 |
+
Eastern-Yiddish ydd_Hebr
|
199 |
+
Yoruba yor_Latn
|
200 |
+
Yue Chinese yue_Hant
|
201 |
+
Chinese-(Simplified) zho_Hans
|
202 |
+
Chinese-(Traditional) zho_Hant
|
203 |
+
Standard-Malay zsm_Latn
|
204 |
+
Zulu zul_Latn
|
requirements.txt
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
huggingface_hub
|
2 |
+
ctranslate2
|
3 |
+
sentencepiece
|
4 |
+
accelerate
|
5 |
+
bitsandbytes
|
6 |
+
transformers
|
7 |
+
requests
|
8 |
+
pypi-json
|
9 |
+
pytz
|
10 |
+
fasttext
|
11 |
+
gradio
|
12 |
+
fastapi
|
13 |
+
uvicorn[standard]==0.17.*
|
static/index.html
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html>
|
3 |
+
<head>
|
4 |
+
<meta charset="utf-8">
|
5 |
+
<meta name="viewport" content="width=device-width">
|
6 |
+
<title>Sema Translator</title>
|
7 |
+
<link rel="stylesheet" href="static/style.css" />
|
8 |
+
<py-script src="modules/flores200_codes.py"></py-script>
|
9 |
+
</head>
|
10 |
+
<body>
|
11 |
+
<div class="Header">
|
12 |
+
<h1>Sema Translator</h1>
|
13 |
+
<p>Unlock the Power of Global Communication with Sema Translator! Seamlessly bridging language barriers. With support for over 200 languages, Sema Translator opens up a realm of possibilities for building truly global applications.</p>
|
14 |
+
|
15 |
+
</div>
|
16 |
+
|
17 |
+
<div class="LanguageDropdown" style="text-align:center">
|
18 |
+
<h2>Select a Language:</h2>
|
19 |
+
<select id="languageSelect">
|
20 |
+
{"".join(f"<option value='{code}'>{lang} - {code}</option>" for lang, code in flores_codes.items())}
|
21 |
+
</select>
|
22 |
+
</div>
|
23 |
+
|
24 |
+
<div class="instructions">
|
25 |
+
<p>Use the following python code to access the api endpoint</p>
|
26 |
+
<pre style="text-align: left;">
|
27 |
+
import requests
|
28 |
+
|
29 |
+
url = "{public_url}/translate_detect/"
|
30 |
+
data = {
|
31 |
+
"userinput": "rũcinĩ rwega, niwokĩra wega?",
|
32 |
+
"target_lang": "eng_Latn",
|
33 |
+
}
|
34 |
+
|
35 |
+
response = requests.post(url, json=data)
|
36 |
+
result = response.json()
|
37 |
+
|
38 |
+
print(result)
|
39 |
+
|
40 |
+
source_language = result['source_language']
|
41 |
+
print("Source Language:", source_language)
|
42 |
+
|
43 |
+
translation = result['translated_text']
|
44 |
+
print("Translated text:", translation)
|
45 |
+
</pre>
|
46 |
+
</div>
|
47 |
+
|
48 |
+
<div class="footer">
|
49 |
+
<h1>Created by Lewis Kamau Kimaru</h1>
|
50 |
+
</div>
|
51 |
+
</body>
|
52 |
+
</html>
|
static/style.css
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.Header {
|
2 |
+
text-align: center;
|
3 |
+
}
|
4 |
+
.LanguageDropdown {
|
5 |
+
text-align: center;
|
6 |
+
}
|
7 |
+
.instructions {
|
8 |
+
background-color: #056e33;
|
9 |
+
padding: 20px;
|
10 |
+
}
|
11 |
+
.footer {
|
12 |
+
text-align: center;
|
13 |
+
}
|