Commit
·
72246a1
1
Parent(s):
b5c737b
Initial dashboard
Browse files- .gitignore +3 -0
- app.py +124 -0
- data/covered_languages.txt +87 -0
- data/merged_language_list_clean.csv +306 -0
- data/merged_language_list_with_duplicates.csv +342 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
.venv/
|
2 |
+
*.ipynb
|
3 |
+
__pycache__/
|
app.py
ADDED
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import plotly.graph_objects as go
|
4 |
+
|
5 |
+
def get_covered_languages():
    """Return the language codes of every language that already has a lead.

    Reads ``data/covered_languages.txt`` (one entry per line, where a line
    may name several comma-separated languages) and resolves each distinct
    language name to its code using the ``Language`` and ``Code`` columns of
    ``data/merged_language_list_with_duplicates.csv``.

    Returns:
        list: One code per distinct covered language, in order of first
        appearance in the text file.

    Raises:
        ValueError: If a covered language name has no row in the CSV.
    """
    # Load data
    all_languages = pd.read_csv('data/merged_language_list_with_duplicates.csv')
    with open("data/covered_languages.txt", encoding="utf-8") as f:
        lines = f.read().splitlines()

    # Split strings with commas and flatten the list. Blank entries (empty
    # lines, trailing commas) name no language and are dropped.
    names = (name.strip() for line in lines for name in line.split(','))
    # dict.fromkeys de-duplicates while keeping first-seen order.
    covered_languages = list(dict.fromkeys(name for name in names if name))

    # Build the name -> code lookup once. When a name appears on several
    # rows, the first row wins.
    code_by_language = (
        all_languages.drop_duplicates(subset='Language', keep='first')
        .set_index('Language')['Code']
        .to_dict()
    )

    missing = [lang for lang in covered_languages if lang not in code_by_language]
    if missing:
        # Raise explicitly instead of using `assert`, which is removed when
        # Python runs with -O and would let a partial result through.
        raise ValueError(
            "Mismatch between covered languages and their codes: "
            f"no code found for {missing}"
        )
    return [code_by_language[lang] for lang in covered_languages]
|
19 |
+
|
20 |
+
def build_dataframes(covered_language_codes):
    """Split the clean language list by whether a language has a lead.

    Reads ``data/merged_language_list_clean.csv`` and partitions its rows on
    whether the ``Code`` column value is in ``covered_language_codes``.

    Args:
        covered_language_codes: Codes of the languages that have a lead.

    Returns:
        tuple: ``(languages_with_lead, languages_without_lead)`` DataFrames,
        each a row subset of the CSV.
    """
    language_table = pd.read_csv('data/merged_language_list_clean.csv')

    # True for rows whose code belongs to a covered language.
    has_lead = language_table['Code'].isin(covered_language_codes)

    return language_table[has_lead], language_table[~has_lead]
|
30 |
+
|
31 |
+
def create_progress_bar(progress):
    """Build a single horizontal stacked bar of languages with/without a lead.

    The segment sizes are the row counts of the module-level
    ``languages_with_lead`` and ``languages_without_lead`` DataFrames, so
    both must be defined before this function is called.

    Args:
        progress: Currently unused; kept so the signature stays unchanged.

    Returns:
        plotly.graph_objects.Figure: The stacked bar with one count label
        centred on each non-empty segment.
    """
    top_labels = ['With lead', 'Without lead']

    colors = ['rgba(38, 24, 74, 0.8)', 'rgba(190, 192, 213, 1)']

    # One inner list per bar row, one count per stacked segment. The loops
    # below index into each row, so the counts must be nested (a flat list of
    # ints fails on `len(x_data[0])`).
    x_data = [[len(languages_with_lead), len(languages_without_lead)]]

    y_data = ['Progress']

    fig = go.Figure()

    # Add one trace per segment position so each segment gets its own colour.
    for i in range(len(x_data[0])):
        for xd, yd in zip(x_data, y_data):
            fig.add_trace(go.Bar(
                x=[xd[i]], y=[yd],
                orientation='h',
                marker=dict(
                    color=colors[i],
                    line=dict(color='rgb(248, 248, 249)', width=1)
                ),
                hoverinfo='text',
                hovertext=f"{top_labels[i]} records: {xd[i]}"
            ))

    fig.update_layout(
        xaxis=dict(
            showgrid=False,
            showline=False,
            showticklabels=False,
            zeroline=False,
            domain=[0.15, 1]
        ),
        yaxis=dict(
            showgrid=False,
            showline=False,
            showticklabels=False,
            zeroline=False,
            domain=[0.15, 0.5]
        ),
        barmode='stack',
        paper_bgcolor='rgb(248, 248, 255)',
        plot_bgcolor='rgb(248, 248, 255)',
        margin=dict(l=120, r=10, t=140, b=80),
        showlegend=False
    )

    annotations = []

    for yd, xd in zip(y_data, x_data):
        # labeling the y-axis
        annotations.append(dict(xref='paper', yref='y',
                                x=0.14, y=yd,
                                xanchor='right',
                                text=str(yd),
                                font=dict(family='Arial', size=14,
                                          color='rgb(67, 67, 67)'),
                                showarrow=False, align='right'))
        # labeling each non-empty segment at its horizontal midpoint;
        # `space` is the combined width of the segments to its left
        space = 0
        for count in xd:
            if count > 0:
                annotations.append(dict(xref='x', yref='y',
                                        x=space + (count / 2), y=yd,
                                        text=str(count),
                                        font=dict(family='Arial', size=14,
                                                  color='rgb(248, 248, 255)'),
                                        showarrow=False))
            space += count

    fig.update_layout(annotations=annotations, height=80)
    return fig
|
111 |
+
|
112 |
+
# Load the two tables once, before the layout is assembled, so a data problem
# surfaces before any UI component is created.
languages_with_lead, languages_without_lead = build_dataframes(get_covered_languages())

with gr.Blocks() as demo:
    gr.Markdown("## Language Leads Dashboard")
    with gr.Row():
        # NOTE(review): no figure is passed to this plot in the visible code,
        # so it appears to render empty — confirm whether it should be fed
        # from create_progress_bar.
        progress_bar_output = gr.Plot(label="Language Stats")
    with gr.Tab("Looking for leads!"):
        gr.Markdown("These languages don't have a lead yet! Would you like to lead one of them? Sign up using [this form](https://forms.gle/mFCMXNRjxvyFvW5q9).")
        gr.DataFrame(languages_without_lead)
    with gr.Tab("Languages with leads"):
        gr.Markdown("We found at least one lead for these languages:")
        gr.DataFrame(languages_with_lead)

# Only start the server when run as a script, so importing this module
# (for example from a test) does not launch it.
if __name__ == "__main__":
    demo.launch()
|
data/covered_languages.txt
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Fulfulde
|
2 |
+
Latvian
|
3 |
+
Igbo
|
4 |
+
Tamil
|
5 |
+
Hindi, Punjabi
|
6 |
+
Bengali
|
7 |
+
Dutch
|
8 |
+
Vietnamese
|
9 |
+
Italian
|
10 |
+
Sranan Tongo
|
11 |
+
Marathi
|
12 |
+
Gujarati, Hindi
|
13 |
+
Vietnamese
|
14 |
+
Turkish
|
15 |
+
Polish
|
16 |
+
Nepali
|
17 |
+
Chinese (Mandarin)
|
18 |
+
Czech, Slovak
|
19 |
+
Slovak
|
20 |
+
French, Venetian
|
21 |
+
Chinese (Yue/Cantonese)
|
22 |
+
Hausa
|
23 |
+
Amharic
|
24 |
+
Telugu
|
25 |
+
Swahili
|
26 |
+
Yoruba
|
27 |
+
Swedish, Yoruba
|
28 |
+
Afrikaans
|
29 |
+
Nigerian Pidgin, Yoruba
|
30 |
+
Yoruba
|
31 |
+
Yoruba
|
32 |
+
Twi
|
33 |
+
Nigerian Pidgin, Yoruba
|
34 |
+
Amharic
|
35 |
+
Twi, Ewe, Dagbani, Fante
|
36 |
+
Swedish
|
37 |
+
Hebrew
|
38 |
+
Pijin
|
39 |
+
Hebrew, Russian
|
40 |
+
Dagbani
|
41 |
+
Hebrew
|
42 |
+
Sanskrit
|
43 |
+
Zulu
|
44 |
+
Kinyarwanda
|
45 |
+
Kinyarwanda
|
46 |
+
Najdi Arabic
|
47 |
+
Najdi Arabic
|
48 |
+
Somali, Sundanese, Xhosa
|
49 |
+
Urdu
|
50 |
+
Kannada
|
51 |
+
German
|
52 |
+
Kirghiz
|
53 |
+
Lingala
|
54 |
+
Fante
|
55 |
+
Ewe
|
56 |
+
Filipino
|
57 |
+
Ewe
|
58 |
+
Saraiki, Urdu
|
59 |
+
Korean
|
60 |
+
Rundi
|
61 |
+
Kikuyu
|
62 |
+
Gilaki, Persian
|
63 |
+
Amis, Chinese (Min Nan)
|
64 |
+
Japanese
|
65 |
+
Thai
|
66 |
+
Malay
|
67 |
+
Ido
|
68 |
+
Chewa, Tumbuka
|
69 |
+
Awadhi, Odia, Sindhi
|
70 |
+
Kashmiri
|
71 |
+
Kazakh
|
72 |
+
Uzbek
|
73 |
+
Asturian
|
74 |
+
Wolof
|
75 |
+
Tigrinya
|
76 |
+
Lombard
|
77 |
+
Crimean Turkish, Tatar
|
78 |
+
Bashkir
|
79 |
+
Mari
|
80 |
+
Crimean Turkish
|
81 |
+
Yakut
|
82 |
+
Chuvash
|
83 |
+
Tatar
|
84 |
+
Swati
|
85 |
+
Tuvan
|
86 |
+
Bashkir
|
87 |
+
Tatar
|
data/merged_language_list_clean.csv
ADDED
@@ -0,0 +1,306 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Language,Code
|
2 |
+
Acehnese,ace
|
3 |
+
Adyghe,ady
|
4 |
+
Afrikaans,afr
|
5 |
+
Alangan,alj
|
6 |
+
Tosk Albanian,als
|
7 |
+
Amharic,amh
|
8 |
+
Amis,ami
|
9 |
+
Anglo-Saxon,ang
|
10 |
+
Angika,anp
|
11 |
+
Standard Arabic,arb
|
12 |
+
Aragonese,arg
|
13 |
+
Najdi Arabic,ars
|
14 |
+
Moroccan Arabic,ary
|
15 |
+
Egyptian Arabic,arz
|
16 |
+
Assamese,asm
|
17 |
+
Asturian,ast
|
18 |
+
Atikamekw,atj
|
19 |
+
Avaric,ava
|
20 |
+
Kotava,avk
|
21 |
+
Awadhi,awa
|
22 |
+
Central Aymara,ayr
|
23 |
+
South Azerbaijani,azb
|
24 |
+
North Azerbaijani,azj
|
25 |
+
Bashkir,bak
|
26 |
+
Bambara,bam
|
27 |
+
Balinese,ban
|
28 |
+
Bavarian,bar
|
29 |
+
Bikol,bcl
|
30 |
+
Belarusian,bel
|
31 |
+
Bengali,ben
|
32 |
+
Betawi,bew
|
33 |
+
Bhojpuri,bho
|
34 |
+
Bislama,bis
|
35 |
+
Banjar,bjn
|
36 |
+
Pa'o Karen,blk
|
37 |
+
Tibetan,bod
|
38 |
+
Bosnian,bos
|
39 |
+
Bishnupriya,bpy
|
40 |
+
Breton,bre
|
41 |
+
Buginese,bug
|
42 |
+
Bulgarian,bul
|
43 |
+
Buryat,bxr
|
44 |
+
Catalan,cat
|
45 |
+
Chavacano,cbk
|
46 |
+
Cebuano,ceb
|
47 |
+
Czech,ces
|
48 |
+
Chechen,che
|
49 |
+
Church Slavic,chu
|
50 |
+
Chuvash,chv
|
51 |
+
Central Kurdish,ckb
|
52 |
+
Mandarin Chinese,cmn
|
53 |
+
Montenegrin,cnr
|
54 |
+
Cornish,cor
|
55 |
+
Corsican,cos
|
56 |
+
Crimean Tatar,crh
|
57 |
+
Kashubian,csb
|
58 |
+
Welsh,cym
|
59 |
+
Dagbani,dag
|
60 |
+
Danish,dan
|
61 |
+
German,deu
|
62 |
+
Dinka,dik
|
63 |
+
Doteli,dty
|
64 |
+
Dzongkha,dzo
|
65 |
+
Standard Estonian,ekk
|
66 |
+
Greek,ell
|
67 |
+
Emilian,eml
|
68 |
+
Esperanto,epo
|
69 |
+
North Alaskan Inupiatun,esi
|
70 |
+
Northwest Alaska Inupiatun,esk
|
71 |
+
Basque,eus
|
72 |
+
Ewe,ewe
|
73 |
+
Extremaduran,ext
|
74 |
+
Faroese,fao
|
75 |
+
Persian,fas
|
76 |
+
Fante,fat
|
77 |
+
Fijian,fij
|
78 |
+
Filipino,fil
|
79 |
+
Finnish,fin
|
80 |
+
Fon,fon
|
81 |
+
French,fra
|
82 |
+
Arpitan,frp
|
83 |
+
Northern Frisian,frr
|
84 |
+
Western Frisian,fry
|
85 |
+
Friulian,fur
|
86 |
+
Nigerian Fulfulde,fuv
|
87 |
+
Gagauz,gag
|
88 |
+
West Central Oromo,gaz
|
89 |
+
Guianese Creole,gcr
|
90 |
+
Scottish Gaelic,gla
|
91 |
+
Irish,gle
|
92 |
+
Galician,glg
|
93 |
+
Gilaki,glk
|
94 |
+
Manx,glv
|
95 |
+
Middle High German (ca. 1050-1500),gmh
|
96 |
+
Goan Konkani,gom
|
97 |
+
Gorontalo,gor
|
98 |
+
Ancient Greek (to 1453),grc
|
99 |
+
Swiss German,gsw
|
100 |
+
Wayuu,guc
|
101 |
+
Paraguayan Guaraní,gug
|
102 |
+
Gujarati,guj
|
103 |
+
Gurenne,gur
|
104 |
+
Gun,guw
|
105 |
+
Hakka Chinese,hak
|
106 |
+
Haitian,hat
|
107 |
+
Hausa,hau
|
108 |
+
Hawaiian,haw
|
109 |
+
Hebrew,heb
|
110 |
+
Fiji Hindi,hif
|
111 |
+
Hiligaynon,hil
|
112 |
+
Hindi,hin
|
113 |
+
Hmong Njua,hnj
|
114 |
+
Croatian,hrv
|
115 |
+
Upper Sorbian,hsb
|
116 |
+
Hungarian,hun
|
117 |
+
Armenian,hye
|
118 |
+
Western Armenian,hyw
|
119 |
+
Igbo,ibo
|
120 |
+
Ido,ido
|
121 |
+
Iloko,ilo
|
122 |
+
Interlingua,ina
|
123 |
+
Indonesian,ind
|
124 |
+
Ingush,inh
|
125 |
+
Icelandic,isl
|
126 |
+
Italian,ita
|
127 |
+
Jamaican Creole English,jam
|
128 |
+
Javanese,jav
|
129 |
+
Lojban,jbo
|
130 |
+
Japanese,jpn
|
131 |
+
Karakalpak,kaa
|
132 |
+
Kabyle,kab
|
133 |
+
Kalaallisut,kal
|
134 |
+
Kannada,kan
|
135 |
+
Kashmiri,kas
|
136 |
+
Georgian,kat
|
137 |
+
Kazakh,kaz
|
138 |
+
Kabardian,kbd
|
139 |
+
Kabiyè,kbp
|
140 |
+
Tyap,kcg
|
141 |
+
Halh Mongolian,khk
|
142 |
+
Khmer,khm
|
143 |
+
Kikuyu,kik
|
144 |
+
Kinyarwanda,kin
|
145 |
+
Kirghiz,kir
|
146 |
+
Kirmanjki,kiu
|
147 |
+
Northern Kurdish,kmr
|
148 |
+
Kongo,kng
|
149 |
+
Komi-Permyak,koi
|
150 |
+
Korean,kor
|
151 |
+
Komi,kpv
|
152 |
+
Karachay-Balkar,krc
|
153 |
+
Colognian,ksh
|
154 |
+
Kamba,ktu
|
155 |
+
Ladino,lad
|
156 |
+
Lao,lao
|
157 |
+
Latin,lat
|
158 |
+
Lak,lbe
|
159 |
+
Lezghian,lez
|
160 |
+
Lingua Franca Nova,lfn
|
161 |
+
Ligurian,lij
|
162 |
+
Limburgish,lim
|
163 |
+
Lingala,lin
|
164 |
+
Lithuanian,lit
|
165 |
+
Ladin,lld
|
166 |
+
Lombard,lmo
|
167 |
+
Latgalian,ltg
|
168 |
+
Luxembourgish,ltz
|
169 |
+
Luganda,lug
|
170 |
+
Standard Latvian,lvs
|
171 |
+
Literary Chinese,lzh
|
172 |
+
Madurese,mad
|
173 |
+
Maithili,mai
|
174 |
+
Malayalam,mal
|
175 |
+
Marathi,mar
|
176 |
+
Moksha,mdf
|
177 |
+
Morisyen,mfe
|
178 |
+
Mari,mhr
|
179 |
+
Minangkabau,min
|
180 |
+
Macedonian,mkd
|
181 |
+
Maltese,mlt
|
182 |
+
Maori,mri
|
183 |
+
Eastern Mari,mrj
|
184 |
+
Musi,mui
|
185 |
+
Mirandese,mwl
|
186 |
+
Burmese,mya
|
187 |
+
Erzya,myv
|
188 |
+
Mazanderani,mzn
|
189 |
+
Nahuatl,nah
|
190 |
+
Min Nan Chinese,nan
|
191 |
+
Neapolitan,nap
|
192 |
+
Navajo,nav
|
193 |
+
South Ndebele,nbl
|
194 |
+
Low German,nds
|
195 |
+
Newar,new
|
196 |
+
Nias,nia
|
197 |
+
Dutch,nld
|
198 |
+
Norwegian Nynorsk,nno
|
199 |
+
Norwegian Bokmål,nob
|
200 |
+
Novial,nov
|
201 |
+
Nepali,npi
|
202 |
+
Narom,nrm
|
203 |
+
Northern Sotho,nso
|
204 |
+
Chewa,nya
|
205 |
+
Occitan,oci
|
206 |
+
Livvi,olo
|
207 |
+
Odia,ory
|
208 |
+
Ossetian,oss
|
209 |
+
Pangasinan,pag
|
210 |
+
Pampanga,pam
|
211 |
+
Punjabi,pan
|
212 |
+
Papiamento,pap
|
213 |
+
Southern Pashto,pbt
|
214 |
+
Picard,pcd
|
215 |
+
Nigerian Pidgin,pcm
|
216 |
+
Pennsylvania German,pdc
|
217 |
+
Palatine German,pfl
|
218 |
+
Pijin,pis
|
219 |
+
Plateau Malagasy,plt
|
220 |
+
Piedmontese,pms
|
221 |
+
Western Panjabi,pnb
|
222 |
+
Pontic,pnt
|
223 |
+
Polish,pol
|
224 |
+
Portuguese,por
|
225 |
+
South Bolivian Quechua,quh
|
226 |
+
Vlax Romani,rmy
|
227 |
+
Romansh,roh
|
228 |
+
Romanian,ron
|
229 |
+
Rusyn,rue
|
230 |
+
Rundi,run
|
231 |
+
Aromanian,rup
|
232 |
+
Russian,rus
|
233 |
+
Sango,sag
|
234 |
+
Yakut,sah
|
235 |
+
Sanskrit,san
|
236 |
+
Sicilian,scn
|
237 |
+
Scots,sco
|
238 |
+
Samogitian,sgs
|
239 |
+
Tachelhit,shi
|
240 |
+
Sinhala,sin
|
241 |
+
Saraiki,skr
|
242 |
+
Slovak,slk
|
243 |
+
Slovenian,slv
|
244 |
+
Northern Sami,sme
|
245 |
+
Inari Sami,smn
|
246 |
+
Samoan,smo
|
247 |
+
Shona,sna
|
248 |
+
Sindhi,snd
|
249 |
+
Somali,som
|
250 |
+
Southern Sotho,sot
|
251 |
+
Spanish,spa
|
252 |
+
Sardinian,srd
|
253 |
+
Sranan Tongo,srn
|
254 |
+
Serbian,srp
|
255 |
+
Swati,ssw
|
256 |
+
Saterland Frisian,stq
|
257 |
+
Sundanese,sun
|
258 |
+
Swedish,swe
|
259 |
+
Swahili,swh
|
260 |
+
Silesian,szl
|
261 |
+
Sakizaya,szy
|
262 |
+
Tahitian,tah
|
263 |
+
Tamil,tam
|
264 |
+
Tatar,tat
|
265 |
+
Atayal,tay
|
266 |
+
Tulu,tcy
|
267 |
+
Telugu,tel
|
268 |
+
Tetum,tet
|
269 |
+
Tajik,tgk
|
270 |
+
Thai,tha
|
271 |
+
Tigrinya,tir
|
272 |
+
Talysh,tly
|
273 |
+
Tongan,ton
|
274 |
+
Tok Pisin,tpi
|
275 |
+
Sediq,trv
|
276 |
+
Tswana,tsn
|
277 |
+
Tsonga,tso
|
278 |
+
Turkmen,tuk
|
279 |
+
Tumbuka,tum
|
280 |
+
Turkish,tur
|
281 |
+
Twi,twi
|
282 |
+
Tuvan,tyv
|
283 |
+
Central Atlas Tamazight,tzm
|
284 |
+
Udmurt,udm
|
285 |
+
Uighur,uig
|
286 |
+
Ukrainian,ukr
|
287 |
+
Urdu,urd
|
288 |
+
Northern Uzbek,uzn
|
289 |
+
Venetian,vec
|
290 |
+
Venda,ven
|
291 |
+
Veps,vep
|
292 |
+
Vietnamese,vie
|
293 |
+
West Flemish,vls
|
294 |
+
Volapük,vol
|
295 |
+
Võro,vro
|
296 |
+
Waray,war
|
297 |
+
Walloon,wln
|
298 |
+
Wolof,wol
|
299 |
+
Wu Chinese,wuu
|
300 |
+
Xhosa,xho
|
301 |
+
Yoruba,yor
|
302 |
+
Chinese (Yue/Cantonese),yue
|
303 |
+
Zeelandic,zea
|
304 |
+
Standard Malay,zsm
|
305 |
+
Zulu,zul
|
306 |
+
Yongbei Zhuang,zyb
|
data/merged_language_list_with_duplicates.csv
ADDED
@@ -0,0 +1,342 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Language,Code
|
2 |
+
Acehnese,ace
|
3 |
+
Adyghe,ady
|
4 |
+
Afrikaans,afr
|
5 |
+
Alangan,alj
|
6 |
+
Tosk Albanian,als
|
7 |
+
Albanian,als
|
8 |
+
Amharic,amh
|
9 |
+
Amis,ami
|
10 |
+
Anglo-Saxon,ang
|
11 |
+
Angika,anp
|
12 |
+
Standard Arabic,arb
|
13 |
+
Arabic,arb
|
14 |
+
Aragonese,arg
|
15 |
+
Najdi Arabic,ars
|
16 |
+
Moroccan Arabic,ary
|
17 |
+
Egyptian Arabic,arz
|
18 |
+
Assamese,asm
|
19 |
+
Asturian,ast
|
20 |
+
Atikamekw,atj
|
21 |
+
Avaric,ava
|
22 |
+
Kotava,avk
|
23 |
+
Awadhi,awa
|
24 |
+
Central Aymara,ayr
|
25 |
+
Aymara,ayr
|
26 |
+
South Azerbaijani,azb
|
27 |
+
Azerbaijani,azj
|
28 |
+
North Azerbaijani,azj
|
29 |
+
Bashkir,bak
|
30 |
+
Bambara,bam
|
31 |
+
Balinese,ban
|
32 |
+
Bavarian,bar
|
33 |
+
Bikol,bcl
|
34 |
+
Belarusian,bel
|
35 |
+
Bengali,ben
|
36 |
+
Betawi,bew
|
37 |
+
Bhojpuri,bho
|
38 |
+
Bislama,bis
|
39 |
+
Banjar,bjn
|
40 |
+
Pa'o Karen,blk
|
41 |
+
Tibetan,bod
|
42 |
+
Bosnian,bos
|
43 |
+
Bishnupriya,bpy
|
44 |
+
Breton,bre
|
45 |
+
Buginese,bug
|
46 |
+
Bulgarian,bul
|
47 |
+
Buryat,bxr
|
48 |
+
Catalan,cat
|
49 |
+
Chavacano,cbk
|
50 |
+
Cebuano,ceb
|
51 |
+
Czech,ces
|
52 |
+
Chechen,che
|
53 |
+
Church Slavic,chu
|
54 |
+
Chuvash,chv
|
55 |
+
Central Kurdish,ckb
|
56 |
+
Kurdish (Central),ckb
|
57 |
+
Mandarin Chinese,cmn
|
58 |
+
Chinese (Mandarin),cmn
|
59 |
+
Montenegrin,cnr
|
60 |
+
Cornish,cor
|
61 |
+
Corsican,cos
|
62 |
+
Crimean Tatar,crh
|
63 |
+
Crimean Turkish,crh
|
64 |
+
Kashubian,csb
|
65 |
+
Welsh,cym
|
66 |
+
Dagbani,dag
|
67 |
+
Danish,dan
|
68 |
+
German,deu
|
69 |
+
Dinka,dik
|
70 |
+
Doteli,dty
|
71 |
+
Dzongkha,dzo
|
72 |
+
Standard Estonian,ekk
|
73 |
+
Estonian,ekk
|
74 |
+
Greek,ell
|
75 |
+
Modern Greek (1453-),ell
|
76 |
+
Emilian,eml
|
77 |
+
Esperanto,epo
|
78 |
+
North Alaskan Inupiatun,esi
|
79 |
+
Northwest Alaska Inupiatun,esk
|
80 |
+
Basque,eus
|
81 |
+
Ewe,ewe
|
82 |
+
Extremaduran,ext
|
83 |
+
Faroese,fao
|
84 |
+
Persian,fas
|
85 |
+
Fante,fat
|
86 |
+
Fijian,fij
|
87 |
+
Filipino,fil
|
88 |
+
Tagalog,fil
|
89 |
+
Finnish,fin
|
90 |
+
Fon,fon
|
91 |
+
French,fra
|
92 |
+
Franco-Provençal,frp
|
93 |
+
Arpitan,frp
|
94 |
+
Northern Frisian,frr
|
95 |
+
Western Frisian,fry
|
96 |
+
Frisian,fry
|
97 |
+
Friulian,fur
|
98 |
+
Fulfulde,fuv
|
99 |
+
Nigerian Fulfulde,fuv
|
100 |
+
Gagauz,gag
|
101 |
+
West Central Oromo,gaz
|
102 |
+
Guianese Creole,gcr
|
103 |
+
Scottish Gaelic,gla
|
104 |
+
Irish,gle
|
105 |
+
Galician,glg
|
106 |
+
Gilaki,glk
|
107 |
+
Manx,glv
|
108 |
+
Middle High German (ca. 1050-1500),gmh
|
109 |
+
Goan Konkani,gom
|
110 |
+
Gorontalo,gor
|
111 |
+
Ancient Greek (to 1453),grc
|
112 |
+
Swiss German,gsw
|
113 |
+
Wayuu,guc
|
114 |
+
Guarani,gug
|
115 |
+
Paraguayan Guaraní,gug
|
116 |
+
Gujarati,guj
|
117 |
+
Gurenne,gur
|
118 |
+
Gun,guw
|
119 |
+
Hakka Chinese,hak
|
120 |
+
Haitian,hat
|
121 |
+
Hausa,hau
|
122 |
+
Hawaiian,haw
|
123 |
+
Hebrew,heb
|
124 |
+
Fiji Hindi,hif
|
125 |
+
Hiligaynon,hil
|
126 |
+
Hindi,hin
|
127 |
+
Hmong Njua,hnj
|
128 |
+
Croatian,hrv
|
129 |
+
Upper Sorbian,hsb
|
130 |
+
Hungarian,hun
|
131 |
+
Armenian,hye
|
132 |
+
Western Armenian,hyw
|
133 |
+
Igbo,ibo
|
134 |
+
Ido,ido
|
135 |
+
Iloko,ilo
|
136 |
+
Interlingua,ina
|
137 |
+
Indonesian,ind
|
138 |
+
Ingush,inh
|
139 |
+
Icelandic,isl
|
140 |
+
Italian,ita
|
141 |
+
Jamaican Creole English,jam
|
142 |
+
Javanese,jav
|
143 |
+
Lojban,jbo
|
144 |
+
Japanese,jpn
|
145 |
+
Karakalpak,kaa
|
146 |
+
Kabyle,kab
|
147 |
+
Kalaallisut,kal
|
148 |
+
Kannada,kan
|
149 |
+
Kashmiri,kas
|
150 |
+
Georgian,kat
|
151 |
+
Kazakh,kaz
|
152 |
+
Kabardian,kbd
|
153 |
+
Kabiyè,kbp
|
154 |
+
Tyap,kcg
|
155 |
+
Mongolian,khk
|
156 |
+
Halh Mongolian,khk
|
157 |
+
Khmer,khm
|
158 |
+
Kikuyu,kik
|
159 |
+
Kinyarwanda,kin
|
160 |
+
Kirghiz,kir
|
161 |
+
Kirmanjki (individual language),kiu
|
162 |
+
Kurdish (Northern),kmr
|
163 |
+
Northern Kurdish,kmr
|
164 |
+
Kongo,kng
|
165 |
+
Komi-Permyak,koi
|
166 |
+
Korean,kor
|
167 |
+
Komi,kpv
|
168 |
+
Karachay-Balkar,krc
|
169 |
+
Colognian,ksh
|
170 |
+
Kamba,ktu
|
171 |
+
Ladino,lad
|
172 |
+
Lao,lao
|
173 |
+
Latin,lat
|
174 |
+
Lak,lbe
|
175 |
+
Lezghian,lez
|
176 |
+
Lingua Franca Nova,lfn
|
177 |
+
Liguria,lij
|
178 |
+
Ligurian,lij
|
179 |
+
Limburgish,lim
|
180 |
+
Hindi,lim
|
181 |
+
Limburgan,lim
|
182 |
+
Lingala,lin
|
183 |
+
Lithuanian,lit
|
184 |
+
Ladin,lld
|
185 |
+
Lombard,lmo
|
186 |
+
Latgalian,ltg
|
187 |
+
Luxembourgish,ltz
|
188 |
+
Luganda,lug
|
189 |
+
Standard Latvian,lvs
|
190 |
+
Latvian,lvs
|
191 |
+
Literary Chinese,lzh
|
192 |
+
Madurese,mad
|
193 |
+
Maithili,mai
|
194 |
+
Malayalam,mal
|
195 |
+
Marathi,mar
|
196 |
+
Moksha,mdf
|
197 |
+
Morisyen,mfe
|
198 |
+
Mari,mhr
|
199 |
+
Minangkabau,min
|
200 |
+
Macedonian,mkd
|
201 |
+
Maltese,mlt
|
202 |
+
Maori,mri
|
203 |
+
Eastern Mari,mrj
|
204 |
+
Musi,mui
|
205 |
+
Mirandese,mwl
|
206 |
+
Burmese,mya
|
207 |
+
Erzya,myv
|
208 |
+
Mazanderani,mzn
|
209 |
+
Nahuatl,nah
|
210 |
+
Neapolitan,nap
|
211 |
+
Navajo,nav
|
212 |
+
South Ndebele,nbl
|
213 |
+
Low German,nds
|
214 |
+
Newar,new
|
215 |
+
Nias,nia
|
216 |
+
Dutch,nld
|
217 |
+
Norwegian (Nynorsk),nno
|
218 |
+
Norwegian Nynorsk,nno
|
219 |
+
Norwegian Bokmål,nob
|
220 |
+
Norwegian (Bokmål),nob
|
221 |
+
Novial,nov
|
222 |
+
Nepali,npi
|
223 |
+
Nepali (individual language),npi
|
224 |
+
Narom,nrm
|
225 |
+
Northern Sotho,nso
|
226 |
+
Pedi,nso
|
227 |
+
Chewa,nya
|
228 |
+
Occitan,oci
|
229 |
+
Occitan (post 1500),oci
|
230 |
+
Livvi,olo
|
231 |
+
Odia,ory
|
232 |
+
Ossetian,oss
|
233 |
+
Pangasinan,pag
|
234 |
+
Pampanga,pam
|
235 |
+
Panjabi,pan
|
236 |
+
Punjabi,pan
|
237 |
+
Papiamento,pap
|
238 |
+
Southern Pashto,pbt
|
239 |
+
Picard,pcd
|
240 |
+
Nigerian Pidgin,pcm
|
241 |
+
Pennsylvania German,pdc
|
242 |
+
Palatine German,pfl
|
243 |
+
Pijin,pis
|
244 |
+
Plateau Malagasy,plt
|
245 |
+
Piedmontese,pms
|
246 |
+
Western Panjabi,pnb
|
247 |
+
Pontic,pnt
|
248 |
+
Polish,pol
|
249 |
+
Portuguese,por
|
250 |
+
South Bolivian Quechua,quh
|
251 |
+
Romani,rmy
|
252 |
+
Vlax Romani,rmy
|
253 |
+
Romansh,roh
|
254 |
+
Romanian,ron
|
255 |
+
Rusyn,rue
|
256 |
+
Rundi,run
|
257 |
+
Aromanian,rup
|
258 |
+
Russian,rus
|
259 |
+
Sango,sag
|
260 |
+
Yakut,sah
|
261 |
+
Sanskrit,san
|
262 |
+
Sicilian,scn
|
263 |
+
Scots,sco
|
264 |
+
Samogitian,sgs
|
265 |
+
Tachelhit,shi
|
266 |
+
Sinhala,sin
|
267 |
+
Saraiki,skr
|
268 |
+
Slovak,slk
|
269 |
+
Slovenian,slv
|
270 |
+
Northern Sami,sme
|
271 |
+
Inari Sami,smn
|
272 |
+
Samoan,smo
|
273 |
+
Shona,sna
|
274 |
+
Sindhi,snd
|
275 |
+
Somali,som
|
276 |
+
Southern Sotho,sot
|
277 |
+
Spanish,spa
|
278 |
+
Sardinian,srd
|
279 |
+
Sranan Tongo,srn
|
280 |
+
Serbian,srp
|
281 |
+
Swati,ssw
|
282 |
+
Saterland Frisian,stq
|
283 |
+
Sundanese,sun
|
284 |
+
Swedish,swe
|
285 |
+
Swahili (individual language),swh
|
286 |
+
Swahili,swh
|
287 |
+
Silesian,szl
|
288 |
+
Sakizaya,szy
|
289 |
+
Tahitian,tah
|
290 |
+
Tamil,tam
|
291 |
+
Tatar,tat
|
292 |
+
Atayal,tay
|
293 |
+
Tulu,tcy
|
294 |
+
Telugu,tel
|
295 |
+
Tetum,tet
|
296 |
+
Tajik,tgk
|
297 |
+
Thai,tha
|
298 |
+
Tigrinya,tir
|
299 |
+
Talysh,tly
|
300 |
+
Tongan,ton
|
301 |
+
Tok Pisin,tpi
|
302 |
+
Taroko,trv
|
303 |
+
Sediq,trv
|
304 |
+
Tswana,tsn
|
305 |
+
Tsonga,tso
|
306 |
+
Turkmen,tuk
|
307 |
+
Tumbuka,tum
|
308 |
+
Turkish,tur
|
309 |
+
Twi,twi
|
310 |
+
Tuvan,tyv
|
311 |
+
Central Atlas Tamazight,tzm
|
312 |
+
Udmurt,udm
|
313 |
+
Uighur,uig
|
314 |
+
Ukrainian,ukr
|
315 |
+
Undetermined,und
|
316 |
+
Urdu,urd
|
317 |
+
Northern Uzbek,uzn
|
318 |
+
Uzbek,uzn
|
319 |
+
Venetian,vec
|
320 |
+
Venda,ven
|
321 |
+
Veps,vep
|
322 |
+
Vietnamese,vie
|
323 |
+
West Flemish,vls
|
324 |
+
Volapük,vol
|
325 |
+
Võro,vro
|
326 |
+
Waray,war
|
327 |
+
Waray (Philippines),war
|
328 |
+
Walloon,wln
|
329 |
+
Wolof,wol
|
330 |
+
Wu Chinese,wuu
|
331 |
+
Chinese (Wu),wuu
|
332 |
+
Xhosa,xho
|
333 |
+
Yoruba,yor
|
334 |
+
Chinese (Yue/Cantonese),yue
|
335 |
+
Yue Chinese,yue
|
336 |
+
Zeelandic,zea
|
337 |
+
Malay,zsm
|
338 |
+
Standard Malay,zsm
|
339 |
+
Zulu,zul
|
340 |
+
Southern Min,nan
|
341 |
+
Min Nan Chinese,nan
|
342 |
+
Chinese (Min Nan),nan
|