nisheeth committed on
Commit
f93e55e
·
verified ·
1 Parent(s): 77a9a5b

Upload 5 files

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -9
  2. README.md +6 -5
  3. app.py +29 -9
  4. flores200_codes.py +172 -1
  5. requirements.txt +6 -2
.gitattributes CHANGED
@@ -2,34 +2,26 @@
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
13
  *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
  *.ot filter=lfs diff=lfs merge=lfs -text
18
  *.parquet filter=lfs diff=lfs merge=lfs -text
19
  *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
  *.pt filter=lfs diff=lfs merge=lfs -text
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
  *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
  *.wasm filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
 
5
  *.ftz filter=lfs diff=lfs merge=lfs -text
6
  *.gz filter=lfs diff=lfs merge=lfs -text
7
  *.h5 filter=lfs diff=lfs merge=lfs -text
8
  *.joblib filter=lfs diff=lfs merge=lfs -text
9
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
 
10
  *.model filter=lfs diff=lfs merge=lfs -text
11
  *.msgpack filter=lfs diff=lfs merge=lfs -text
 
 
12
  *.onnx filter=lfs diff=lfs merge=lfs -text
13
  *.ot filter=lfs diff=lfs merge=lfs -text
14
  *.parquet filter=lfs diff=lfs merge=lfs -text
15
  *.pb filter=lfs diff=lfs merge=lfs -text
 
 
16
  *.pt filter=lfs diff=lfs merge=lfs -text
17
  *.pth filter=lfs diff=lfs merge=lfs -text
18
  *.rar filter=lfs diff=lfs merge=lfs -text
 
19
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
20
  *.tar.* filter=lfs diff=lfs merge=lfs -text
 
21
  *.tflite filter=lfs diff=lfs merge=lfs -text
22
  *.tgz filter=lfs diff=lfs merge=lfs -text
23
  *.wasm filter=lfs diff=lfs merge=lfs -text
24
  *.xz filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
- title: MTDemo
3
- emoji: 🏃
4
- colorFrom: yellow
5
- colorTo: purple
6
  sdk: gradio
7
- sdk_version: 4.22.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Nllb Translation Demo
3
+ emoji: 👀
4
+ colorFrom: indigo
5
+ colorTo: green
6
  sdk: gradio
7
+ sdk_version: 3.0.26
8
  app_file: app.py
9
  pinned: false
10
+ duplicated_from: Geonmo/nllb-translation-demo
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -8,13 +8,30 @@ from flores200_codes import flores_codes
8
 
9
  def load_models():
10
  # build model and tokenizer
11
- model_name_dict = {
12
- 'nllb-1.3B': 'facebook/nllb-200-1.3B',
13
- #'nllb-distilled-600M': 'facebook/nllb-200-distilled-600M',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  #'nllb-1.3B': 'facebook/nllb-200-1.3B',
15
  #'nllb-distilled-1.3B': 'facebook/nllb-200-distilled-1.3B',
16
  #'nllb-3.3B': 'facebook/nllb-200-3.3B',
17
- # 'nllb-distilled-600M': 'facebook/nllb-200-distilled-600M',
18
  }
19
 
20
  model_dict = {}
@@ -31,7 +48,7 @@ def load_models():
31
 
32
  def translation(source, target, text):
33
  if len(model_dict) == 2:
34
- model_name = 'nllb-3.3B'
35
 
36
  start_time = time.time()
37
  source = flores_codes[source]
@@ -53,6 +70,7 @@ def translation(source, target, text):
53
  return result
54
 
55
 
 
56
  if __name__ == '__main__':
57
  print('\tinit models')
58
 
@@ -64,18 +82,18 @@ if __name__ == '__main__':
64
  lang_codes = list(flores_codes.keys())
65
  #inputs = [gr.inputs.Radio(['nllb-distilled-600M', 'nllb-1.3B', 'nllb-distilled-1.3B'], label='NLLB Model'),
66
  inputs = [gr.inputs.Dropdown(lang_codes, default='English', label='Source'),
67
- gr.inputs.Dropdown(lang_codes, default='Hindi', label='Target'),
68
  gr.inputs.Textbox(lines=5, label="Input text"),
69
  ]
70
 
71
  outputs = gr.outputs.JSON()
72
 
73
- title = "Machine Translation Demo"
74
 
75
  demo_status = "Demo is running on CPU"
76
- description = f"{demo_status}"
77
  examples = [
78
- ['English', 'Hindi', 'Hi. nice to meet you']
79
  ]
80
 
81
  gr.Interface(translation,
@@ -84,3 +102,5 @@ if __name__ == '__main__':
84
  title=title,
85
  description=description,
86
  ).launch()
 
 
 
8
 
9
  def load_models():
10
  # build model and tokenizer
11
+ model_name_dict = {#'nllb-distilled-600M': 'facebook/nllb-200-distilled-600M',
12
+ 'nllb-1.3B': 'facebook/nllb-200-1.3B',
13
+ #'nllb-distilled-1.3B': 'facebook/nllb-200-distilled-1.3B',
14
+ #'nllb-3.3B': 'facebook/nllb-200-3.3B',
15
+ }
16
+
17
+ model_dict = {}
18
+
19
+ for call_name, real_name in model_name_dict.items():
20
+ print('\tLoading model: %s' % call_name)
21
+ model = AutoModelForSeq2SeqLM.from_pretrained(real_name)
22
+ tokenizer = AutoTokenizer.from_pretrained(real_name)
23
+ model_dict[call_name+'_model'] = model
24
+ model_dict[call_name+'_tokenizer'] = tokenizer
25
+
26
+ return model_dict
27
+
28
+
29
+ def load_models():
30
+ # build model and tokenizer
31
+ model_name_dict = {'nllb-distilled-600M': 'facebook/nllb-200-distilled-600M',
32
  #'nllb-1.3B': 'facebook/nllb-200-1.3B',
33
  #'nllb-distilled-1.3B': 'facebook/nllb-200-distilled-1.3B',
34
  #'nllb-3.3B': 'facebook/nllb-200-3.3B',
 
35
  }
36
 
37
  model_dict = {}
 
48
 
49
  def translation(source, target, text):
50
  if len(model_dict) == 2:
51
+ model_name = 'nllb-distilled-600M'
52
 
53
  start_time = time.time()
54
  source = flores_codes[source]
 
70
  return result
71
 
72
 
73
+
74
  if __name__ == '__main__':
75
  print('\tinit models')
76
 
 
82
  lang_codes = list(flores_codes.keys())
83
  #inputs = [gr.inputs.Radio(['nllb-distilled-600M', 'nllb-1.3B', 'nllb-distilled-1.3B'], label='NLLB Model'),
84
  inputs = [gr.inputs.Dropdown(lang_codes, default='English', label='Source'),
85
+ gr.inputs.Dropdown(lang_codes, default='Korean', label='Target'),
86
  gr.inputs.Textbox(lines=5, label="Input text"),
87
  ]
88
 
89
  outputs = gr.outputs.JSON()
90
 
91
+ title = "NLLB distilled 1.3B demo"
92
 
93
  demo_status = "Demo is running on CPU"
94
+ description = f"Details: https://github.com/facebookresearch/fairseq/tree/nllb. {demo_status}"
95
  examples = [
96
+ ['English', 'Korean', 'Hi. nice to meet you']
97
  ]
98
 
99
  gr.Interface(translation,
 
102
  title=title,
103
  description=description,
104
  ).launch()
105
+
106
+
flores200_codes.py CHANGED
@@ -1,36 +1,207 @@
1
  codes_as_string = '''Acehnese (Arabic script) ace_Arab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  Assamese asm_Beng
 
3
  Awadhi awa_Deva
 
 
 
 
 
 
 
 
4
  Bengali ben_Beng
5
  Bhojpuri bho_Deva
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  Dzongkha dzo_Tibt
 
7
  English eng_Latn
 
 
 
 
 
8
  Fijian fij_Latn
 
 
 
 
 
 
 
 
 
9
  Gujarati guj_Gujr
 
 
 
10
  Hindi hin_Deva
11
  Chhattisgarhi hne_Deva
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  Kannada kan_Knda
13
  Kashmiri (Arabic script) kas_Arab
14
  Kashmiri (Devanagari script) kas_Deva
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  Mizo lus_Latn
 
16
  Magahi mag_Deva
17
  Maithili mai_Deva
18
  Malayalam mal_Mlym
19
  Marathi mar_Deva
 
 
 
 
 
20
  Meitei (Bengali script) mni_Beng
 
 
 
21
  Burmese mya_Mymr
 
 
 
22
  Nepali npi_Deva
 
 
 
 
 
23
  Odia ory_Orya
 
24
  Eastern Panjabi pan_Guru
 
25
  Western Persian pes_Arab
 
 
 
 
 
 
 
 
 
26
  Sanskrit san_Deva
27
  Santali sat_Olck
 
 
28
  Sinhala sin_Sinh
 
 
 
 
29
  Sindhi snd_Arab
 
 
 
 
 
 
 
 
 
 
 
30
  Tamil tam_Taml
 
31
  Telugu tel_Telu
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  Urdu urd_Arab
33
- Vietnamese vie_Latn'''
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  codes_as_string = codes_as_string.split('\n')
36
 
 
1
  codes_as_string = '''Acehnese (Arabic script) ace_Arab
2
+ Acehnese (Latin script) ace_Latn
3
+ Mesopotamian Arabic acm_Arab
4
+ Ta’izzi-Adeni Arabic acq_Arab
5
+ Tunisian Arabic aeb_Arab
6
+ Afrikaans afr_Latn
7
+ South Levantine Arabic ajp_Arab
8
+ Akan aka_Latn
9
+ Amharic amh_Ethi
10
+ North Levantine Arabic apc_Arab
11
+ Modern Standard Arabic arb_Arab
12
+ Modern Standard Arabic (Romanized) arb_Latn
13
+ Najdi Arabic ars_Arab
14
+ Moroccan Arabic ary_Arab
15
+ Egyptian Arabic arz_Arab
16
  Assamese asm_Beng
17
+ Asturian ast_Latn
18
  Awadhi awa_Deva
19
+ Central Aymara ayr_Latn
20
+ South Azerbaijani azb_Arab
21
+ North Azerbaijani azj_Latn
22
+ Bashkir bak_Cyrl
23
+ Bambara bam_Latn
24
+ Balinese ban_Latn
25
+ Belarusian bel_Cyrl
26
+ Bemba bem_Latn
27
  Bengali ben_Beng
28
  Bhojpuri bho_Deva
29
+ Banjar (Arabic script) bjn_Arab
30
+ Banjar (Latin script) bjn_Latn
31
+ Standard Tibetan bod_Tibt
32
+ Bosnian bos_Latn
33
+ Buginese bug_Latn
34
+ Bulgarian bul_Cyrl
35
+ Catalan cat_Latn
36
+ Cebuano ceb_Latn
37
+ Czech ces_Latn
38
+ Chokwe cjk_Latn
39
+ Central Kurdish ckb_Arab
40
+ Crimean Tatar crh_Latn
41
+ Welsh cym_Latn
42
+ Danish dan_Latn
43
+ German deu_Latn
44
+ Southwestern Dinka dik_Latn
45
+ Dyula dyu_Latn
46
  Dzongkha dzo_Tibt
47
+ Greek ell_Grek
48
  English eng_Latn
49
+ Esperanto epo_Latn
50
+ Estonian est_Latn
51
+ Basque eus_Latn
52
+ Ewe ewe_Latn
53
+ Faroese fao_Latn
54
  Fijian fij_Latn
55
+ Finnish fin_Latn
56
+ Fon fon_Latn
57
+ French fra_Latn
58
+ Friulian fur_Latn
59
+ Nigerian Fulfulde fuv_Latn
60
+ Scottish Gaelic gla_Latn
61
+ Irish gle_Latn
62
+ Galician glg_Latn
63
+ Guarani grn_Latn
64
  Gujarati guj_Gujr
65
+ Haitian Creole hat_Latn
66
+ Hausa hau_Latn
67
+ Hebrew heb_Hebr
68
  Hindi hin_Deva
69
  Chhattisgarhi hne_Deva
70
+ Croatian hrv_Latn
71
+ Hungarian hun_Latn
72
+ Armenian hye_Armn
73
+ Igbo ibo_Latn
74
+ Ilocano ilo_Latn
75
+ Indonesian ind_Latn
76
+ Icelandic isl_Latn
77
+ Italian ita_Latn
78
+ Javanese jav_Latn
79
+ Japanese jpn_Jpan
80
+ Kabyle kab_Latn
81
+ Jingpho kac_Latn
82
+ Kamba kam_Latn
83
  Kannada kan_Knda
84
  Kashmiri (Arabic script) kas_Arab
85
  Kashmiri (Devanagari script) kas_Deva
86
+ Georgian kat_Geor
87
+ Central Kanuri (Arabic script) knc_Arab
88
+ Central Kanuri (Latin script) knc_Latn
89
+ Kazakh kaz_Cyrl
90
+ Kabiyè kbp_Latn
91
+ Kabuverdianu kea_Latn
92
+ Khmer khm_Khmr
93
+ Kikuyu kik_Latn
94
+ Kinyarwanda kin_Latn
95
+ Kyrgyz kir_Cyrl
96
+ Kimbundu kmb_Latn
97
+ Northern Kurdish kmr_Latn
98
+ Kikongo kon_Latn
99
+ Korean kor_Hang
100
+ Lao lao_Laoo
101
+ Ligurian lij_Latn
102
+ Limburgish lim_Latn
103
+ Lingala lin_Latn
104
+ Lithuanian lit_Latn
105
+ Lombard lmo_Latn
106
+ Latgalian ltg_Latn
107
+ Luxembourgish ltz_Latn
108
+ Luba-Kasai lua_Latn
109
+ Ganda lug_Latn
110
+ Luo luo_Latn
111
  Mizo lus_Latn
112
+ Standard Latvian lvs_Latn
113
  Magahi mag_Deva
114
  Maithili mai_Deva
115
  Malayalam mal_Mlym
116
  Marathi mar_Deva
117
+ Minangkabau (Arabic script) min_Arab
118
+ Minangkabau (Latin script) min_Latn
119
+ Macedonian mkd_Cyrl
120
+ Plateau Malagasy plt_Latn
121
+ Maltese mlt_Latn
122
  Meitei (Bengali script) mni_Beng
123
+ Halh Mongolian khk_Cyrl
124
+ Mossi mos_Latn
125
+ Maori mri_Latn
126
  Burmese mya_Mymr
127
+ Dutch nld_Latn
128
+ Norwegian Nynorsk nno_Latn
129
+ Norwegian Bokmål nob_Latn
130
  Nepali npi_Deva
131
+ Northern Sotho nso_Latn
132
+ Nuer nus_Latn
133
+ Nyanja nya_Latn
134
+ Occitan oci_Latn
135
+ West Central Oromo gaz_Latn
136
  Odia ory_Orya
137
+ Pangasinan pag_Latn
138
  Eastern Panjabi pan_Guru
139
+ Papiamento pap_Latn
140
  Western Persian pes_Arab
141
+ Polish pol_Latn
142
+ Portuguese por_Latn
143
+ Dari prs_Arab
144
+ Southern Pashto pbt_Arab
145
+ Ayacucho Quechua quy_Latn
146
+ Romanian ron_Latn
147
+ Rundi run_Latn
148
+ Russian rus_Cyrl
149
+ Sango sag_Latn
150
  Sanskrit san_Deva
151
  Santali sat_Olck
152
+ Sicilian scn_Latn
153
+ Shan shn_Mymr
154
  Sinhala sin_Sinh
155
+ Slovak slk_Latn
156
+ Slovenian slv_Latn
157
+ Samoan smo_Latn
158
+ Shona sna_Latn
159
  Sindhi snd_Arab
160
+ Somali som_Latn
161
+ Southern Sotho sot_Latn
162
+ Spanish spa_Latn
163
+ Tosk Albanian als_Latn
164
+ Sardinian srd_Latn
165
+ Serbian srp_Cyrl
166
+ Swati ssw_Latn
167
+ Sundanese sun_Latn
168
+ Swedish swe_Latn
169
+ Swahili swh_Latn
170
+ Silesian szl_Latn
171
  Tamil tam_Taml
172
+ Tatar tat_Cyrl
173
  Telugu tel_Telu
174
+ Tajik tgk_Cyrl
175
+ Tagalog tgl_Latn
176
+ Thai tha_Thai
177
+ Tigrinya tir_Ethi
178
+ Tamasheq (Latin script) taq_Latn
179
+ Tamasheq (Tifinagh script) taq_Tfng
180
+ Tok Pisin tpi_Latn
181
+ Tswana tsn_Latn
182
+ Tsonga tso_Latn
183
+ Turkmen tuk_Latn
184
+ Tumbuka tum_Latn
185
+ Turkish tur_Latn
186
+ Twi twi_Latn
187
+ Central Atlas Tamazight tzm_Tfng
188
+ Uyghur uig_Arab
189
+ Ukrainian ukr_Cyrl
190
+ Umbundu umb_Latn
191
  Urdu urd_Arab
192
+ Northern Uzbek uzn_Latn
193
+ Venetian vec_Latn
194
+ Vietnamese vie_Latn
195
+ Waray war_Latn
196
+ Wolof wol_Latn
197
+ Xhosa xho_Latn
198
+ Eastern Yiddish ydd_Hebr
199
+ Yoruba yor_Latn
200
+ Yue Chinese yue_Hant
201
+ Chinese (Simplified) zho_Hans
202
+ Chinese (Traditional) zho_Hant
203
+ Standard Malay zsm_Latn
204
+ Zulu zul_Latn'''
205
 
206
  codes_as_string = codes_as_string.split('\n')
207
 
requirements.txt CHANGED
@@ -1,3 +1,7 @@
1
  git+https://github.com/huggingface/transformers
2
- gradio
3
- torch
 
 
 
 
 
1
  git+https://github.com/huggingface/transformers
2
+
3
+ gradio==3.8
4
+
5
+ torch
6
+
7
+ httpx==0.24.1