BeardedMonster committed on
Commit
cfc543f
·
verified ·
1 Parent(s): 5c62089

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -7
app.py CHANGED
@@ -35,7 +35,7 @@ temperature = 0.99
35
  top_k = 50
36
  top_p = 0.95
37
  repetition_penalty = 4.0
38
- length_penalty = 2.0
39
 
40
  # Create sliders in the sidebar
41
  max_length = st.sidebar.slider("Max. output length", min_value=10, max_value=500, value=max_length)
@@ -137,7 +137,8 @@ sample_texts = {
137
  "Efik: Ke eyo ...":"Ke eyo Jesus ye mme mbet esie, etop emi ama ada ifụre ọsọk mme Jew oro esịt okobụn̄ọde ke ntak idiọkido ke Israel, oro ẹkenyụn̄ ẹdude ke mfụhọ ke itie-ufụn mme nsunsu ido edinam Ido Ukpono Mme Jew eke akpa isua ikie.",
138
  "Tell me a story in pidgin": "Tell me a story Pidgin",
139
  "who are you?": "who are you?",
140
- # "Translate 'how are you?' to Yoruba": "Translate 'how are you?' to Yoruba",
 
141
  "Classify the sentiment": "Anyi na-echefu oke ike.",
142
  "what is the topic of this text": "Africa Free Trade Zone: Kò sí ìdènà láti kó ọjà láti orílẹ̀èdè kan sí òmíràn",
143
  "diacritize this text: ": "E sun, Alaga, fun ise amalayi ti e n se ni Naijiria. E maa ba a lo, egbon!",
@@ -146,9 +147,11 @@ sample_texts = {
146
  }
147
 
148
  instruction_wrap = {
 
149
  "Tell me a story in pidgin": "<prompt> Tell me a story in pidgin <response>:",
150
  "Translate 'how are you?' to Yoruba": "<prompt> Translate 'how are you?' to Yoruba <response>:",
151
  "who are you?": "<prompt> who are you? <response>:",
 
152
  "Classify the sentiment" : "<classify> Anyi na-echefu oke ike. <sentiment>",
153
  "clean this text": "<clean> Abin mamaki ne aikin da shugabaZn HNajeriya ybake yi. kCiF 39gaba Tda haRkGa sir! <pcm>",
154
  "diacritize this text: ": "<diacritize> E sun, Alaga, fun ise amalayi ti e n se ni Naijiria. E maa ba a lo, egbon! <yor>",
@@ -169,15 +172,15 @@ task_options = {
169
  "Text Cleaning": "<clean> {} "
170
  }
171
 
172
- # Language options for diacritize and clean tasks
173
  language_options = {
174
  "select": "",
175
  "Yoruba": "<yor>",
176
  "Hausa": "<hau>",
177
  "Ibo": "<ibo>",
178
  "Pidgin": "<pcm>",
179
- "Efik": "<efi>",
180
- "Urhobo": "<urh>",
181
  "Fulah": "<ful>"
182
  }
183
 
@@ -203,7 +206,7 @@ def wrap_text(text, task_value):
203
 
204
 
205
  # Text input
206
- user_input = st.text_area("Enter text below **(PLEASE, FIRST READ ALL INSTRUCTIONS IN THE SIDEBAR FOR A BETTER EXPERIENCE)**: ", sample_texts[sample_text])
207
  user_input = instruction_wrap.get(sample_texts.get(user_input, user_input), user_input)
208
  print("Final user input: ", user_input)
209
  if st.button("Generate"):
@@ -231,7 +234,7 @@ if st.button("Generate"):
231
  output = model.generate(input_ids, **generation_config)
232
  generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
233
 
234
- generated_text = re.sub(r"\|(end_f_text|end_of_text|end_ofext|end_oftext)", " ", generated_text.split("|end_of_text|")[0])
235
 
236
  if task == "Sentiment Classification":
237
  if "negative" in generated_text.lower():
@@ -243,6 +246,9 @@ if st.button("Generate"):
243
 
244
  elif task == "Topic Classification":
245
  generated_text = generated_text.split(" ")[0][:20]
 
 
 
246
 
247
  full_output = st.empty()
248
 
 
35
  top_k = 50
36
  top_p = 0.95
37
  repetition_penalty = 4.0
38
+ length_penalty = 3.0
39
 
40
  # Create sliders in the sidebar
41
  max_length = st.sidebar.slider("Max. output length", min_value=10, max_value=500, value=max_length)
 
137
  "Efik: Ke eyo ...":"Ke eyo Jesus ye mme mbet esie, etop emi ama ada ifụre ọsọk mme Jew oro esịt okobụn̄ọde ke ntak idiọkido ke Israel, oro ẹkenyụn̄ ẹdude ke mfụhọ ke itie-ufụn mme nsunsu ido edinam Ido Ukpono Mme Jew eke akpa isua ikie.",
138
  "Tell me a story in pidgin": "Tell me a story Pidgin",
139
  "who are you?": "who are you?",
140
+ "Speak Yoruba": "Speak Yoruba",
141
+ "Translate 'Often, all Yoruba children...' to Yoruba": "Often, all Yoruba children take pride in speaking the Yoruba language.",
142
  "Classify the sentiment": "Anyi na-echefu oke ike.",
143
  "what is the topic of this text": "Africa Free Trade Zone: Kò sí ìdènà láti kó ọjà láti orílẹ̀èdè kan sí òmíràn",
144
  "diacritize this text: ": "E sun, Alaga, fun ise amalayi ti e n se ni Naijiria. E maa ba a lo, egbon!",
 
147
  }
148
 
149
  instruction_wrap = {
150
+ "Translate 'Often, all Yoruba children...' to Yoruba":"<translate> Often, all Yoruba children take pride in speaking the Yoruba language. <yor>"
151
  "Tell me a story in pidgin": "<prompt> Tell me a story in pidgin <response>:",
152
  "Translate 'how are you?' to Yoruba": "<prompt> Translate 'how are you?' to Yoruba <response>:",
153
  "who are you?": "<prompt> who are you? <response>:",
154
+ "Speak Yoruba": "<prompt> Speak Yoruba <response>:",
155
  "Classify the sentiment" : "<classify> Anyi na-echefu oke ike. <sentiment>",
156
  "clean this text": "<clean> Abin mamaki ne aikin da shugabaZn HNajeriya ybake yi. kCiF 39gaba Tda haRkGa sir! <pcm>",
157
  "diacritize this text: ": "<diacritize> E sun, Alaga, fun ise amalayi ti e n se ni Naijiria. E maa ba a lo, egbon! <yor>",
 
172
  "Text Cleaning": "<clean> {} "
173
  }
174
 
175
+ # Language options for diacritize, translation and clean tasks
176
  language_options = {
177
  "select": "",
178
  "Yoruba": "<yor>",
179
  "Hausa": "<hau>",
180
  "Ibo": "<ibo>",
181
  "Pidgin": "<pcm>",
182
+ # "Efik": "<efi>",
183
+ # "Urhobo": "<urh>",
184
  "Fulah": "<ful>"
185
  }
186
 
 
206
 
207
 
208
  # Text input
209
+ user_input = st.text_area("Enter text below **(PLEASE, FIRST READ ALL INSTRUCTIONS IN THE SIDEBAR CAREFULLY FOR THE BEST EXPERIENCE)**: ", sample_texts[sample_text])
210
  user_input = instruction_wrap.get(sample_texts.get(user_input, user_input), user_input)
211
  print("Final user input: ", user_input)
212
  if st.button("Generate"):
 
234
  output = model.generate(input_ids, **generation_config)
235
  generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
236
 
237
+ generated_text = re.sub(r"\|(end_f_text|end_of_text|end_ofext|end_oftext)|:|`", " ", generated_text.split("|end_of_text|")[0])
238
 
239
  if task == "Sentiment Classification":
240
  if "negative" in generated_text.lower():
 
246
 
247
  elif task == "Topic Classification":
248
  generated_text = generated_text.split(" ")[0][:20]
249
+ elif task == "Translation":
250
+ n_sentences = len(user_input)
251
+ generated_text = ".".join(generated_text.split(".")[: n_sentences])
252
 
253
  full_output = st.empty()
254