BeardedMonster committed on
Commit
0846362
·
verified ·
1 Parent(s): ea3de3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -14
app.py CHANGED
@@ -39,7 +39,7 @@ st.sidebar.write("""
39
  - Click the Generate button.
40
  6. **Translation Tips:**
41
  - English as the target language gives the best results.
42
- - You can also test inter-language translation i.e yoruba to igbo
43
  7. **Performance Note:**
44
  - The model's performance varies due to its size and training data. It performs best on text generation and translation.
45
  - For other tasks, try multiple times if model's output is not optimal (This is due to the generator's sampling parameter settings).
@@ -115,6 +115,11 @@ def count_sentences(text):
115
  sentences = [sentence for sentence in sentences if sentence]
116
  return len(sentences)
117
 
 
 
 
 
 
118
 
119
  async def generate_from_api(user_input, generation_config):
120
  urls = [
@@ -202,7 +207,8 @@ instruction_wrap = {
202
  # "diacritize this text: ": "<diacritize> E sun, Alaga, fun ise amalayi ti e n se ni Naijiria. E maa ba a lo, egbon! <yor>",
203
  # "what is the topic of this text": "<classify> Africa Free Trade Zone: Kò sí ìdènà láti kó ọjà láti orílẹ̀èdè kan sí òmíràn <topic>",
204
  # 'headline of this text': '<title> ** Sylvain Itté French ambassador don comot Niger Republic **. Sylvain Itté, di French ambassador for Niger don comot Niamey and currently e dey for flight from Ndjamena to Paris. Sylvain Itté, di French ambassador for Niger don comot Niamey very early dis morning and currently e dey for flight from Ndjamena to Paris.\n\nDi military detain Bazoum and im family for di presidential palace. Niger na former French colony, and France still get 1,500 sojas for di African country.\n\n"France don decide to withdraw dia ambassador. In di next hours our ambassador and several diplomats go return to France," Oga Macron tok.\n\nE add say di military co-operation dey "over" and French troops go leave in "di months to come".\n\n"Dis Sunday we celebrate one new step towards di sovereignty of Niger," di junta tok, for one statement wey AFP news agency quote.\n\nDi decision by Paris dey come afta months of hostility and protest against di presence of French for di kontri, wit regular demonstrations for di capital Niamey.\n\nDi move don scata France operations against Islamist militants for di wider Sahel region and Paris influence for there. But oga Macron tok say "putschists no go hold France hostage," <headline>',
205
- # }
 
206
  # Task options
207
  task_options = {
208
  "select": "{}",
@@ -215,6 +221,7 @@ task_options = {
215
  "Text Diacritization": "<diacritize> {} ",
216
  "Question Generation": "<prompt> {} <response>:",
217
  "Question-Answering" : "<prompt> {} <response>:",
 
218
  "Text Cleaning": "<clean> {} "
219
  }
220
  # Language options for diacritize, translation and clean tasks
@@ -239,25 +246,24 @@ if task in ["Text Diacritization", "Text Cleaning", "Translation"]:
239
  language = st.selectbox("Select a Nigerian language:", list(language_options.keys()))
240
  task_value = f"{task_options[task]} {language_options[language]}"
241
  else:
242
- task_value = task_options[task]
243
- def wrap_text(text, task_value):
244
- tasks = ["<classify>", "<prompt>", "<clean>", "<title>", "<diacritize>", "<translate>"]
245
- if any(task in text for task in tasks):
246
- return text
247
- return task_value.format(text)
248
  # Text input
249
  user_input = st.text_area("Enter text below **(PLEASE, FIRST READ ALL INSTRUCTIONS IN THE SIDEBAR CAREFULLY FOR THE BEST EXPERIENCE)**: ", sample_texts.get(sample_text, sample_text))
250
- n_sentences = count_sentences(user_input)
 
 
251
 
252
  if task == "select":
253
  user_input = instruction_wrap.get(user_input, user_input)
254
 
255
- print("Final user input: ", user_input)
256
  if st.button("Generate"):
257
  if user_input:
258
  with st.spinner("Please wait..."):
259
  wrapped_input = wrap_text(user_input, task_value)
260
- print("wrapped_input: ", wrapped_input)
261
  generation_config["max_new_tokens"]= min(max_new_tokens, 1024 - len(tokenizer.tokenize(wrapped_input)))
262
  start_time = time.time()
263
 
@@ -274,7 +280,7 @@ if st.button("Generate"):
274
  # generated_text = re.split(r"\|(end_f_text|end_of_text|end_ofext|end_of_text_||end_of_te|end_oftext)|:|`", generated_text)[0]
275
  generated_text = re.sub(r"\|(end_f_text|end_of_text|end_ofext|end_of_text_|end_of_te|end_o|end_of_tet|end_oftext)|:|`\|", "", generated_text)
276
  generated_text = generated_text.strip("\n")
277
- print("Generated text: ", generated_text)
278
 
279
  if task == "Sentiment Classification" or "<sentiment>" in wrapped_input :
280
  if "negative" in generated_text.lower():
@@ -286,12 +292,12 @@ if st.button("Generate"):
286
 
287
  elif task == "Topic Classification" or "<topic>" in wrapped_input:
288
  generated_text = generated_text[:15]
289
- print("split", generated_text.split(" ")[0], re.split(r"\.|\n|\*\*|\*", generated_text)[0], generated_text.split(" "))
290
  generated_text = re.split(r"\.|\n|\*\*|\*", generated_text)[0]
291
  generated_text = asyncio.run(assign_topic(generated_text))
292
 
293
  elif task == "Translation" or "<translate>" in wrapped_input:
294
- print("split for translation: ", n_sentences, re.split(r"\.|\n", generated_text)[:n_sentences])
295
  generated_text = ".".join(re.split(r"\.|\n", generated_text)[:n_sentences])
296
 
297
  elif task == "Question Generation" or "Question Generation:" in sample_text:
 
39
  - Click the Generate button.
40
  6. **Translation Tips:**
41
  - English as the target language gives the best results.
42
+ - You can also do inter-language translation i.e yoruba to igbo
43
  7. **Performance Note:**
44
  - The model's performance varies due to its size and training data. It performs best on text generation and translation.
45
  - For other tasks, try multiple times if model's output is not optimal (This is due to the generator's sampling parameter settings).
 
115
  sentences = [sentence for sentence in sentences if sentence]
116
  return len(sentences)
117
 
118
+ def wrap_text(text, task_value):
119
+ tasks = ["<classify>", "<prompt>", "<clean>", "<title>", "<diacritize>", "<translate>"]
120
+ if any(task in text for task in tasks):
121
+ return text
122
+ return task_value.format(text)
123
 
124
  async def generate_from_api(user_input, generation_config):
125
  urls = [
 
207
  # "diacritize this text: ": "<diacritize> E sun, Alaga, fun ise amalayi ti e n se ni Naijiria. E maa ba a lo, egbon! <yor>",
208
  # "what is the topic of this text": "<classify> Africa Free Trade Zone: Kò sí ìdènà láti kó ọjà láti orílẹ̀èdè kan sí òmíràn <topic>",
209
  # 'headline of this text': '<title> ** Sylvain Itté French ambassador don comot Niger Republic **. Sylvain Itté, di French ambassador for Niger don comot Niamey and currently e dey for flight from Ndjamena to Paris. Sylvain Itté, di French ambassador for Niger don comot Niamey very early dis morning and currently e dey for flight from Ndjamena to Paris.\n\nDi military detain Bazoum and im family for di presidential palace. Niger na former French colony, and France still get 1,500 sojas for di African country.\n\n"France don decide to withdraw dia ambassador. In di next hours our ambassador and several diplomats go return to France," Oga Macron tok.\n\nE add say di military co-operation dey "over" and French troops go leave in "di months to come".\n\n"Dis Sunday we celebrate one new step towards di sovereignty of Niger," di junta tok, for one statement wey AFP news agency quote.\n\nDi decision by Paris dey come afta months of hostility and protest against di presence of French for di kontri, wit regular demonstrations for di capital Niamey.\n\nDi move don scata France operations against Islamist militants for di wider Sahel region and Paris influence for there. But oga Macron tok say "putschists no go hold France hostage," <headline>',
210
+ #
211
+
212
  # Task options
213
  task_options = {
214
  "select": "{}",
 
221
  "Text Diacritization": "<diacritize> {} ",
222
  "Question Generation": "<prompt> {} <response>:",
223
  "Question-Answering" : "<prompt> {} <response>:",
224
+ "Text Summarization" : "<summarize> {} <summary>:",
225
  "Text Cleaning": "<clean> {} "
226
  }
227
  # Language options for diacritize, translation and clean tasks
 
246
  language = st.selectbox("Select a Nigerian language:", list(language_options.keys()))
247
  task_value = f"{task_options[task]} {language_options[language]}"
248
  else:
249
+ task_value = task_options[task]
250
+
251
+
 
 
 
252
  # Text input
253
  user_input = st.text_area("Enter text below **(PLEASE, FIRST READ ALL INSTRUCTIONS IN THE SIDEBAR CAREFULLY FOR THE BEST EXPERIENCE)**: ", sample_texts.get(sample_text, sample_text))
254
+
255
+ if task == "Translation":
256
+ n_sentences = count_sentences(user_input)
257
 
258
  if task == "select":
259
  user_input = instruction_wrap.get(user_input, user_input)
260
 
261
+ # print("Final user input: ", user_input)
262
  if st.button("Generate"):
263
  if user_input:
264
  with st.spinner("Please wait..."):
265
  wrapped_input = wrap_text(user_input, task_value)
266
+ # print("wrapped_input: ", wrapped_input)
267
  generation_config["max_new_tokens"]= min(max_new_tokens, 1024 - len(tokenizer.tokenize(wrapped_input)))
268
  start_time = time.time()
269
 
 
280
  # generated_text = re.split(r"\|(end_f_text|end_of_text|end_ofext|end_of_text_||end_of_te|end_oftext)|:|`", generated_text)[0]
281
  generated_text = re.sub(r"\|(end_f_text|end_of_text|end_ofext|end_of_text_|end_of_te|end_o|end_of_tet|end_oftext)|:|`\|", "", generated_text)
282
  generated_text = generated_text.strip("\n")
283
+ # print("Generated text: ", generated_text)
284
 
285
  if task == "Sentiment Classification" or "<sentiment>" in wrapped_input :
286
  if "negative" in generated_text.lower():
 
292
 
293
  elif task == "Topic Classification" or "<topic>" in wrapped_input:
294
  generated_text = generated_text[:15]
295
+ # print("split", generated_text.split(" ")[0], re.split(r"\.|\n|\*\*|\*", generated_text)[0], generated_text.split(" "))
296
  generated_text = re.split(r"\.|\n|\*\*|\*", generated_text)[0]
297
  generated_text = asyncio.run(assign_topic(generated_text))
298
 
299
  elif task == "Translation" or "<translate>" in wrapped_input:
300
+ # print("split for translation: ", n_sentences, re.split(r"\.|\n", generated_text)[:n_sentences])
301
  generated_text = ".".join(re.split(r"\.|\n", generated_text)[:n_sentences])
302
 
303
  elif task == "Question Generation" or "Question Generation:" in sample_text: