BeardedMonster committed on
Commit
76c917a
·
verified ·
1 Parent(s): 9748b7f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -9
app.py CHANGED
@@ -40,6 +40,7 @@ st.sidebar.write("""
40
  6. **Translation Tips:**
41
  - English as the target language gives the best results.
42
  - You can also do inter-language translation i.e yoruba to igbo
 
43
  7. **Performance Note:**
44
  - The model's performance varies due to its size and training data. It performs best on text generation and translation.
45
  - For other tasks, try multiple times if model's output is not optimal (This is due to the generator's sampling parameter settings).
@@ -90,7 +91,7 @@ st.title("SabiYarn-125M : Generates text in multiple Nigerian languages.")
90
  st.write("**Supported Languages: English, Yoruba, Igbo, Hausa, Pidgin, Efik, Urhobo, Fulfulde, Fulah. \nResults may not be coherent for less represented languages (i.e Efik, \
91
  Urhobo, Fulfulde, Fulah).**")
92
  st.write("**It takes a while (~25s) to return an output on the first 'generate' click. Avg response time: 1-2s on GPU, 40s on CPU**")
93
- st.write("**Model outputs 80 tokens as default. Adjust in the side bar.**")
94
  st.write("**For convenience, you can use chatgpt to provide input text and translate/evaluate model output.**")
95
  st.write("-" * 50)
96
  popular_topics = [
@@ -101,6 +102,12 @@ popular_topics = [
101
  "Philosophy", "Religion", "Society", "World"
102
  ]
103
 
 
 
 
 
 
 
104
  async def assign_topic(generated_text, topic_list=popular_topics):
105
  lower_generated_text = generated_text.lower()
106
  for topic in topic_list:
@@ -113,7 +120,7 @@ def count_sentences(text):
113
  sentences = re.split(r'[.!?]+\s*', text.strip())
114
  # Filter out any empty strings from the resulting list
115
  sentences = [sentence for sentence in sentences if sentence]
116
- return len(sentences)
117
 
118
  def wrap_text(text, task_value):
119
  tasks = ["<classify>", "<prompt>", "<clean>", "<title>", "<diacritize>", "<translate>"]
@@ -170,8 +177,8 @@ sample_texts = {
170
  "Translate 'Often, all Yoruba children...' to Yoruba": "Often, all Yoruba children take pride in speaking the Yoruba language.",
171
  "Classify the sentiment": "Anyi na-echefu oke ike.",
172
  "what is the topic of this text": "Africa Free Trade Zone: Kò sí ìdènà láti kó ọjà láti orílẹ̀èdè kan sí òmíràn",
173
- # "diacritize this text: ": "E sun, Alaga, fun ise amalayi ti e n se ni Naijiria. E maa ba a lo, egbon!",
174
- # "clean this text": "Abin mamaki ne aikin da shugabaZn HNajeriya ybake yi. kCiF 39gaba Tda haRkGa sir!",
175
  "headline of this text": '** Sylvain Itté French ambassador don comot Niger Republic **. Sylvain Itté, di French ambassador for Niger don comot Niamey and currently e dey for flight from Ndjamena to Paris. Sylvain Itté, di French ambassador for Niger don comot Niamey very early dis morning and currently e dey for flight from Ndjamena to Paris.\n\nDi military detain Bazoum and im family for di presidential palace. Niger na former French colony, and France still get 1,500 sojas for di African country.\n\n"France don decide to withdraw dia ambassador. In di next hours our ambassador and several diplomats go return to France," Oga Macron tok.\n\nE add say di military co-operation dey "over" and French troops go leave in "di months to come".\n\n"Dis Sunday we celebrate one new step towards di sovereignty of Niger," di junta tok, for one statement wey AFP news agency quote.\n\nDi decision by Paris dey come afta months of hostility and protest against di presence of French for di kontri, wit regular demonstrations for di capital Niamey.\n\nDi move don scata France operations against Islamist militants for di wider Sahel region and Paris influence for there. But oga Macron tok say "putschists no go hold France hostage,"'
176
  }
177
  instruction_wrap = {
@@ -280,7 +287,7 @@ if st.button("Generate"):
280
 
281
 
282
  # generated_text = re.split(r"\|(end_f_text|end_of_text|end_ofext|end_of_text_||end_of_te|end_oftext)|:|`", generated_text)[0]
283
- generated_text = re.sub(r"\|(end_f_text|end_of_text|end_ofext|end_of_text_|end_of_te|end_o|end_of_tet|end_oftext)|:|`\|", "", generated_text)
284
  generated_text = generated_text.strip("\n")
285
  # print("Generated text: ", generated_text)
286
 
@@ -295,17 +302,18 @@ if st.button("Generate"):
295
  elif task == "Topic Classification" or "<topic>" in wrapped_input:
296
  generated_text = generated_text[:15]
297
  # print("split", generated_text.split(" ")[0], re.split(r"\.|\n|\*\*|\*", generated_text)[0], generated_text.split(" "))
298
- generated_text = re.split(r"\.|\n|\*\*|\*", generated_text)[0]
299
  generated_text = asyncio.run(assign_topic(generated_text))
300
 
301
  elif task == "Translation" or "<translate>" in wrapped_input:
302
  # print("split for translation: ", n_sentences, re.split(r"\.|\n", generated_text)[:n_sentences])
303
- n_sentences = count_sentences(initial_input)
304
- generated_text = ".".join(re.split(r"\.|\n", generated_text)[:n_sentences])
 
305
 
306
  elif task == "Question Generation" or "Question Generation:" in sample_text:
307
  if "?" in generated_text:
308
- generated_text = "?".join(re.split(r"\?", generated_text)[:-1]) + "?"
309
 
310
 
311
  full_output = st.empty()
 
40
  6. **Translation Tips:**
41
  - English as the target language gives the best results.
42
  - You can also do inter-language translation i.e yoruba to igbo
43
+ - Use sentences instead of words for better results.
44
  7. **Performance Note:**
45
  - The model's performance varies due to its size and training data. It performs best on text generation and translation.
46
  - For other tasks, try multiple times if model's output is not optimal (This is due to the generator's sampling parameter settings).
 
91
  st.write("**Supported Languages: English, Yoruba, Igbo, Hausa, Pidgin, Efik, Urhobo, Fulfulde, Fulah. \nResults may not be coherent for less represented languages (i.e Efik, \
92
  Urhobo, Fulfulde, Fulah).**")
93
  st.write("**It takes a while (~25s) to return an output on the first 'generate' click. Avg response time: 1-2s on GPU, 40s on CPU**")
94
+ st.write("**Model outputs 80 tokens as default. Adjust in the side bar (longer inputs/tokens will increase response time). MAX TOKENS=1024**")
95
  st.write("**For convenience, you can use chatgpt to provide input text and translate/evaluate model output.**")
96
  st.write("-" * 50)
97
  popular_topics = [
 
102
  "Philosophy", "Religion", "Society", "World"
103
  ]
104
 
105
def extract_answer(text):
    """Return the leading part of ``text`` up to the first case boundary.

    A case boundary is an ASCII lowercase letter immediately followed by an
    ASCII uppercase letter (for example the ``eT`` in ``"positiveThe"``).
    The returned prefix ends with that lowercase letter. When ``text``
    contains no such boundary it is returned unchanged.
    """
    boundary = re.search(r'[a-z][A-Z]', text)
    if boundary is None:
        return text
    # Keep the lowercase letter that closes the prefix; drop everything after.
    return text[:boundary.start() + 1]
110
+
111
  async def assign_topic(generated_text, topic_list=popular_topics):
112
  lower_generated_text = generated_text.lower()
113
  for topic in topic_list:
 
120
  sentences = re.split(r'[.!?]+\s*', text.strip())
121
  # Filter out any empty strings from the resulting list
122
  sentences = [sentence for sentence in sentences if sentence]
123
+ return len(sentences), sentences
124
 
125
  def wrap_text(text, task_value):
126
  tasks = ["<classify>", "<prompt>", "<clean>", "<title>", "<diacritize>", "<translate>"]
 
177
  "Translate 'Often, all Yoruba children...' to Yoruba": "Often, all Yoruba children take pride in speaking the Yoruba language.",
178
  "Classify the sentiment": "Anyi na-echefu oke ike.",
179
  "what is the topic of this text": "Africa Free Trade Zone: Kò sí ìdènà láti kó ọjà láti orílẹ̀èdè kan sí òmíràn",
180
+ "diacritize this text: ": "E sun, Alaga, fun ise amalayi ti e n se ni Naijiria. E maa ba a lo, egbon!",
181
+ "clean this text": "Abin mamaki ne aikin da shugabaZn HNajeriya ybake yi. kCiF 39gaba Tda haRkGa sir!",
182
  "headline of this text": '** Sylvain Itté French ambassador don comot Niger Republic **. Sylvain Itté, di French ambassador for Niger don comot Niamey and currently e dey for flight from Ndjamena to Paris. Sylvain Itté, di French ambassador for Niger don comot Niamey very early dis morning and currently e dey for flight from Ndjamena to Paris.\n\nDi military detain Bazoum and im family for di presidential palace. Niger na former French colony, and France still get 1,500 sojas for di African country.\n\n"France don decide to withdraw dia ambassador. In di next hours our ambassador and several diplomats go return to France," Oga Macron tok.\n\nE add say di military co-operation dey "over" and French troops go leave in "di months to come".\n\n"Dis Sunday we celebrate one new step towards di sovereignty of Niger," di junta tok, for one statement wey AFP news agency quote.\n\nDi decision by Paris dey come afta months of hostility and protest against di presence of French for di kontri, wit regular demonstrations for di capital Niamey.\n\nDi move don scata France operations against Islamist militants for di wider Sahel region and Paris influence for there. But oga Macron tok say "putschists no go hold France hostage,"'
183
  }
184
  instruction_wrap = {
 
287
 
288
 
289
  # generated_text = re.split(r"\|(end_f_text|end_of_text|end_ofext|end_of_text_||end_of_te|end_oftext)|:|`", generated_text)[0]
290
+ generated_text = re.sub(r"\|(end_f_text|end_of_text|end_ofext|end_of_text_|end_of_te|end_o|end_of_tet|end_oftext)|:|`", "", generated_text)
291
  generated_text = generated_text.strip("\n")
292
  # print("Generated text: ", generated_text)
293
 
 
302
  elif task == "Topic Classification" or "<topic>" in wrapped_input:
303
  generated_text = generated_text[:15]
304
  # print("split", generated_text.split(" ")[0], re.split(r"\.|\n|\*\*|\*", generated_text)[0], generated_text.split(" "))
305
+ generated_text = re.split(r"\.|\n|\*\*|\*", generated_text)[0]
306
  generated_text = asyncio.run(assign_topic(generated_text))
307
 
308
  elif task == "Translation" or "<translate>" in wrapped_input:
309
  # print("split for translation: ", n_sentences, re.split(r"\.|\n", generated_text)[:n_sentences])
310
+ n_sentences, split_= count_sentences(initial_input)
311
+ print(n_sentences,split_)
312
+ generated_text = ". ".join(re.split(r"\.|\n", generated_text)[:n_sentences]) + "."
313
 
314
  elif task == "Question Generation" or "Question Generation:" in sample_text:
315
  if "?" in generated_text:
316
+ generated_text = "? ".join(re.split(r"\?", generated_text)[:-1]) + "?"
317
 
318
 
319
  full_output = st.empty()