Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -39,7 +39,7 @@ st.sidebar.write("""
|
|
39 |
- Click the Generate button.
|
40 |
6. **Translation Tips:**
|
41 |
- English as the target language gives the best results.
|
42 |
-
- You can also
|
43 |
7. **Performance Note:**
|
44 |
- The model's performance varies due to its size and training data. It performs best on text generation and translation.
|
45 |
- For other tasks, try multiple times if model's output is not optimal (This is due to the generator's sampling parameter settings).
|
@@ -115,6 +115,11 @@ def count_sentences(text):
|
|
115 |
sentences = [sentence for sentence in sentences if sentence]
|
116 |
return len(sentences)
|
117 |
|
|
|
|
|
|
|
|
|
|
|
118 |
|
119 |
async def generate_from_api(user_input, generation_config):
|
120 |
urls = [
|
@@ -202,7 +207,8 @@ instruction_wrap = {
|
|
202 |
# "diacritize this text: ": "<diacritize> E sun, Alaga, fun ise amalayi ti e n se ni Naijiria. E maa ba a lo, egbon! <yor>",
|
203 |
# "what is the topic of this text": "<classify> Africa Free Trade Zone: Kò sí ìdènà láti kó ọjà láti orílẹ̀èdè kan sí òmíràn <topic>",
|
204 |
# 'headline of this text': '<title> ** Sylvain Itté French ambassador don comot Niger Republic **. Sylvain Itté, di French ambassador for Niger don comot Niamey and currently e dey for flight from Ndjamena to Paris. Sylvain Itté, di French ambassador for Niger don comot Niamey very early dis morning and currently e dey for flight from Ndjamena to Paris.\n\nDi military detain Bazoum and im family for di presidential palace. Niger na former French colony, and France still get 1,500 sojas for di African country.\n\n"France don decide to withdraw dia ambassador. In di next hours our ambassador and several diplomats go return to France," Oga Macron tok.\n\nE add say di military co-operation dey "over" and French troops go leave in "di months to come".\n\n"Dis Sunday we celebrate one new step towards di sovereignty of Niger," di junta tok, for one statement wey AFP news agency quote.\n\nDi decision by Paris dey come afta months of hostility and protest against di presence of French for di kontri, wit regular demonstrations for di capital Niamey.\n\nDi move don scata France operations against Islamist militants for di wider Sahel region and Paris influence for there. But oga Macron tok say "putschists no go hold France hostage," <headline>',
|
205 |
-
#
|
|
|
206 |
# Task options
|
207 |
task_options = {
|
208 |
"select": "{}",
|
@@ -215,6 +221,7 @@ task_options = {
|
|
215 |
"Text Diacritization": "<diacritize> {} ",
|
216 |
"Question Generation": "<prompt> {} <response>:",
|
217 |
"Question-Answering" : "<prompt> {} <response>:",
|
|
|
218 |
"Text Cleaning": "<clean> {} "
|
219 |
}
|
220 |
# Language options for diacritize, translation and clean tasks
|
@@ -239,25 +246,24 @@ if task in ["Text Diacritization", "Text Cleaning", "Translation"]:
|
|
239 |
language = st.selectbox("Select a Nigerian language:", list(language_options.keys()))
|
240 |
task_value = f"{task_options[task]} {language_options[language]}"
|
241 |
else:
|
242 |
-
task_value = task_options[task
|
243 |
-
|
244 |
-
|
245 |
-
if any(task in text for task in tasks):
|
246 |
-
return text
|
247 |
-
return task_value.format(text)
|
248 |
# Text input
|
249 |
user_input = st.text_area("Enter text below **(PLEASE, FIRST READ ALL INSTRUCTIONS IN THE SIDEBAR CAREFULLY FOR THE BEST EXPERIENCE)**: ", sample_texts.get(sample_text, sample_text))
|
250 |
-
|
|
|
|
|
251 |
|
252 |
if task == "select":
|
253 |
user_input = instruction_wrap.get(user_input, user_input)
|
254 |
|
255 |
-
print("Final user input: ", user_input)
|
256 |
if st.button("Generate"):
|
257 |
if user_input:
|
258 |
with st.spinner("Please wait..."):
|
259 |
wrapped_input = wrap_text(user_input, task_value)
|
260 |
-
print("wrapped_input: ", wrapped_input)
|
261 |
generation_config["max_new_tokens"]= min(max_new_tokens, 1024 - len(tokenizer.tokenize(wrapped_input)))
|
262 |
start_time = time.time()
|
263 |
|
@@ -274,7 +280,7 @@ if st.button("Generate"):
|
|
274 |
# generated_text = re.split(r"\|(end_f_text|end_of_text|end_ofext|end_of_text_||end_of_te|end_oftext)|:|`", generated_text)[0]
|
275 |
generated_text = re.sub(r"\|(end_f_text|end_of_text|end_ofext|end_of_text_|end_of_te|end_o|end_of_tet|end_oftext)|:|`\|", "", generated_text)
|
276 |
generated_text = generated_text.strip("\n")
|
277 |
-
print("Generated text: ", generated_text)
|
278 |
|
279 |
if task == "Sentiment Classification" or "<sentiment>" in wrapped_input :
|
280 |
if "negative" in generated_text.lower():
|
@@ -286,12 +292,12 @@ if st.button("Generate"):
|
|
286 |
|
287 |
elif task == "Topic Classification" or "<topic>" in wrapped_input:
|
288 |
generated_text = generated_text[:15]
|
289 |
-
print("split", generated_text.split(" ")[0], re.split(r"\.|\n|\*\*|\*", generated_text)[0], generated_text.split(" "))
|
290 |
generated_text = re.split(r"\.|\n|\*\*|\*", generated_text)[0]
|
291 |
generated_text = asyncio.run(assign_topic(generated_text))
|
292 |
|
293 |
elif task == "Translation" or "<translate>" in wrapped_input:
|
294 |
-
print("split for translation: ", n_sentences, re.split(r"\.|\n", generated_text)[:n_sentences])
|
295 |
generated_text = ".".join(re.split(r"\.|\n", generated_text)[:n_sentences])
|
296 |
|
297 |
elif task == "Question Generation" or "Question Generation:" in sample_text:
|
|
|
39 |
- Click the Generate button.
|
40 |
6. **Translation Tips:**
|
41 |
- English as the target language gives the best results.
|
42 |
+
- You can also do inter-language translation i.e yoruba to igbo
|
43 |
7. **Performance Note:**
|
44 |
- The model's performance varies due to its size and training data. It performs best on text generation and translation.
|
45 |
- For other tasks, try multiple times if model's output is not optimal (This is due to the generator's sampling parameter settings).
|
|
|
115 |
sentences = [sentence for sentence in sentences if sentence]
|
116 |
return len(sentences)
|
117 |
|
118 |
+
def wrap_text(text: str, task_value: str) -> str:
    """Wrap raw user text in a task template unless it is already tagged.

    Args:
        text: The text entered by the user.
        task_value: A template containing one ``{}`` placeholder; it is
            filled with ``text`` via ``str.format``.

    Returns:
        ``text`` unchanged when it already contains one of the known opening
        task tags, otherwise ``task_value`` with ``text`` substituted in.
    """
    # Opening tags that mark the input as already wrapped. "<summarize>" is
    # listed so that input pre-tagged for the "Text Summarization" template
    # ("<summarize> {} <summary>:") is not wrapped a second time.
    opening_tags = (
        "<classify>",
        "<prompt>",
        "<clean>",
        "<title>",
        "<diacritize>",
        "<translate>",
        "<summarize>",
    )
    if any(tag in text for tag in opening_tags):
        return text
    return task_value.format(text)
|
123 |
|
124 |
async def generate_from_api(user_input, generation_config):
|
125 |
urls = [
|
|
|
207 |
# "diacritize this text: ": "<diacritize> E sun, Alaga, fun ise amalayi ti e n se ni Naijiria. E maa ba a lo, egbon! <yor>",
|
208 |
# "what is the topic of this text": "<classify> Africa Free Trade Zone: Kò sí ìdènà láti kó ọjà láti orílẹ̀èdè kan sí òmíràn <topic>",
|
209 |
# 'headline of this text': '<title> ** Sylvain Itté French ambassador don comot Niger Republic **. Sylvain Itté, di French ambassador for Niger don comot Niamey and currently e dey for flight from Ndjamena to Paris. Sylvain Itté, di French ambassador for Niger don comot Niamey very early dis morning and currently e dey for flight from Ndjamena to Paris.\n\nDi military detain Bazoum and im family for di presidential palace. Niger na former French colony, and France still get 1,500 sojas for di African country.\n\n"France don decide to withdraw dia ambassador. In di next hours our ambassador and several diplomats go return to France," Oga Macron tok.\n\nE add say di military co-operation dey "over" and French troops go leave in "di months to come".\n\n"Dis Sunday we celebrate one new step towards di sovereignty of Niger," di junta tok, for one statement wey AFP news agency quote.\n\nDi decision by Paris dey come afta months of hostility and protest against di presence of French for di kontri, wit regular demonstrations for di capital Niamey.\n\nDi move don scata France operations against Islamist militants for di wider Sahel region and Paris influence for there. But oga Macron tok say "putschists no go hold France hostage," <headline>',
|
210 |
+
#
|
211 |
+
|
212 |
# Task options
|
213 |
task_options = {
|
214 |
"select": "{}",
|
|
|
221 |
"Text Diacritization": "<diacritize> {} ",
|
222 |
"Question Generation": "<prompt> {} <response>:",
|
223 |
"Question-Answering" : "<prompt> {} <response>:",
|
224 |
+
"Text Summarization" : "<summarize> {} <summary>:",
|
225 |
"Text Cleaning": "<clean> {} "
|
226 |
}
|
227 |
# Language options for diacritize, translation and clean tasks
|
|
|
246 |
language = st.selectbox("Select a Nigerian language:", list(language_options.keys()))
|
247 |
task_value = f"{task_options[task]} {language_options[language]}"
|
248 |
else:
|
249 |
+
task_value = task_options[task
|
250 |
+
|
251 |
+
|
|
|
|
|
|
|
252 |
# Text input
|
253 |
user_input = st.text_area("Enter text below **(PLEASE, FIRST READ ALL INSTRUCTIONS IN THE SIDEBAR CAREFULLY FOR THE BEST EXPERIENCE)**: ", sample_texts.get(sample_text, sample_text))
|
254 |
+
|
255 |
+
if task == "Translation":
|
256 |
+
n_sentences = count_sentences(user_input)
|
257 |
|
258 |
if task == "select":
|
259 |
user_input = instruction_wrap.get(user_input, user_input)
|
260 |
|
261 |
+
# print("Final user input: ", user_input)
|
262 |
if st.button("Generate"):
|
263 |
if user_input:
|
264 |
with st.spinner("Please wait..."):
|
265 |
wrapped_input = wrap_text(user_input, task_value)
|
266 |
+
# print("wrapped_input: ", wrapped_input)
|
267 |
generation_config["max_new_tokens"]= min(max_new_tokens, 1024 - len(tokenizer.tokenize(wrapped_input)))
|
268 |
start_time = time.time()
|
269 |
|
|
|
280 |
# generated_text = re.split(r"\|(end_f_text|end_of_text|end_ofext|end_of_text_||end_of_te|end_oftext)|:|`", generated_text)[0]
|
281 |
generated_text = re.sub(r"\|(end_f_text|end_of_text|end_ofext|end_of_text_|end_of_te|end_o|end_of_tet|end_oftext)|:|`\|", "", generated_text)
|
282 |
generated_text = generated_text.strip("\n")
|
283 |
+
# print("Generated text: ", generated_text)
|
284 |
|
285 |
if task == "Sentiment Classification" or "<sentiment>" in wrapped_input :
|
286 |
if "negative" in generated_text.lower():
|
|
|
292 |
|
293 |
elif task == "Topic Classification" or "<topic>" in wrapped_input:
|
294 |
generated_text = generated_text[:15]
|
295 |
+
# print("split", generated_text.split(" ")[0], re.split(r"\.|\n|\*\*|\*", generated_text)[0], generated_text.split(" "))
|
296 |
generated_text = re.split(r"\.|\n|\*\*|\*", generated_text)[0]
|
297 |
generated_text = asyncio.run(assign_topic(generated_text))
|
298 |
|
299 |
elif task == "Translation" or "<translate>" in wrapped_input:
|
300 |
+
# print("split for translation: ", n_sentences, re.split(r"\.|\n", generated_text)[:n_sentences])
|
301 |
generated_text = ".".join(re.split(r"\.|\n", generated_text)[:n_sentences])
|
302 |
|
303 |
elif task == "Question Generation" or "Question Generation:" in sample_text:
|