Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -35,7 +35,7 @@ temperature = 0.99
|
|
35 |
top_k = 50
|
36 |
top_p = 0.95
|
37 |
repetition_penalty = 4.0
|
38 |
-
length_penalty =
|
39 |
|
40 |
# Create sliders in the sidebar
|
41 |
max_length = st.sidebar.slider("Max. output length", min_value=10, max_value=500, value=max_length)
|
@@ -137,7 +137,8 @@ sample_texts = {
|
|
137 |
"Efik: Ke eyo ...":"Ke eyo Jesus ye mme mbet esie, etop emi ama ada ifụre ọsọk mme Jew oro esịt okobụn̄ọde ke ntak idiọkido ke Israel, oro ẹkenyụn̄ ẹdude ke mfụhọ ke itie-ufụn mme nsunsu ido edinam Ido Ukpono Mme Jew eke akpa isua ikie.",
|
138 |
"Tell me a story in pidgin": "Tell me a story Pidgin",
|
139 |
"who are you?": "who are you?",
|
140 |
-
|
|
|
141 |
"Classify the sentiment": "Anyi na-echefu oke ike.",
|
142 |
"what is the topic of this text": "Africa Free Trade Zone: Kò sí ìdènà láti kó ọjà láti orílẹ̀èdè kan sí òmíràn",
|
143 |
"diacritize this text: ": "E sun, Alaga, fun ise amalayi ti e n se ni Naijiria. E maa ba a lo, egbon!",
|
@@ -146,9 +147,11 @@ sample_texts = {
|
|
146 |
}
|
147 |
|
148 |
instruction_wrap = {
|
|
|
149 |
"Tell me a story in pidgin": "<prompt> Tell me a story in pidgin <response>:",
|
150 |
"Translate 'how are you?' to Yoruba": "<prompt> Translate 'how are you?' to Yoruba <response>:",
|
151 |
"who are you?": "<prompt> who are you? <response>:",
|
|
|
152 |
"Classify the sentiment" : "<classify> Anyi na-echefu oke ike. <sentiment>",
|
153 |
"clean this text": "<clean> Abin mamaki ne aikin da shugabaZn HNajeriya ybake yi. kCiF 39gaba Tda haRkGa sir! <pcm>",
|
154 |
"diacritize this text: ": "<diacritize> E sun, Alaga, fun ise amalayi ti e n se ni Naijiria. E maa ba a lo, egbon! <yor>",
|
@@ -169,15 +172,15 @@ task_options = {
|
|
169 |
"Text Cleaning": "<clean> {} "
|
170 |
}
|
171 |
|
172 |
-
# Language options for diacritize and clean tasks
|
173 |
language_options = {
|
174 |
"select": "",
|
175 |
"Yoruba": "<yor>",
|
176 |
"Hausa": "<hau>",
|
177 |
"Ibo": "<ibo>",
|
178 |
"Pidgin": "<pcm>",
|
179 |
-
"Efik": "<efi>",
|
180 |
-
"Urhobo": "<urh>",
|
181 |
"Fulah": "<ful>"
|
182 |
}
|
183 |
|
@@ -203,7 +206,7 @@ def wrap_text(text, task_value):
|
|
203 |
|
204 |
|
205 |
# Text input
|
206 |
-
user_input = st.text_area("Enter text below **(PLEASE, FIRST READ ALL INSTRUCTIONS IN THE SIDEBAR FOR
|
207 |
user_input = instruction_wrap.get(sample_texts.get(user_input, user_input), user_input)
|
208 |
print("Final user input: ", user_input)
|
209 |
if st.button("Generate"):
|
@@ -231,7 +234,7 @@ if st.button("Generate"):
|
|
231 |
output = model.generate(input_ids, **generation_config)
|
232 |
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
|
233 |
|
234 |
-
generated_text = re.sub(r"\|(end_f_text|end_of_text|end_ofext|end_oftext)", " ", generated_text.split("|end_of_text|")[0])
|
235 |
|
236 |
if task == "Sentiment Classification":
|
237 |
if "negative" in generated_text.lower():
|
@@ -243,6 +246,9 @@ if st.button("Generate"):
|
|
243 |
|
244 |
elif task == "Topic Classification":
|
245 |
generated_text = generated_text.split(" ")[0][:20]
|
|
|
|
|
|
|
246 |
|
247 |
full_output = st.empty()
|
248 |
|
|
|
35 |
top_k = 50
|
36 |
top_p = 0.95
|
37 |
repetition_penalty = 4.0
|
38 |
+
length_penalty = 3.0
|
39 |
|
40 |
# Create sliders in the sidebar
|
41 |
max_length = st.sidebar.slider("Max. output length", min_value=10, max_value=500, value=max_length)
|
|
|
137 |
"Efik: Ke eyo ...":"Ke eyo Jesus ye mme mbet esie, etop emi ama ada ifụre ọsọk mme Jew oro esịt okobụn̄ọde ke ntak idiọkido ke Israel, oro ẹkenyụn̄ ẹdude ke mfụhọ ke itie-ufụn mme nsunsu ido edinam Ido Ukpono Mme Jew eke akpa isua ikie.",
|
138 |
"Tell me a story in pidgin": "Tell me a story Pidgin",
|
139 |
"who are you?": "who are you?",
|
140 |
+
"Speak Yoruba": "Speak Yoruba",
|
141 |
+
"Translate 'Often, all Yoruba children...' to Yoruba": "Often, all Yoruba children take pride in speaking the Yoruba language.",
|
142 |
"Classify the sentiment": "Anyi na-echefu oke ike.",
|
143 |
"what is the topic of this text": "Africa Free Trade Zone: Kò sí ìdènà láti kó ọjà láti orílẹ̀èdè kan sí òmíràn",
|
144 |
"diacritize this text: ": "E sun, Alaga, fun ise amalayi ti e n se ni Naijiria. E maa ba a lo, egbon!",
|
|
|
147 |
}
|
148 |
|
149 |
instruction_wrap = {
|
150 |
+
"Translate 'Often, all Yoruba children...' to Yoruba":"<translate> Often, all Yoruba children take pride in speaking the Yoruba language. <yor>"
|
151 |
"Tell me a story in pidgin": "<prompt> Tell me a story in pidgin <response>:",
|
152 |
"Translate 'how are you?' to Yoruba": "<prompt> Translate 'how are you?' to Yoruba <response>:",
|
153 |
"who are you?": "<prompt> who are you? <response>:",
|
154 |
+
"Speak Yoruba": "<prompt> Speak Yoruba <response>:",
|
155 |
"Classify the sentiment" : "<classify> Anyi na-echefu oke ike. <sentiment>",
|
156 |
"clean this text": "<clean> Abin mamaki ne aikin da shugabaZn HNajeriya ybake yi. kCiF 39gaba Tda haRkGa sir! <pcm>",
|
157 |
"diacritize this text: ": "<diacritize> E sun, Alaga, fun ise amalayi ti e n se ni Naijiria. E maa ba a lo, egbon! <yor>",
|
|
|
172 |
"Text Cleaning": "<clean> {} "
|
173 |
}
|
174 |
|
175 |
+
# Language options for diacritize, translation and clean tasks
|
176 |
language_options = {
|
177 |
"select": "",
|
178 |
"Yoruba": "<yor>",
|
179 |
"Hausa": "<hau>",
|
180 |
"Ibo": "<ibo>",
|
181 |
"Pidgin": "<pcm>",
|
182 |
+
# "Efik": "<efi>",
|
183 |
+
# "Urhobo": "<urh>",
|
184 |
"Fulah": "<ful>"
|
185 |
}
|
186 |
|
|
|
206 |
|
207 |
|
208 |
# Text input
|
209 |
+
user_input = st.text_area("Enter text below **(PLEASE, FIRST READ ALL INSTRUCTIONS IN THE SIDEBAR CAREFULLY FOR THE BEST EXPERIENCE)**: ", sample_texts[sample_text])
|
210 |
user_input = instruction_wrap.get(sample_texts.get(user_input, user_input), user_input)
|
211 |
print("Final user input: ", user_input)
|
212 |
if st.button("Generate"):
|
|
|
234 |
output = model.generate(input_ids, **generation_config)
|
235 |
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
|
236 |
|
237 |
+
generated_text = re.sub(r"\|(end_f_text|end_of_text|end_ofext|end_oftext)|:|`", " ", generated_text.split("|end_of_text|")[0])
|
238 |
|
239 |
if task == "Sentiment Classification":
|
240 |
if "negative" in generated_text.lower():
|
|
|
246 |
|
247 |
elif task == "Topic Classification":
|
248 |
generated_text = generated_text.split(" ")[0][:20]
|
249 |
+
elif task == "Translation":
|
250 |
+
# Cap the translation at the number of sentences in the input. Count runs of
# "." (so an ellipsis counts once) rather than characters, and keep at least
# one sentence when the input has no terminating period.
n_sentences = max(1, len(re.findall(r"\.+", user_input)))
|
251 |
+
generated_text = ".".join(generated_text.split(".")[: n_sentences])
|
252 |
|
253 |
full_output = st.empty()
|
254 |
|