Spaces:

BeardedMonster
/

SabiYarn_125M

Running

App Files Files Community

BeardedMonster committed on Jul 15, 2024

Commit

cfc543f

verified ·

1 Parent(s): 5c62089

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -7

app.py CHANGED Viewed

@@ -35,7 +35,7 @@ temperature = 0.99
 top_k = 50
 top_p = 0.95
 repetition_penalty = 4.0
-length_penalty = 2.0
 # Create sliders in the sidebar
 max_length = st.sidebar.slider("Max. output length", min_value=10, max_value=500, value=max_length)
@@ -137,7 +137,8 @@ sample_texts = {
     "Efik: Ke eyo ...":"Ke eyo Jesus ye mme mbet esie, etop emi ama ada ifụre ọsọk mme Jew oro esịt okobụn̄ọde ke ntak idiọkido ke Israel, oro ẹkenyụn̄ ẹdude ke mfụhọ ke itie-ufụn mme nsunsu ido edinam Ido Ukpono Mme Jew eke akpa isua ikie.",
     "Tell me a story in pidgin": "Tell me a story Pidgin",
     "who are you?": "who are you?",
-    # "Translate 'how are you?' to Yoruba": "Translate 'how are you?' to Yoruba",
     "Classify the sentiment": "Anyi na-echefu oke ike.",
     "what is the topic of this text": "Africa Free Trade Zone: Kò sí ìdènà láti kó ọjà láti orílẹ̀èdè kan sí òmíràn",
     "diacritize this text: ": "E sun, Alaga, fun ise amalayi ti e n se ni Naijiria. E maa ba a lo, egbon!",
@@ -146,9 +147,11 @@ sample_texts = {
 }
 instruction_wrap = {
     "Tell me a story in pidgin": "<prompt> Tell me a story in pidgin <response>:",
     "Translate 'how are you?' to Yoruba": "<prompt> Translate 'how are you?' to Yoruba <response>:",
     "who are you?": "<prompt> who are you? <response>:",
     "Classify the sentiment" : "<classify> Anyi na-echefu oke ike. <sentiment>",
     "clean this text": "<clean> Abin mamaki ne aikin da shugabaZn HNajeriya ybake yi. kCiF 39gaba Tda haRkGa sir! <pcm>",
     "diacritize this text: ": "<diacritize> E sun, Alaga, fun ise amalayi ti e n se ni Naijiria. E maa ba a lo, egbon!  <yor>",
@@ -169,15 +172,15 @@ task_options = {
     "Text Cleaning": "<clean> {} "
 }
-# Language options for diacritize and clean tasks
 language_options = {
     "select": "",
     "Yoruba": "<yor>",
     "Hausa": "<hau>",
     "Ibo": "<ibo>",
     "Pidgin": "<pcm>",
-    "Efik": "<efi>",
-    "Urhobo": "<urh>",
     "Fulah": "<ful>"
 }
@@ -203,7 +206,7 @@ def wrap_text(text, task_value):
 # Text input
-user_input = st.text_area("Enter text below **(PLEASE, FIRST READ ALL INSTRUCTIONS IN THE SIDEBAR FOR A BETTER EXPERIENCE)**: ", sample_texts[sample_text])
 user_input = instruction_wrap.get(sample_texts.get(user_input, user_input), user_input)
 print("Final user input: ", user_input)
 if st.button("Generate"):
@@ -231,7 +234,7 @@ if st.button("Generate"):
                 output = model.generate(input_ids, **generation_config)
                 generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
-            generated_text  = re.sub(r"\|(end_f_text|end_of_text|end_ofext|end_oftext)", " ", generated_text.split("|end_of_text|")[0])
             if task == "Sentiment Classification":
                 if "negative" in generated_text.lower():
@@ -243,6 +246,9 @@ if st.button("Generate"):
             elif task == "Topic Classification":
                 generated_text  = generated_text.split(" ")[0][:20]
             full_output = st.empty()

 top_k = 50
 top_p = 0.95
 repetition_penalty = 4.0
+length_penalty = 3.0
 # Create sliders in the sidebar
 max_length = st.sidebar.slider("Max. output length", min_value=10, max_value=500, value=max_length)
     "Efik: Ke eyo ...":"Ke eyo Jesus ye mme mbet esie, etop emi ama ada ifụre ọsọk mme Jew oro esịt okobụn̄ọde ke ntak idiọkido ke Israel, oro ẹkenyụn̄ ẹdude ke mfụhọ ke itie-ufụn mme nsunsu ido edinam Ido Ukpono Mme Jew eke akpa isua ikie.",
     "Tell me a story in pidgin": "Tell me a story Pidgin",
     "who are you?": "who are you?",
+    "Speak Yoruba": "Speak Yoruba",
+    "Translate 'Often, all Yoruba children...' to Yoruba": "Often, all Yoruba children take pride in speaking the Yoruba language.",
     "Classify the sentiment": "Anyi na-echefu oke ike.",
     "what is the topic of this text": "Africa Free Trade Zone: Kò sí ìdènà láti kó ọjà láti orílẹ̀èdè kan sí òmíràn",
     "diacritize this text: ": "E sun, Alaga, fun ise amalayi ti e n se ni Naijiria. E maa ba a lo, egbon!",
 }
 instruction_wrap = {
+    "Translate 'Often, all Yoruba children...' to Yoruba":"<translate> Often, all Yoruba children take pride in speaking the Yoruba language. <yor>"
     "Tell me a story in pidgin": "<prompt> Tell me a story in pidgin <response>:",
     "Translate 'how are you?' to Yoruba": "<prompt> Translate 'how are you?' to Yoruba <response>:",
     "who are you?": "<prompt> who are you? <response>:",
+    "Speak Yoruba": "<prompt> Speak Yoruba <response>:",
     "Classify the sentiment" : "<classify> Anyi na-echefu oke ike. <sentiment>",
     "clean this text": "<clean> Abin mamaki ne aikin da shugabaZn HNajeriya ybake yi. kCiF 39gaba Tda haRkGa sir! <pcm>",
     "diacritize this text: ": "<diacritize> E sun, Alaga, fun ise amalayi ti e n se ni Naijiria. E maa ba a lo, egbon!  <yor>",
     "Text Cleaning": "<clean> {} "
 }
+# Language options for diacritize, translation and clean tasks
 language_options = {
     "select": "",
     "Yoruba": "<yor>",
     "Hausa": "<hau>",
     "Ibo": "<ibo>",
     "Pidgin": "<pcm>",
+    # "Efik": "<efi>",
+    # "Urhobo": "<urh>",
     "Fulah": "<ful>"
 }
 # Text input
+user_input = st.text_area("Enter text below **(PLEASE, FIRST READ ALL INSTRUCTIONS IN THE SIDEBAR CAREFULLY FOR THE BEST EXPERIENCE)**: ", sample_texts[sample_text])
 user_input = instruction_wrap.get(sample_texts.get(user_input, user_input), user_input)
 print("Final user input: ", user_input)
 if st.button("Generate"):
                 output = model.generate(input_ids, **generation_config)
                 generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
+            generated_text  = re.sub(r"\|(end_f_text|end_of_text|end_ofext|end_oftext)|:|`", " ", generated_text.split("|end_of_text|")[0])
             if task == "Sentiment Classification":
                 if "negative" in generated_text.lower():
             elif task == "Topic Classification":
                 generated_text  = generated_text.split(" ")[0][:20]
+            elif task == "Translation":
+                n_sentences = len(user_input)
+                generated_text = ".".join(generated_text.split(".")[: n_sentences])
             full_output = st.empty()