Spaces:

VOCALINLP
/

punctuation_and_capitalization_restoration_sanivert

Runtime error

App Files Files Community

jcg00v committed on Mar 2, 2024

Commit

9e95735

verified ·

1 Parent(s): 67037c0

Upload app.py

Browse files

Files changed (1) hide show

app.py +31 -34

app.py CHANGED Viewed

@@ -3,10 +3,9 @@ from PIL import Image
 from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
 from streamlit_extras.app_logo import add_logo
 def logo():
-	add_logo("vocali_logo.jpg", height=300)
 def get_result_text_es_pt (list_entity, text, lang):
     result_words = []
@@ -45,7 +44,7 @@ def get_result_text_es_pt (list_entity, text, lang):
                 word = (punc_in + word.capitalize()) if punc_in in ["¿", "¡"] else (word.capitalize() + punc_in)
         if tag != "l":
-            word = '<span style="font-weight:bold; color:rgb(142, 208, 129);">' + word + '</span>''
         if subword == True:
             result_words[-1] = word
@@ -90,6 +89,7 @@ def get_result_text_ca (list_entity, text):
                 word = (punc_in + word) if punc_in in ["¿", "¡"] else (word + punc_in)
             elif tag[-1] == "u":
                 word = (punc_in + word.capitalize()) if punc_in in ["¿", "¡"] else (word.capitalize() + punc_in)
         if tag != "l":
             word = '<span style="font-weight:bold; color:rgb(142, 208, 129);">' + word + '</span>'
@@ -99,44 +99,41 @@ def get_result_text_ca (list_entity, text):
             result_words.append(word)
     return " ".join(result_words)
 if __name__ == "__main__":
-	logo()
-	st.title('Sanivert Punctuation And Capitalization Restoration')
-	model_es = AutoModelForTokenClassification.from_pretrained("VOCALINLP/spanish_capitalization_punctuation_restoration_sanivert")
     tokenizer_es = AutoTokenizer.from_pretrained("VOCALINLP/spanish_capitalization_punctuation_restoration_sanivert")
-	pipe_es = pipeline("token-classification", model=model_es, tokenizer=tokenizer_es)
-	model_ca = ModelForTokenClassification.from_pretrained("VOCALINLP/catalan_capitalization_punctuation_restoration_sanivert")
     tokenizer_ca = AutoTokenizer.from_pretrained("VOCALINLP/catalan_capitalization_punctuation_restoration_sanivert")
-	pipe_ca = pipeline("token-classification", model=model_ca, tokenizer=tokenizer_ca)
-	model_pt = AutoModelForTokenClassification.from_pretrained("VOCALINLP/portuguese_capitalization_punctuation_restoration_sanivert")
     tokenizer_pt = AutoTokenizer.from_pretrained("VOCALINLP/portuguese_capitalization_punctuation_restoration_sanivert")
-	pipe_pt = pipeline("token-classification", model=model_ca, tokenizer=tokenizer_ca)
-	input_text = st.selectbox(
       label = "Choose an language",
       options = ["Spanish", "Portuguese", "Catalan"]
 	)
-	st.subheader("Enter the text to be analyzed.")
-	text = st.text_input('Enter text') #text is stored in this variable
-	if input_text == "Spanish":
-		result_pipe = pipe_es(text)
-		out = get_result_text_es_pt(result_pipe, text, "es")
-	elif input_text == "Portuguese":
-		result_pipe = pipe_pt(text)
-		out = get_result_text_es_pt(result_pipe, text, "pt")
-	elif input_text == "Catalan":
-		result_pipe = pipe_ca(text)
-		out = get_result_text_ca(result_pipe, text)
-	out = get_prediction(text, input_text)
-	st.markdown(out, unsafe_allow_html=True)
-	text = ""

 from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
 from streamlit_extras.app_logo import add_logo
 def logo():
     """Display the app logo by passing the logo image file to ``add_logo``."""
     # Image path and height are fixed for this app.
     logo_path = "vocali_logo.jpeg"
     add_logo(logo_path, height=300)
 def get_result_text_es_pt (list_entity, text, lang):
     result_words = []
                 word = (punc_in + word.capitalize()) if punc_in in ["¿", "¡"] else (word.capitalize() + punc_in)
         if tag != "l":
+            word = '<span style="font-weight:bold; color:rgb(142, 208, 129);">' + word + '</span>'
         if subword == True:
             result_words[-1] = word
                 word = (punc_in + word) if punc_in in ["¿", "¡"] else (word + punc_in)
             elif tag[-1] == "u":
                 word = (punc_in + word.capitalize()) if punc_in in ["¿", "¡"] else (word.capitalize() + punc_in)
         if tag != "l":
             word = '<span style="font-weight:bold; color:rgb(142, 208, 129);">' + word + '</span>'
             result_words.append(word)
     return " ".join(result_words)
 if __name__ == "__main__":
     # Script entry point: build the page, load one token-classification
     # pipeline per supported language, then restore punctuation and
     # capitalization for the text the user entered.
     logo()
     st.title('Sanivert Punctuation And Capitalization Restoration')
     # NOTE(review): the models are loaded on every run of this script;
     # consider caching them (e.g. st.cache_resource) if start-up is slow.
     # Spanish model, tokenizer and pipeline.
     model_es = AutoModelForTokenClassification.from_pretrained("VOCALINLP/spanish_capitalization_punctuation_restoration_sanivert")
     tokenizer_es = AutoTokenizer.from_pretrained("VOCALINLP/spanish_capitalization_punctuation_restoration_sanivert")
     pipe_es = pipeline("token-classification", model=model_es, tokenizer=tokenizer_es)
     # Catalan model, tokenizer and pipeline.
     model_ca = AutoModelForTokenClassification.from_pretrained("VOCALINLP/catalan_capitalization_punctuation_restoration_sanivert")
     tokenizer_ca = AutoTokenizer.from_pretrained("VOCALINLP/catalan_capitalization_punctuation_restoration_sanivert")
     pipe_ca = pipeline("token-classification", model=model_ca, tokenizer=tokenizer_ca)
     # Portuguese model, tokenizer and pipeline. The pipeline must be built
     # from the Portuguese objects; it previously reused the Catalan ones.
     model_pt = AutoModelForTokenClassification.from_pretrained("VOCALINLP/portuguese_capitalization_punctuation_restoration_sanivert")
     tokenizer_pt = AutoTokenizer.from_pretrained("VOCALINLP/portuguese_capitalization_punctuation_restoration_sanivert")
     pipe_pt = pipeline("token-classification", model=model_pt, tokenizer=tokenizer_pt)
     # Language selector; the chosen option picks the pipeline below.
     input_text = st.selectbox(
         label = "Choose an language",
         options = ["Spanish", "Portuguese", "Catalan"]
     )
     st.subheader("Enter the text to be analyzed.")
     text = st.text_input('Enter text')  # raw user text to be restored
     # Run the pipeline for the selected language and format its entities.
     # Spanish and Portuguese share a formatter; Catalan has its own.
     if input_text == "Spanish":
         result_pipe = pipe_es(text)
         out = get_result_text_es_pt(result_pipe, text, "es")
     elif input_text == "Portuguese":
         result_pipe = pipe_pt(text)
         out = get_result_text_es_pt(result_pipe, text, "pt")
     elif input_text == "Catalan":
         result_pipe = pipe_ca(text)
         out = get_result_text_ca(result_pipe, text)
     # `out` is rendered directly. The former call to the undefined
     # `get_prediction` overwrote it and raised NameError.
     # unsafe_allow_html is required because the formatters emit <span> tags.
     st.markdown(out, unsafe_allow_html=True)