Update app.py
Browse files
app.py
CHANGED
@@ -183,12 +183,11 @@ def footer():
|
|
183 |
|
184 |
myargs = [
|
185 |
"Made in ",
|
186 |
-
"<img src='data:image/jpg;base64,{}' class='img-fluid' width='
|
187 |
img_to_bytes(str(logo_path) + "/vocali_logo.jpg")
|
188 |
),
|
189 |
-
link("https://vocali.net/", "VÓCALI"),
|
190 |
" with funding ",
|
191 |
-
"<img src='data:image/png;base64,{}' class='img-fluid' width='
|
192 |
img_to_bytes(str(funding_path) + "/logo_funding.png")
|
193 |
),
|
194 |
br(),
|
@@ -203,6 +202,8 @@ if __name__ == "__main__":
|
|
203 |
st.session_state.text = ""
|
204 |
|
205 |
st.title('Sanivert Punctuation And Capitalization Restoration')
|
|
|
|
|
206 |
model_es = AutoModelForTokenClassification.from_pretrained("VOCALINLP/spanish_capitalization_punctuation_restoration_sanivert")
|
207 |
tokenizer_es = AutoTokenizer.from_pretrained("VOCALINLP/spanish_capitalization_punctuation_restoration_sanivert")
|
208 |
pipe_es = pipeline("token-classification", model=model_es, tokenizer=tokenizer_es)
|
@@ -215,7 +216,31 @@ if __name__ == "__main__":
|
|
215 |
tokenizer_pt = AutoTokenizer.from_pretrained("VOCALINLP/portuguese_capitalization_punctuation_restoration_sanivert")
|
216 |
pipe_pt = pipeline("token-classification", model=model_pt, tokenizer=tokenizer_pt)
|
217 |
|
|
|
218 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
input_text = st.selectbox(
|
220 |
label = "Choose a language",
|
221 |
options = ["Spanish", "Portuguese", "Catalan"]
|
|
|
183 |
|
184 |
myargs = [
|
185 |
"Made in ",
|
186 |
+
"<img src='data:image/jpg;base64,{}' class='img-fluid' width='100' height='100'>".format(
|
187 |
img_to_bytes(str(logo_path) + "/vocali_logo.jpg")
|
188 |
),
|
|
|
189 |
" with funding ",
|
190 |
+
"<img src='data:image/png;base64,{}' class='img-fluid' width='350' height='100'>".format(
|
191 |
img_to_bytes(str(funding_path) + "/logo_funding.png")
|
192 |
),
|
193 |
br(),
|
|
|
202 |
st.session_state.text = ""
|
203 |
|
204 |
st.title('Sanivert Punctuation And Capitalization Restoration')
|
205 |
+
st.markdown("The model restores the following punctuation -- [? ! , . :] and also the capitalization of words.")
|
206 |
+
|
207 |
model_es = AutoModelForTokenClassification.from_pretrained("VOCALINLP/spanish_capitalization_punctuation_restoration_sanivert")
|
208 |
tokenizer_es = AutoTokenizer.from_pretrained("VOCALINLP/spanish_capitalization_punctuation_restoration_sanivert")
|
209 |
pipe_es = pipeline("token-classification", model=model_es, tokenizer=tokenizer_es)
|
|
|
216 |
tokenizer_pt = AutoTokenizer.from_pretrained("VOCALINLP/portuguese_capitalization_punctuation_restoration_sanivert")
|
217 |
pipe_pt = pipeline("token-classification", model=model_pt, tokenizer=tokenizer_pt)
|
218 |
|
219 |
+
st.subheader('Text examples in Spanish')
|
220 |
|
221 |
+
data_spanish = [['has tenido alguna enfermedad en la última semana', '¿Has tenido alguna enfermedad en la última semana?'],
|
222 |
+
['sufre la enfermedad de parkinson', 'Sufre la enfermedad de Parkinson'],
|
223 |
+
['el paciente presenta los siguientes síntomas náuseas vértigo disnea fiebre y dolor abdominal', 'El paciente presenta los siguientes síntomas: náuseas, vértigo, disnea, fiebre y dolor abdominal.']]
|
224 |
+
|
225 |
+
st.table(pd.DataFrame(data_spanish, columns=['Input', 'Output']))
|
226 |
+
|
227 |
+
st.subheader('Text examples in Catalan')
|
228 |
+
|
229 |
+
data = [['has tingut alguna malaltia a la darrera setmana', 'Has tingut alguna malaltia a la darrera setmana?'],
|
230 |
+
['pateix la malaltia de parkinson', 'Pateix la malaltia de Parkinson.'],
|
231 |
+
["pacient presenta els següents símptomes nàusees vertigen dispnea febre i dolor abdominal", "Pacient presenta els següents símptomes: nàusees, vertigen, dispnea, febre i dolor abdominal."]]
|
232 |
+
|
233 |
+
st.table(pd.DataFrame(data, columns=['Input', 'Output']))
|
234 |
+
|
235 |
+
st.subheader('Text examples in Portuguese')
|
236 |
+
|
237 |
+
data_pt = [['sofre da doença de parkinson', 'Sofre da doença de parkinson?'],
|
238 |
+
['teve alguma doença na última semana', 'Teve alguma doença na última semana?'],
|
239 |
+
['o doente apresenta os seguintes sintomas náuseas vertigens dispneia febre e dor abdominal', 'O doente apresenta os seguintes sintomas: náuseas, vertigens, dispneia, febre e dor abdominal.']]
|
240 |
+
|
241 |
+
st.table(pd.DataFrame(data_pt, columns=['Input', 'Output']))
|
242 |
+
|
243 |
+
|
244 |
input_text = st.selectbox(
|
245 |
label = "Choose a language",
|
246 |
options = ["Spanish", "Portuguese", "Catalan"]
|