File size: 1,035 Bytes
561e37f b4a47e2 3d3f4ed b4a47e2 561e37f 3d3f4ed 14388f0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
import streamlit as st
from transformers import pipeline
import unicodedata
import re
def contains_text(text: str) -> bool:
    """Report whether ``text`` contains at least one Latin or Hebrew letter.

    Args:
        text: The string to inspect.

    Returns:
        ``True`` if ``text`` has an ASCII letter (``A-Z``, ``a-z``) or a
        character in the range U+05D0-U+05EA (alef through tav);
        ``False`` otherwise, including for the empty string.
    """
    # One character class covers both alphabets, so a single scan suffices.
    # \u05d0-\u05ea is the same range as the literal alef-tav class.
    return re.search('[A-Za-z\u05d0-\u05ea]', text) is not None
# Single-character substitutions applied after Unicode normalization.
# Built once at import time so each call is a single translate() pass.
_CHAR_REPLACEMENTS = str.maketrans({
    '\u05ba': '\u05b9',  # HEBREW POINT HOLAM HASER FOR VAV -> HEBREW POINT HOLAM
    '\u05be': '-',       # HEBREW PUNCTUATION MAQAF -> ASCII hyphen-minus
    '\u05f4': '"',       # HEBREW PUNCTUATION GERSHAYIM -> ASCII double quote
    '\u05f3': "'",       # HEBREW PUNCTUATION GERESH -> ASCII apostrophe
})


def normalize(text: str) -> str:
    """Return ``text`` in NFC form with a few Hebrew marks canonicalized.

    The string is first converted to Unicode NFC, then four characters are
    substituted: U+05BA becomes U+05B9, the maqaf (U+05BE) becomes ``-``,
    gershayim (U+05F4) becomes ``"`` and geresh (U+05F3) becomes ``'``.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string. Characters outside the four substitutions
        are left as NFC produced them.
    """
    return unicodedata.normalize('NFC', text).translate(_CHAR_REPLACEMENTS)
@st.cache_resource(show_spinner=False)
def load_pipeline():
    """Build the TaatikNet text2text-generation pipeline.

    Streamlit re-executes this script on every interaction; caching the
    pipeline as a resource avoids reloading the model on each rerun.
    NOTE: ``st.cache_resource`` requires Streamlit >= 1.18.
    """
    return pipeline("text2text-generation", model='malper/taatiknet', device_map="auto")


# The spinner is only visibly long on the first (uncached) load.
with st.spinner('Loading TaatikNet framework...'):
    pipe = load_pipeline()
st.success('Loaded!')

text = st.text_area('Enter text and press ctrl/command+enter:')
if text:
    # Split on whitespace, drop tokens rejected by contains_text, and
    # normalize the remaining ones before sending them to the model.
    words = [normalize(x) for x in text.split() if contains_text(x)]
    if words:
        # Each word is generated independently with 5 beams / 5 candidates.
        # Presumably `outputs` holds one list of candidate dicts per word --
        # confirm against the transformers pipeline documentation.
        outputs = pipe(words, max_length=200, num_beams=5, num_return_sequences=5)
        # Transpose word-major candidates into candidate-major sentences:
        # the i-th text joins the i-th candidate of every word.
        texts = [
            ' '.join(x['generated_text'] for x in option)
            for option in zip(*outputs)
        ]
        st.write(texts[0])
        st.write('Other options:')
        for option in texts[1:]:
            st.write(option)