|
import streamlit as st |
|
from transformers import pipeline |
|
import unicodedata |
|
import re |
|
|
|
def contains_text(text):
    """Report whether *text* holds at least one Latin or Hebrew letter.

    Returns the ``re.Match`` for the first ASCII Latin letter if there is
    one; otherwise the result of searching for a Hebrew letter in the
    alef-to-tav range (a ``re.Match``, or ``None`` when neither script is
    present). Callers are expected to use the result for its truthiness.
    """
    latin_match = re.search('[A-Za-z]', text)
    if latin_match:
        return latin_match
    # No Latin letter found: fall back to the Hebrew letter range.
    return re.search('[א-ת]', text)
|
|
|
def normalize(text):
    """Return *text* in NFC form with a few Hebrew code points canonicalized.

    After Unicode NFC normalization, the following single-character
    substitutions are applied, in order:

    * U+05BA (holam haser for vav) -> U+05B9 (holam)
    * U+05BE (maqaf)               -> ASCII hyphen ``-``
    * gershayim punctuation        -> ASCII double quote ``"``
    * geresh punctuation           -> ASCII apostrophe ``'``
    """
    result = unicodedata.normalize('NFC', text)
    substitutions = (
        ('\u05ba', '\u05b9'),
        ('\u05be', '-'),
        ('״', '"'),
        ("׳", "'"),
    )
    for old, new in substitutions:
        result = result.replace(old, new)
    return result
|
|
|
@st.cache_resource(show_spinner=False)
def _load_pipeline():
    """Build the TaatikNet text2text-generation pipeline.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, the model would be re-instantiated each
    time the user submits text. ``st.cache_resource`` keeps a single
    pipeline object alive for the server process and hands the same
    instance back on later reruns.
    """
    return pipeline("text2text-generation", model='malper/taatiknet', device_map="auto")


# The spinner is only visible for a noticeable time on the first run
# (cache miss); later reruns return the cached pipeline almost immediately.
with st.spinner('Loading TaatikNet framework...'):
    pipe = _load_pipeline()
st.success('Loaded!')
|
|
|
# Free-form input box; Streamlit reruns the script when the user submits.
text = st.text_area('Enter text and press ctrl/command+enter:')

if text:
    # Keep only whitespace-separated tokens that contain a Latin or Hebrew
    # letter, and normalize each one before handing it to the model.
    words = []
    for token in text.split():
        if contains_text(token):
            words.append(normalize(token))

    if words:
        # Each word is processed independently; five beams are searched and
        # five candidate sequences are requested per word.
        outputs = pipe(words, max_length=200, num_beams=5, num_return_sequences=5)

        # zip(*outputs) regroups the per-word candidate lists by position,
        # so the i-th sentence is built from every word's i-th candidate.
        texts = []
        for candidates_at_rank in zip(*outputs):
            sentence = ' '.join(
                candidate['generated_text'] for candidate in candidates_at_rank
            )
            texts.append(sentence)

        # The first combination is shown as the main result (presumably the
        # highest-scoring candidates - confirm against the pipeline's
        # ordering); the rest are listed as alternatives.
        st.write(texts[0])
        st.write('Other options:')
        for alternative in texts[1:]:
            st.write(alternative)