# app.py - Streamlit demo for the SpellCorrectorT5 model.
# Source: Hugging Face file view (author: vishnun, commit 204b010 "Update app.py", 1.93 kB).
import streamlit as st
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
# Page header and a short description of the model being demonstrated.
st.title("SpellCorrectorT5")
st.markdown('SpellCorrectorT5 is a fine-tuned version of **pre-trained t5-small model** modelled on randomly selected 50000 sentences modified by [imputing random noises/errors](./random_noiser.py) and trained using transformers. It not only looks for _spelling errors but also looks for the semantics_ in the sentence and suggest other possible words for the incorrect word.')

# Hugging Face Hub id of the fine-tuned checkpoint.
m_name = "vishnun/tinygram"

# Streamlit re-executes this script from the top on every widget interaction,
# so an uncached load would rebuild the tokenizer and model on each rerun.
# `st.cache_resource` is the current API for caching such global resources;
# older Streamlit releases only provide the legacy `st.cache`.
_cache_model = getattr(st, "cache_resource", None)
if _cache_model is None:
    _cache_model = st.cache(allow_output_mutation=True)


@_cache_model
def _load_model(name):
    """Load the tokenizer and seq2seq model for the Hub id ``name``.

    Args:
        name: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(name),
        AutoModelForSeq2SeqLM.from_pretrained(name),
    )


ttokenizer, tmodel = _load_model(m_name)
# Misspelled sample sentences offered in the drop-down.
examples = [
    "I will return it to yu once it is donr",
    "Iu is going to rain",
    "Wheir do you live?",
    "It wis great mieting with you all",
]

# All widgets live in one form, so they are submitted together via the button.
form = st.form("T5-form")
chosen_example = form.selectbox("Choose an example", examples)
form.write("(or)")
# The free-text field is pre-filled with the selected example; whatever it
# holds is the sentence that gets corrected.
input_text = form.text_input("Enter your own sentence", value=chosen_example)
submit = form.form_submit_button("Submit")
def _highlight_changes(original, corrected):
    """Render ``corrected`` as HTML with the words absent from ``original`` highlighted.

    Both sentences are lower-cased and split on single spaces. A corrected
    word counts as unchanged when it occurs anywhere in ``original``; every
    other word is wrapped in a bold, coloured ``<span>``. The first word is
    capitalized, and every word is HTML-escaped because the result is meant
    to be rendered with ``unsafe_allow_html=True``.

    Args:
        original: The sentence the user submitted.
        corrected: A sentence produced by the model.

    Returns:
        A single-line HTML string.
    """
    import html  # local import: only this helper needs it

    # Build the lookup once instead of re-splitting the input for every word.
    known_words = set(original.lower().split(" "))
    pieces = []
    for position, word in enumerate(corrected.lower().split(" ")):
        # Capitalize the visible text rather than the assembled markup, so a
        # highlighted first word is capitalized too.
        shown = html.escape(word.capitalize() if position == 0 else word)
        if word in known_words:
            pieces.append(shown)
        else:
            pieces.append(
                '<span style="font-weight:bold; color:rgb(150,255,100);">'
                + shown
                + "</span>"
            )
    return " ".join(pieces)


if submit:
    input_ids = ttokenizer.encode(input_text, return_tensors='pt')
    # Sample two candidate corrections. `max_length` caps each generated
    # sequence at 50 tokens; for an encoder-decoder model such as T5 the
    # input sentence does not count towards that limit.
    outputs = tmodel.generate(
        input_ids,
        do_sample=True,
        max_length=50,
        top_p=0.999,
        top_k=45,
        num_return_sequences=2,
    )
    st.subheader("Most probable: ")
    for y in outputs:
        out_text = ttokenizer.decode(y, skip_special_tokens=True)
        # Plain corrected sentence first, then the same sentence with the
        # changed words highlighted, followed by a horizontal rule.
        st.success(out_text.capitalize())
        st.markdown(_highlight_changes(input_text, out_text), unsafe_allow_html=True)
        st.markdown("***", unsafe_allow_html=True)