Spaces:

vishnun
/

SpellCorrectorT5

Runtime error

File size: 1,925 Bytes

5071fce
41faa11
5071fce
6afb4da
a074b00
41faa11
 
403d5d1
d98a25d
a5975a1
41faa11
a5975a1
 
d98a25d
5c031c4
d98a25d
19d6696
5c031c4
5071fce
 
 
41faa11
5071fce
 
 
 
 
 
41faa11
 
 
5071fce
 
aa1d224
 
4a0240d
90eac43
4a0240d
7945e1e
 
 
 
 
 
 
 
 
aa1d224
96d8424

import html

import streamlit as st
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Hub identifier shared by the tokenizer and the seq2seq model.
MODEL_ID = "vishnun/tinygram"

st.title("SpellCorrectorT5")
st.markdown('SpellCorrectorT5 is a fine-tuned version of **pre-trained t5-small model** modelled on randomly selected 50000 sentences modified by [imputing random noises/errors](./random_noiser.py) and trained using transformers. It not only looks for _spelling errors but also looks for the semantics_ in the sentence and suggest other possible words for the incorrect word.')

# Streamlit re-executes this script on every user interaction, so an uncached
# from_pretrained() call would reload the weights on each rerun.  Prefer
# st.cache_resource when this Streamlit release provides it and fall back to
# the legacy st.cache API otherwise.
_cache_resource = getattr(st, "cache_resource", None) or st.cache(allow_output_mutation=True)


@_cache_resource
def _load_model(model_id):
    """Load the tokenizer and seq2seq model for ``model_id``.

    Returns:
        A ``(tokenizer, model)`` tuple as produced by
        ``AutoTokenizer.from_pretrained`` and
        ``AutoModelForSeq2SeqLM.from_pretrained``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
    return tokenizer, model


ttokenizer, tmodel = _load_model(MODEL_ID)
form = st.form("T5-form")

# Deliberately misspelled sentences offered as ready-made inputs.
# (A stray double comma after the second entry used to make the whole
# script fail to parse.)
examples = [
    "I will return it to yu once it is donr",
    "Iu is going to rain",
    "Wheir do you live?",
    "It wis great mieting with you all",
]

# The example picked in the dropdown becomes the default value of the
# free-text field, so the user can either submit it as-is or type over it.
selected_example = form.selectbox(label="Choose an example", options=examples)
form.write("(or)")
input_text = form.text_input(
    label='Enter your own sentence',
    value=selected_example,
)
# True only on the rerun triggered by pressing the form's button.
submit = form.form_submit_button("Submit")

def _highlight_corrections(source_text, corrected_text):
    """Return ``corrected_text`` as HTML with the changed words emphasised.

    Both texts are lower-cased and split on single spaces.  A word of
    ``corrected_text`` that does not occur among the words of ``source_text``
    is wrapped in a bold, green ``<span>``; all other words are emitted as-is.

    Every word is HTML-escaped because the result is rendered with
    ``unsafe_allow_html=True`` and originates from user input / model output.

    Args:
        source_text: The sentence the user submitted.
        corrected_text: One decoded candidate produced by the model.

    Returns:
        A space-joined HTML fragment whose first word is capitalised.
    """
    # Build the lookup once instead of re-splitting the input for every word.
    known_words = set(source_text.lower().split(" "))

    pieces = []
    for position, word in enumerate(corrected_text.lower().split(" ")):
        # Capitalise the plain word rather than the assembled markup:
        # str.capitalize() on a string starting with "<span" leaves the
        # visible first word in lower case.
        shown = word.capitalize() if position == 0 else word
        shown = html.escape(shown)
        if word in known_words:
            pieces.append(shown)
        else:
            pieces.append(
                '<span style="font-weight:bold; color:rgb(150,255,100);">'
                + shown
                + '</span>'
            )
    return " ".join(pieces)


if submit:
    if not input_text.strip():
        # Nothing meaningful to correct; avoid sampling from an empty prompt.
        st.warning("Please enter a sentence to correct.")
    else:
        input_ids = ttokenizer.encode(input_text, return_tensors='pt')

        # Sample two candidate corrections (top-k / nucleus sampling);
        # max_length caps the length of each generated sequence at 50 tokens.
        outputs = tmodel.generate(
            input_ids,
            do_sample=True,
            max_length=50,
            top_p=0.999,
            top_k=45,
            num_return_sequences=2,
        )

        st.subheader("Most probable: ")

        for sequence in outputs:
            out_text = ttokenizer.decode(sequence, skip_special_tokens=True)
            # Plain candidate first, then the same sentence with the words
            # that differ from the input highlighted, then a divider.
            st.success(out_text.capitalize())
            st.markdown(
                _highlight_corrections(input_text, out_text),
                unsafe_allow_html=True,
            )
            st.markdown("***", unsafe_allow_html=True)