File size: 1,925 Bytes
5071fce
41faa11
5071fce
6afb4da
a074b00
41faa11
 
403d5d1
d98a25d
a5975a1
41faa11
a5975a1
 
d98a25d
5c031c4
d98a25d
19d6696
5c031c4
5071fce
 
 
41faa11
5071fce
 
 
 
 
 
41faa11
 
 
5071fce
 
aa1d224
 
4a0240d
90eac43
4a0240d
7945e1e
 
 
 
 
 
 
 
 
aa1d224
96d8424
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import html

import streamlit as st
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Page header and description.
st.title("SpellCorrectorT5")
st.markdown('SpellCorrectorT5 is a fine-tuned version of **pre-trained t5-small model** modelled on randomly selected 50000 sentences modified by [imputing random noises/errors](./random_noiser.py) and trained using transformers. It not only looks for _spelling errors but also looks for the semantics_ in the sentence and suggest other possible words for the incorrect word.')

# Load the tokenizer and seq2seq model from the "vishnun/tinygram" checkpoint.
# NOTE(review): Streamlit re-executes the script on each interaction, so these
# loads presumably repeat on every rerun -- consider caching them
# (e.g. st.cache_resource) if the installed Streamlit version supports it.
ttokenizer = AutoTokenizer.from_pretrained("vishnun/tinygram")
tmodel = AutoModelForSeq2SeqLM.from_pretrained("vishnun/tinygram")
form = st.form("T5-form")

# Misspelled sample sentences offered in the dropdown.
examples = [
    "I will return it to yu once it is donr",
    "Iu is going to rain",
    "Wheir do you live?",
    "It wis great mieting with you all",
]

# The selected example is used as the default value of the free-text box;
# the text box content is what is ultimately sent to the model.
input_text = form.selectbox(label="Choose an example", options=examples)
form.write("(or)")
input_text = form.text_input(label='Enter your own sentence', value=input_text)
submit = form.form_submit_button("Submit")

def _highlight_changes(source_text, corrected_text):
  """Return ``corrected_text`` as an HTML snippet with changed words in bold.

  Both sentences are lower-cased and split on whitespace. Every word of the
  corrected sentence that does not occur anywhere in the source sentence is
  wrapped in a bold, coloured ``<span>``. The first word is capitalized, and
  each word is HTML-escaped because the result is rendered with
  ``unsafe_allow_html=True``.
  """
  span_open = '<span style="font-weight:bold; color:rgb(150,255,100);">'
  # Built once: membership is tested for every word of the corrected sentence.
  source_words = set(source_text.lower().split())

  pieces = []
  for position, word in enumerate(corrected_text.lower().split()):
    # Capitalize the word itself (not the surrounding markup) so the sentence
    # starts with a capital even when its first word is highlighted.
    shown = html.escape(word.capitalize() if position == 0 else word)
    if word in source_words:
      pieces.append(shown)
    else:
      pieces.append(span_open + shown + '</span>')
  return " ".join(pieces)


if submit:
  input_ids = ttokenizer.encode(input_text, return_tensors='pt')

  # Sample two candidate corrections; max_length caps each generated
  # sequence at 50 tokens.
  outputs = tmodel.generate(
    input_ids,
    do_sample=True,
    max_length=50,
    top_p=0.999,
    top_k=45,
    num_return_sequences=2
  )

  st.subheader("Most probable: ")

  for sequence in outputs:
    out_text = ttokenizer.decode(sequence, skip_special_tokens=True)
    # Plain corrected sentence first, then the same sentence with the words
    # that differ from the input highlighted, followed by a horizontal rule.
    st.success(out_text.capitalize())
    st.markdown(_highlight_changes(input_text, out_text), unsafe_allow_html=True)
    st.markdown("***")