Soumen committed on
Commit
a07988a
·
1 Parent(s): 7af9178

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -32
app.py CHANGED
@@ -30,7 +30,7 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
30
  import streamlit as st
31
  st.set_page_config(page_title="Anomaly_Detection_Tool", layout="wide", initial_sidebar_state="expanded")
32
  import torch
33
- from transformers import AutoTokenizer, AutoModelWithLMHead, GPT2LMHeadModel
34
  import docx2txt
35
  from PIL import Image
36
  from PyPDF2 import PdfFileReader
@@ -52,7 +52,10 @@ import line_cor
52
  import altair as alt
53
  #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
54
  from PIL import Image
55
-
 
 
 
56
  def read_pdf(file):
57
  # images=pdf2image.convert_from_path(file)
58
  # # print(type(images))
@@ -110,9 +113,9 @@ def entity_analyzer(my_text):
110
  allData = ['"Token":{},\n"Entities":{}'.format(tokens,entities)]
111
  return allData
112
  def main():
113
- global tokenizer, model
114
- tokenizer = AutoTokenizer.from_pretrained('t5-base')
115
- model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
116
  """ NLP Based Application with Streamlit """
117
  st.markdown("""
118
  #### Description
@@ -179,10 +182,7 @@ def main():
179
  st.success(result_sentiment)
180
  if st.checkbox("Spell Corrections for English"):
181
  st.success(TextBlob(text).correct())
182
- if st.checkbox("Text Generation"):
183
- API_URL = "https://api-inference.huggingface.co/models/gpt2-large"
184
- headers = {"Authorization": "Bearer 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"}
185
-
186
  def query(payload):
187
  response = requests.post(API_URL, headers=headers, json=payload)
188
  return response.json()
@@ -191,29 +191,12 @@ def main():
191
  "inputs": text,
192
  })
193
  st.success(output)
194
- # tokenizer, model = load_models()
195
- # input_ids = tokenizer(text, return_tensors='pt').input_ids
196
- # st.text("Using Hugging Face Transformer, Contrastive Search ..")
197
- # output = model.generate(input_ids, max_length=128)
198
- #st.success(tokenizer.decode(output[0], skip_special_tokens=True))
199
- # if st.checkbox("Mark here, Text Summarization for English or Bangla!"):
200
- # st.subheader("Summarize Your Text for English and Bangla Texts!")
201
- # message = st.text_area("Enter the Text","Type please ..")
202
- # st.text("Using Gensim Summarizer ..")
203
- # st.success(message)
204
- # summary_result = summarize(text)
205
- # st.success(summary_result)
206
  if st.checkbox("Mark to English Text Summarization!"):
207
- #st.title("Summarize Your Text for English only!")
208
- #st.text("Using Google T5 Transformer ..")
209
- inputs = tokenizer.encode("summarize: " + text,
210
- return_tensors='pt',
211
- max_length= 512,
212
- truncation=True)
213
- summary_ids = model.generate(inputs, max_length=150, min_length=80, length_penalty=5., num_beams=2)
214
- summary = tokenizer.decode(summary_ids[0])
215
- st.success(summary)
216
- #if st.button("refresh"):
217
- # st.experimental_rerun()
218
  if __name__ == '__main__':
219
  main()
 
30
  import streamlit as st
31
  st.set_page_config(page_title="Anomaly_Detection_Tool", layout="wide", initial_sidebar_state="expanded")
32
  import torch
33
+ #from transformers import AutoTokenizer, AutoModelWithLMHead, GPT2LMHeadModel
34
  import docx2txt
35
  from PIL import Image
36
  from PyPDF2 import PdfFileReader
 
52
  import altair as alt
53
  #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
54
  from PIL import Image
55
+ API_URL = "https://api-inference.huggingface.co/models/gpt2-large"
56
+ headers = {"Authorization": "Bearer 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"}
57
+ API_URL0 = "https://api-inference.huggingface.co/models/t5-base"
58
+ headers0 = {"Authorization": "Bearer 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"}
59
  def read_pdf(file):
60
  # images=pdf2image.convert_from_path(file)
61
  # # print(type(images))
 
113
  allData = ['"Token":{},\n"Entities":{}'.format(tokens,entities)]
114
  return allData
115
  def main():
116
+ #global tokenizer, model
117
+ #tokenizer = AutoTokenizer.from_pretrained('t5-base')
118
+ #model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
119
  """ NLP Based Application with Streamlit """
120
  st.markdown("""
121
  #### Description
 
182
  st.success(result_sentiment)
183
  if st.checkbox("Spell Corrections for English"):
184
  st.success(TextBlob(text).correct())
185
+ if st.checkbox("Text Generation"):
 
 
 
186
  def query(payload):
187
  response = requests.post(API_URL, headers=headers, json=payload)
188
  return response.json()
 
191
  "inputs": text,
192
  })
193
  st.success(output)
 
 
 
 
 
 
 
 
 
 
 
 
194
  if st.checkbox("Mark to English Text Summarization!"):
195
+ def query(payload):
196
+ response = requests.post(API_URL0, headers=headers0, json=payload)
197
+ return response.json()
198
+ output = query({
199
+ "inputs": text})
200
+ st.success(output)
 
 
 
 
 
201
  if __name__ == '__main__':
202
  main()