Update app.py
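Replaces the in-process transformers setup with calls to the hosted Hugging Face Inference API. The transformers import and the t5-base tokenizer/model loading in main() are commented out; module-level API_URL/headers (gpt2-large, for "Text Generation") and API_URL0/headers0 (t5-base, for "Mark to English Text Summarization!") take their place, and the summarization checkbox now posts the input text through a small requests-based query() helper instead of calling model.generate().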
app.py CHANGED
@@ -30,7 +30,7 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
 import streamlit as st
 st.set_page_config(page_title="Anomaly_Detection_Tool", layout="wide", initial_sidebar_state="expanded")
 import torch
-from transformers import AutoTokenizer, AutoModelWithLMHead, GPT2LMHeadModel
+#from transformers import AutoTokenizer, AutoModelWithLMHead, GPT2LMHeadModel
 import docx2txt
 from PIL import Image
 from PyPDF2 import PdfFileReader
@@ -52,7 +52,10 @@ import line_cor
 import altair as alt
 #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
 from PIL import Image
-
+API_URL = "https://api-inference.huggingface.co/models/gpt2-large"
+headers = {"Authorization": "Bearer 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"}
+API_URL0 = "https://api-inference.huggingface.co/models/t5-base"
+headers0 = {"Authorization": "Bearer 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"}
 def read_pdf(file):
     # images=pdf2image.convert_from_path(file)
     # # print(type(images))
@@ -110,9 +113,9 @@ def entity_analyzer(my_text):
     allData = ['"Token":{},\n"Entities":{}'.format(tokens,entities)]
     return allData
 def main():
-    global tokenizer, model
-    tokenizer = AutoTokenizer.from_pretrained('t5-base')
-    model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
+    #global tokenizer, model
+    #tokenizer = AutoTokenizer.from_pretrained('t5-base')
+    #model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
     """ NLP Based Application with Streamlit """
     st.markdown("""
     #### Description
@@ -179,10 +182,7 @@ def main():
         st.success(result_sentiment)
     if st.checkbox("Spell Corrections for English"):
         st.success(TextBlob(text).correct())
-    if st.checkbox("Text Generation"):
-        API_URL = "https://api-inference.huggingface.co/models/gpt2-large"
-        headers = {"Authorization": "Bearer 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"}
-
+    if st.checkbox("Text Generation"):
         def query(payload):
             response = requests.post(API_URL, headers=headers, json=payload)
             return response.json()
@@ -191,29 +191,12 @@
             "inputs": text,
         })
        st.success(output)
-    # tokenizer, model = load_models()
-    # input_ids = tokenizer(text, return_tensors='pt').input_ids
-    # st.text("Using Hugging Face Transformer, Contrastive Search ..")
-    # output = model.generate(input_ids, max_length=128)
-    #st.success(tokenizer.decode(output[0], skip_special_tokens=True))
-    # if st.checkbox("Mark here, Text Summarization for English or Bangla!"):
-    # st.subheader("Summarize Your Text for English and Bangla Texts!")
-    # message = st.text_area("Enter the Text","Type please ..")
-    # st.text("Using Gensim Summarizer ..")
-    # st.success(message)
-    # summary_result = summarize(text)
-    # st.success(summary_result)
     if st.checkbox("Mark to English Text Summarization!"):
-
-
-
-
-
-
-        summary_ids = model.generate(inputs, max_length=150, min_length=80, length_penalty=5., num_beams=2)
-        summary = tokenizer.decode(summary_ids[0])
-        st.success(summary)
-    #if st.button("refresh"):
-    #    st.experimental_rerun()
+        def query(payload):
+            response = requests.post(API_URL0, headers=headers0, json=payload)
+            return response.json()
+        output = query({
+            "inputs": text})
+        st.success(output)
 if __name__ == '__main__':
     main()
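Both checkbox handlers now follow the same hosted-inference pattern, sketched standalone below. This is a minimal illustration rather than the app's exact code: HF_TOKEN and the query(api_url, payload) signature are assumptions for this sketch (the commit itself hardcodes two bearer tokens and defines a separate no-URL-argument query() inside each checkbox).

    # Minimal sketch of the Inference API pattern the commit adopts.
    # Assumes a token in the HF_TOKEN environment variable; the model
    # URLs are the ones the commit uses.
    import os
    import requests

    API_URL = "https://api-inference.huggingface.co/models/gpt2-large"  # text generation
    API_URL0 = "https://api-inference.huggingface.co/models/t5-base"    # summarization
    HEADERS = {"Authorization": "Bearer " + os.environ["HF_TOKEN"]}

    def query(api_url, payload):
        # POST the payload as JSON; the API answers with JSON (a list of
        # results, or an {"error": ...} object, e.g. while the model is
        # still loading).
        response = requests.post(api_url, headers=HEADERS, json=payload)
        return response.json()

    if __name__ == "__main__":
        text = "Streamlit turns Python scripts into shareable web apps."
        print(query(API_URL, {"inputs": text}))   # gpt2-large continuation
        print(query(API_URL0, {"inputs": text}))  # t5-base output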
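One design note on the new module-level headers: both bearer tokens are committed in plain text, and a token pushed to a public repository should be treated as leaked and revoked. A common alternative in Streamlit apps is to keep the token in the deployment's secrets and read it at runtime, e.g. headers = {"Authorization": f"Bearer {st.secrets['HF_TOKEN']}"}, where HF_TOKEN is a placeholder secret name.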
|