Update app.py
app.py
CHANGED
@@ -28,6 +28,7 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
 #os.system('pip install -q pytesseract')
 #os.system('conda install -c conda-forge poppler')
 import streamlit as st
+st.set_page_config(page_title="Anomaly_Detection_Tool", layout="wide", initial_sidebar_state="expanded")
 import torch
 from transformers import AutoTokenizer, AutoModelWithLMHead, GPT2LMHeadModel
 import docx2txt
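Note on the added line 31: st.set_page_config must be the first Streamlit command the script executes, so it is placed immediately after the streamlit import and before st.title or any widget call. A minimal sketch of the required ordering (illustrative, not part of the diff):

import streamlit as st

# Must run before any other st.* command, otherwise Streamlit raises a StreamlitAPIException.
st.set_page_config(
    page_title="Anomaly_Detection_Tool",   # browser tab title
    layout="wide",                         # use the full page width
    initial_sidebar_state="expanded",      # sidebar open on first load
)

st.title("NLP APPLICATION")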
@@ -51,7 +52,8 @@ import line_cor
 import altair as alt
 #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
 from PIL import Image
-
+#@st.experimental_singleton
+@st.cache_resource(experimental_allow_widgets=True)
 def read_pdf(file):
     # images=pdf2image.convert_from_path(file)
     # # print(type(images))
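The commented-out @st.experimental_singleton is replaced by its successor, @st.cache_resource, so the expensive objects built in these functions are created once per server process and reused across reruns and sessions; experimental_allow_widgets=True additionally permits Streamlit widget calls inside the cached function. A minimal sketch of the caching behaviour, with an illustrative function name:

import streamlit as st
import spacy

@st.cache_resource   # successor of the deprecated st.experimental_singleton
def get_nlp():
    # Runs once; later calls return the same cached pipeline object
    # instead of reloading the model from disk on every rerun.
    return spacy.load('en_core_web_sm')

nlp = get_nlp()      # cheap after the first call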
@@ -85,20 +87,23 @@ def read_pdf(file):
     # all_page_text += text + " " #page.extractText()
     # return all_page_text
 st.title("NLP APPLICATION")
-
+#@st.experimental_singleton
+@st.cache_resource(experimental_allow_widgets=True)
 def text_analyzer(my_text):
     nlp = spacy.load('en_core_web_sm')
     docx = nlp(my_text)
     # tokens = [ token.text for token in docx]
     allData = [('"Token":{},\n"Lemma":{}'.format(token.text,token.lemma_))for token in docx ]
     return allData
-
+#@st.experimental_singleton
+@st.cache_resource(experimental_allow_widgets=True)
 def load_models():
     tokenizer = AutoTokenizer.from_pretrained('gpt2-large')
     model = GPT2LMHeadModel.from_pretrained('gpt2-large')
     return tokenizer, model
 # Function For Extracting Entities
-
+#@st.experimental_singleton
+@st.cache_resource(experimental_allow_widgets=True)
 def entity_analyzer(my_text):
     nlp = spacy.load('en_core_web_sm')
     docx = nlp(my_text)
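Caching load_models means the gpt2-large tokenizer and weights are downloaded and initialised only once; the Text Generation branch below then reuses them for contrastive search. A rough sketch of how such a cached GPT-2 model is typically driven with contrastive search (the penalty_alpha, top_k and max_new_tokens values are illustrative assumptions, not taken from this diff):

from transformers import AutoTokenizer, GPT2LMHeadModel

tokenizer = AutoTokenizer.from_pretrained('gpt2-large')
model = GPT2LMHeadModel.from_pretrained('gpt2-large')

input_ids = tokenizer("The anomaly report states", return_tensors='pt').input_ids
# penalty_alpha > 0 together with top_k switches generate() to contrastive search.
output_ids = model.generate(input_ids, penalty_alpha=0.6, top_k=4, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))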
@@ -164,15 +169,20 @@ def main():
     #img = cv2.imread("scholarly_text.jpg")
     text = message
     if st.checkbox("Show Named Entities English/Bangla"):
+        st.cache_data.clear()
         entity_result = entity_analyzer(text)
         st.json(entity_result)
+
     if st.checkbox("Show Sentiment Analysis for English"):
+        st.cache_data.clear()
         blob = TextBlob(text)
         result_sentiment = blob.sentiment
         st.success(result_sentiment)
     if st.checkbox("Spell Corrections for English"):
+        st.cache_data.clear()
         st.success(TextBlob(text).correct())
     if st.checkbox("Text Generation"):
+        st.cache_data.clear()
         tokenizer, model = load_models()
         input_ids = tokenizer(text, return_tensors='pt').input_ids
         st.text("Using Hugging Face Transformer, Contrastive Search ..")
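Each feature branch now begins with st.cache_data.clear(), which drops every entry cached by @st.cache_data-decorated functions (the @st.cache_resource models above are not affected), so the branch starts from fresh data on every toggle. A minimal sketch of the pattern, with an illustrative cached function:

import streamlit as st

@st.cache_data
def tokenize(text: str):
    # Cached per (function, arguments) pair until cleared.
    return text.split()

if st.checkbox("Show tokens"):
    st.cache_data.clear()          # wipe all st.cache_data entries first
    st.write(tokenize("recomputed on every run of this branch"))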
@@ -187,6 +197,7 @@ def main():
         # st.success(summary_result)
     if st.checkbox("Mark to English Text Summarization!"):
         #st.title("Summarize Your Text for English only!")
+        st.cache_data.clear()
         tokenizer = AutoTokenizer.from_pretrained('t5-base')
         model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
         #st.text("Using Google T5 Transformer ..")
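For context, the summarization branch around this hunk follows the usual T5 recipe: prefix the input with "summarize: ", call generate, then decode the first sequence. A rough sketch under that assumption (the generation parameters are illustrative):

from transformers import AutoTokenizer, AutoModelWithLMHead

tokenizer = AutoTokenizer.from_pretrained('t5-base')
model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)

text = "Streamlit reruns the whole script on every interaction, so heavy models are cached."
inputs = tokenizer.encode("summarize: " + text, return_tensors='pt',
                          max_length=512, truncation=True)
summary_ids = model.generate(inputs, max_length=80, min_length=10, length_penalty=2.0)
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))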
@@ -198,6 +209,7 @@ def main():
         summary = tokenizer.decode(summary_ids[0])
         st.success(summary)
     if st.button("refresh"):
+        st.cache_data.clear()
         st.experimental_rerun()
 if __name__ == '__main__':
     main()
|