|
""" |
|
## App: NLP App with Streamlit |
|
Credits: Streamlit Team, Marc Skov Madsen (for the Awesome-Streamlit gallery) |
|
Description |
|
This is a Natural Language Processing (NLP) based app that demonstrates basic NLP concepts such as the following: |
|
|
|
+ Tokenization & Lemmatization using Spacy |
|
|
|
+ Named Entity Recognition(NER) using SpaCy |
|
|
|
+ Sentiment Analysis using TextBlob |
|
|
|
+ Document/Text Summarization using Gensim/T5 |
|
|
|
This is built with Streamlit Framework, an awesome framework for building ML and NLP tools. |
|
Purpose |
|
To perform basic and useful NLP tasks with Streamlit, spaCy, TextBlob and Gensim |
|
""" |
|
|
|
import streamlit as st |
|
import os |
|
import torch |
|
from transformers import AutoTokenizer, AutoModelWithLMHead |
|
|
|
|
|
from textblob import TextBlob |
|
import spacy |
|
from gensim.summarization import summarize |
|
import requests |
|
import cv2 |
|
import numpy as np |
|
import pytesseract |
|
# Point pytesseract at the default Windows install location only when that
# binary actually exists. On any other setup pytesseract's own default
# command is left untouched, so the executable is looked up on PATH instead
# of failing on a path that is not there.
_WINDOWS_TESSERACT_CMD = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.isfile(_WINDOWS_TESSERACT_CMD):
    pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT_CMD
|
from PIL import Image |
|
|
|
@st.cache(allow_output_mutation=True)
def _load_t5_summarizer():
    """Load the pretrained ``t5-base`` tokenizer and model once.

    Streamlit re-executes the script on every widget interaction, so the
    loading is wrapped in ``st.cache`` to reuse the already-loaded objects
    instead of reading the weights again on each rerun.
    ``allow_output_mutation=True`` tells Streamlit not to hash the returned
    objects to detect mutation.

    Returns:
        tuple: ``(tokenizer, model)`` as produced by the two
        ``from_pretrained`` calls.
    """
    t5_tokenizer = AutoTokenizer.from_pretrained('t5-base')
    t5_model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
    return t5_tokenizer, t5_model


# Module-level names used by the T5 summarization branch of the app.
tokenizer, model = _load_t5_summarizer()
|
@st.cache
def text_analyzer(my_text):
    """Pair every token of ``my_text`` with its lemma.

    The text is processed with spaCy's ``en_core_web_sm`` pipeline.

    Args:
        my_text: Raw text to analyse.

    Returns:
        list: One formatted string per token, holding a ``"Token":`` entry
        with the token text and, on the next line, a ``"Lemma":`` entry
        with its lemma.
    """
    row_template = '"Token":{},\n"Lemma":{}'
    pipeline = spacy.load('en_core_web_sm')

    rows = []
    for tok in pipeline(my_text):
        rows.append(row_template.format(tok.text, tok.lemma_))
    return rows
|
|
|
|
|
@st.cache
def entity_analyzer(my_text):
    """Collect the tokens and named entities found in ``my_text``.

    The text is processed with spaCy's ``en_core_web_sm`` pipeline.

    Args:
        my_text: Raw text to analyse.

    Returns:
        list: A single-element list whose string holds the list of token
        texts and the list of ``(entity text, entity label)`` pairs.
    """
    report_template = '"Token":{},\n"Entities":{}'
    parsed = spacy.load('en_core_web_sm')(my_text)

    token_texts = [tok.text for tok in parsed]
    labelled_entities = [(ent.text, ent.label_) for ent in parsed.ents]

    report = report_template.format(token_texts, labelled_entities)
    return [report]
|
|
|
|
|
def _ocr_image(image_file):
    """Run Tesseract OCR on an uploaded or captured image.

    The image is opened with Pillow and handed to pytesseract directly, so
    no shared ``img.png`` file is written to the working directory.

    Args:
        image_file: File-like object returned by ``st.file_uploader`` or
            ``st.camera_input``.

    Returns:
        str: The text recognised in the image.
    """
    return pytesseract.image_to_string(Image.open(image_file))


def main():
    """Render the Streamlit NLP app.

    The page offers three text tools (named entities, sentiment, spelling
    correction), each behind its own checkbox, followed by a summary section
    that takes its text from an uploaded image, a camera photo (both via
    OCR) or a plain text input, and summarizes it with Gensim or T5.
    """
    st.title("Streamlit NLP APP")
    st.markdown("""
#### Description
+ This is a Natural Language Processing(NLP) Based App useful for basic NLP task
NER,Sentiment, Spell Corrections and Summarization
""")

    # The checkboxes are chained with ``elif``: a later checkbox is only
    # rendered while the ones before it are unchecked.
    if st.checkbox("Show Named Entities"):
        st.subheader("Analyze Your Text")
        message = st.text_area("Enter your Text", "Typing Here ..")
        if st.button("Extract"):
            entity_result = entity_analyzer(message)
            st.json(entity_result)

    elif st.checkbox("Show Sentiment Analysis"):
        st.subheader("Analyse Your Text")
        message = st.text_area("Enter Text plz", "Type Here .")
        if st.button("Analyze"):
            result_sentiment = TextBlob(message).sentiment
            st.success(result_sentiment)

    elif st.checkbox("Spell Corrections"):
        st.subheader("Correct Your Text")
        message = st.text_area("Enter the Text", "Type please ..")
        if st.button("Spell Corrections"):
            st.text("Using TextBlob ..")
            st.success(TextBlob(message).correct())

    def change_photo_state():
        """Record in the session that an image widget has changed."""
        st.session_state["photo"] = "done"

    st.subheader("Summary section, feed your image!")
    camera_photo = st.camera_input("Take a photo", on_change=change_photo_state)
    uploaded_photo = st.file_uploader(
        "Upload Image",
        type=['jpg', 'png', 'jpeg'],
        on_change=change_photo_state,
    )
    message = st.text_input("Or, drop your text here!")
    if "photo" not in st.session_state:
        st.session_state["photo"] = "not done"

    if st.session_state["photo"] == "done" or message:
        # The typed message is the fallback; OCR output replaces it when an
        # image is present. If both images exist, the camera photo wins
        # because it is processed last.
        text = message
        if uploaded_photo is not None:
            text = _ocr_image(uploaded_photo)
            st.success(text)
        if camera_photo is not None:
            text = _ocr_image(camera_photo)
            st.success(text)

        if st.checkbox("Show Text Summarization Genism"):
            st.subheader("Summarize Your Text")
            st.text("Using Gensim Summarizer ..")
            try:
                summary_result = summarize(text)
            except ValueError as err:
                # gensim rejects input it cannot split into at least two
                # sentences; report that instead of showing a traceback.
                st.warning(f"Gensim could not summarize this text: {err}")
            else:
                st.success(summary_result)

        elif st.checkbox("Show Text Summarization T5"):
            st.subheader("Summarize Your Text")
            st.text("Using Google T5 Transformer ..")
            inputs = tokenizer.encode(
                "summarize: " + text,
                return_tensors='pt',
                max_length=512,
                truncation=True,
            )
            summary_ids = model.generate(
                inputs,
                max_length=150,
                min_length=80,
                length_penalty=5.,
                num_beams=2,
            )
            # Drop markers such as <pad> and </s> from the displayed summary.
            summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
            st.success(summary)

    st.sidebar.subheader("About App")
    st.sidebar.subheader("By")
    st.sidebar.text("Soumen Sarker")
|
|
|
# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|