# data_dynamos / app.py
# Hugging Face Space file-viewer residue (not part of the program):
# domro11's picture | Update app.py | 971925a | raw | history blame | 5.57 kB
import streamlit as st
from time import sleep
from stqdm import stqdm
import pandas as pd
from transformers import pipeline
import json
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
def draw_all(key, plot=False):
    """Write the app's introductory markdown into the current Streamlit container.

    Args:
        key: Accepted for the caller's convenience; not read by this function.
        plot: Accepted for backward compatibility; not read by this function.
    """
    # The text is kept flush-left on purpose: it is rendered as markdown, where
    # leading indentation would change how the lines are displayed.
    overview_markdown = """
# NLP Web App
This Natural Language Processing Based Web App can do anything u can imagine with Text. 😱
This App is built using pretrained transformers which are capable of doing wonders with the Textual data.
```python
# Key Features of this App.
1. Advanced Text Summarizer
2. Key Word Extractor
3. Question Answering
4. Question Generation
```
"""
    st.write(overview_markdown)
# Render the app overview inside the sidebar. This is module-level code, so it
# runs each time the script itself is executed.
with st.sidebar:
    draw_all("sidebar")
def _tidy_sentences(text):
    """Return *text* with every '.'-separated fragment trimmed and its first letter upper-cased.

    Only the first character of each fragment is changed, so acronyms and
    proper nouns keep their casing. Empty fragments are dropped, and a single
    trailing period is restored when the input ended with one.
    """
    fragments = [part.strip() for part in text.split('.')]
    fragments = [part[:1].upper() + part[1:] for part in fragments if part]
    tidy = '. '.join(fragments)
    if tidy and text.rstrip().endswith('.'):
        tidy += '.'
    return tidy


#main function that holds all the options
def main():
    """Build the page: a task selector in the sidebar and one panel per NLP task.

    Each panel collects its input through Streamlit widgets and loads its
    model only once there is real input to process (or the action button was
    pressed), so merely opening a panel does not load a model.
    """
    # Placeholder texts pre-filled in the text areas. A box that still holds
    # its placeholder is treated as "no input yet".
    summary_placeholder = "Enter Your Text Here"
    context_placeholder = "Enter the Context Here"
    question_placeholder = "Enter your Question Here"
    generation_placeholder = "Enter the Text to complete"

    st.title("NLP IE Web App")
    menu = ["--Select--", "Summarizer",
            "Keyword Extractor", "Question Answering", "Question Generation"]
    choice = st.sidebar.selectbox("What task would you like to do?", menu)

    if choice == "--Select--":
        st.write("""
Welcome to the the Web App of Data Dynamos. As an IE student of the Master of Business Analyitics and Big Data you have the opportunity to
do anything with your lectures you like
""")
        st.write("""
Never heard of NLP? No way! Natural Language Processing (NLP) is a computational technique
to process human language in all of it's complexity
""")
        st.write("""
NLP is an vital discipline in Artificial Intelligence and keeps growing
""")
        st.image('banner_image.jpg')

    elif choice == "Summarizer":
        st.subheader("Text Summarization")
        st.write(" Enter the Text you want to summarize !")
        raw_text = st.text_area("Your Text", summary_placeholder)
        num_words = st.number_input("Enter Number of Words in Summary")
        has_text = bool(raw_text.strip()) and raw_text != summary_placeholder
        if has_text and num_words is not None:
            # The requested size is used as the lower bound; keep the upper
            # bound at 50 unless the request exceeds it, so that
            # min_length <= max_length always holds.
            min_length = max(int(num_words), 0)
            max_length = max(50, min_length)
            summarizer = pipeline('summarization')
            summary = summarizer(raw_text, min_length=min_length, max_length=max_length)
            result_summary = _tidy_sentences(summary[0]['summary_text'])
            st.write(f"Here's your Summary : {result_summary}")

    elif choice == "Keyword Extractor":
        st.subheader("Keyword Extraction")
        input_text = st.text_area("Enter some text:")
        if st.button("Extract Keywords"):
            if not input_text.strip():
                st.warning("Please enter some text first.")
            else:
                # NOTE(review): the model name suggests a BERT-style checkpoint,
                # which the "text2text-generation" task may not be able to load;
                # confirm the intended task (possibly "token-classification")
                # against the model card. Call left as originally written.
                model_name = "yanekyuk/bert-uncased-keyword-extractor"
                keyword_extractor = pipeline("text2text-generation", model=model_name, tokenizer=model_name)
                # Extract keywords using the model
                keywords = keyword_extractor(input_text, max_length=20, do_sample=False)[0]["generated_text"]
                # Display the extracted keywords
                st.write("Keywords:", keywords)

    elif choice == "Question Answering":
        st.subheader("Question Answering")
        st.write(" Enter the Context and ask the Question to find out the Answer !")
        context = st.text_area("Context", context_placeholder)
        # This is the text box for the question
        question = st.text_area("Your Question", question_placeholder)
        has_context = bool(context.strip()) and context != context_placeholder
        has_question = bool(question.strip()) and question != question_placeholder
        if has_context and has_question:
            question_answering = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
            # We are passing the question and the context
            result = question_answering(question=question, context=context)
            generated_text = _tidy_sentences(result['answer'])
            st.write(f" Here's your Answer :\n {generated_text}")

    elif choice == "Question Generation":
        st.subheader("Question Generation")
        st.write(" Enter the text to get questions generated !")
        text_input2 = st.text_area("Your Text", generation_placeholder)
        # Create a button to generate questions
        if st.button("Generate Questions"):
            if not text_input2.strip() or text_input2 == generation_placeholder:
                st.warning("Please enter some text first.")
            else:
                # Load the T5 model and tokenizer
                model_id = "mrm8488/t5-base-finetuned-question-generation-ap"
                model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
                tokenizer = AutoTokenizer.from_pretrained(model_id)
                # Encode the text typed into this panel's text area
                input_ids = tokenizer.encode("generate questions: " + text_input2, return_tensors="pt")
                # Use the T5 model to generate questions
                question_ids = model.generate(input_ids)
                # Decode the questions from the output ids using the tokenizer
                questions = tokenizer.decode(question_ids[0], skip_special_tokens=True)
                # Display the questions to the user
                st.write("Generated Questions:")
                st.write(questions)
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == '__main__':
    main()