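# Spaces: Build error
# NOTE(review): the line above appears to be hosting-page status text captured
# together with the source; it is not part of the program.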
import streamlit as st | |
from time import sleep | |
from stqdm import stqdm | |
import pandas as pd | |
from transformers import pipeline | |
import json | |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer | |
def draw_all(
    key,
    plot=False,
):
    """Write the app's introduction and feature list with ``st.write``.

    Args:
        key: Accepted for the caller's benefit; the body does not read it.
        plot: Accepted for the caller's benefit; the body does not read it.
    """
    intro_markdown = """
    # NLP Web App
    This Natural Language Processing Based Web App can do anything u can imagine with Text. 😱
    This App is built using pretrained transformers which are capable of doing wonders with the Textual data.
    ```python
    # Key Features of this App.
    1. Advanced Text Summarizer
    2. Key Word Extractor
    3. Question Answering
    4. Question Generation
    ```
    """
    st.write(intro_markdown)
# Render the introduction inside the sidebar every time the script runs.
with st.sidebar:
    draw_all(key="sidebar")
def _capitalize_sentences(text):
    """Split *text* on '.', trim and capitalize each piece, rejoin with '. '."""
    return '. '.join(piece.strip().capitalize() for piece in text.split('.'))


def _has_user_input(text, placeholder):
    """Return True when *text* is neither blank nor the widget's placeholder."""
    return bool(text.strip()) and text != placeholder


def _show_home():
    """Show the welcome text and the banner image for the default menu entry."""
    st.write("""
    Welcome to the the Web App of Data Dynamos. As an IE student of the Master of Business Analyitics and Big Data you have the opportunity to
    do anything with your lectures you like
    """)
    st.write("""
    Never heard of NLP? No way! Natural Language Processing (NLP) is a computational technique
    to process human language in all of it's complexity
    """)
    st.write("""
    NLP is an vital discipline in Artificial Intelligence and keeps growing
    """)
    st.image('banner_image.jpg')


def _show_summarizer():
    """Collect a text and a minimum length, then display its summary."""
    placeholder = "Enter Your Text Here"
    st.subheader("Text Summarization")
    st.write(" Enter the Text you want to summarize !")
    raw_text = st.text_area("Your Text", placeholder)
    num_words = st.number_input("Enter Number of Words in Summary")
    # Do not spend a model run on blank input or on the untouched placeholder.
    if not _has_user_input(raw_text, placeholder) or num_words is None:
        return
    # The widget value is converted to int; negative requests are clamped to 0.
    min_words = max(0, int(num_words))
    summarizer = pipeline('summarization')
    # Keep the length constraints feasible: the upper bound stays at 50 unless
    # the requested minimum is larger, in which case it follows the minimum.
    summary = summarizer(
        raw_text,
        min_length=min_words,
        max_length=max(50, min_words),
    )
    result_summary = _capitalize_sentences(summary[0]['summary_text'])
    st.write(f"Here's your Summary : {result_summary}")


def _show_keyword_extractor():
    """Collect a text and display the keywords produced by the model."""
    st.subheader("Keyword Extraction")
    model_name = "yanekyuk/bert-uncased-keyword-extractor"
    input_text = st.text_area("Enter some text:")
    if st.button("Extract Keywords"):
        # Load the pipeline only once the user actually asks for keywords, so
        # merely opening the page does not trigger a model load.
        # NOTE(review): this checkpoint looks like a BERT token-classification
        # model; confirm that the "text2text-generation" task can load it.
        keyword_extractor = pipeline("text2text-generation", model=model_name, tokenizer=model_name)
        keywords = keyword_extractor(input_text, max_length=20, do_sample=False)[0]["generated_text"]
        st.write("Keywords:", keywords)


def _show_question_answering():
    """Collect a context and a question, then display the extracted answer."""
    context_placeholder = "Enter the Context Here"
    question_placeholder = "Enter your Question Here"
    st.subheader("Question Answering")
    st.write(" Enter the Context and ask the Question to find out the Answer !")
    context = st.text_area("Context", context_placeholder)
    question = st.text_area("Your Question", question_placeholder)
    # Compare against the placeholders the widgets really use; the previous
    # check looked for a different string and therefore never filtered the
    # default context.
    if not (_has_user_input(context, context_placeholder)
            and _has_user_input(question, question_placeholder)):
        return
    question_answering = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
    result = question_answering(question=question, context=context)
    generated_text = _capitalize_sentences(result['answer'])
    st.write(f" Here's your Answer :\n {generated_text}")


def _show_question_generation():
    """Collect a text and display the question generated from it."""
    model_name = "mrm8488/t5-base-finetuned-question-generation-ap"
    st.subheader("Question Generation")
    st.write(" Enter the text to get questions generated !")
    text_input2 = st.text_area("Your Text", "Enter the Text to complete")
    if st.button("Generate Questions"):
        # Load model and tokenizer only when generation is requested.
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Encode the text typed into this page's text area (the earlier code
        # referenced a name that does not exist in this branch).
        input_ids = tokenizer.encode("generate questions: " + text_input2, return_tensors="pt")
        question_ids = model.generate(input_ids)
        questions = tokenizer.decode(question_ids[0], skip_special_tokens=True)
        st.write("Generated Questions:")
        st.write(questions)


def main():
    """Draw the page title and task menu, then render the selected task page.

    The menu entries and their order are the keys of the dispatch table
    below; the first entry is the landing page.
    """
    st.title("NLP IE Web App")
    pages = {
        "--Select--": _show_home,
        "Summarizer": _show_summarizer,
        "Keyword Extractor": _show_keyword_extractor,
        "Question Answering": _show_question_answering,
        "Question Generation": _show_question_generation,
    }
    choice = st.sidebar.selectbox("What task would you like to do?", list(pages))
    pages[choice]()
# Script entry point: build the page only when this file is executed directly.
if __name__ == "__main__":
    main()