Update app.py
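Replaces the in-process transformers setup with calls to the hosted Hugging Face Inference API. The transformers import and the t5-base tokenizer/model loading in main() are commented out; module-level API_URL/headers (gpt2-large, for "Text Generation") and API_URL0/headers0 (t5-base, for "Mark to English Text Summarization!") take their place, and the summarization checkbox now posts the input text through a small requests-based query() helper instead of calling model.generate().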
app.py CHANGED
@@ -30,7 +30,7 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
 import streamlit as st
 st.set_page_config(page_title="Anomaly_Detection_Tool", layout="wide", initial_sidebar_state="expanded")
 import torch
-from transformers import AutoTokenizer, AutoModelWithLMHead, GPT2LMHeadModel
+#from transformers import AutoTokenizer, AutoModelWithLMHead, GPT2LMHeadModel
 import docx2txt
 from PIL import Image
 from PyPDF2 import PdfFileReader
@@ -52,7 +52,10 @@ import line_cor
 import altair as alt
 #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
 from PIL import Image
-
+API_URL = "https://api-inference.huggingface.co/models/gpt2-large"
+headers = {"Authorization": "Bearer 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"}
+API_URL0 = "https://api-inference.huggingface.co/models/t5-base"
+headers0 = {"Authorization": "Bearer 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"}
 def read_pdf(file):
     # images=pdf2image.convert_from_path(file)
     # # print(type(images))
@@ -110,9 +113,9 @@ def entity_analyzer(my_text):
     allData = ['"Token":{},\n"Entities":{}'.format(tokens,entities)]
     return allData
 def main():
-    global tokenizer, model
-    tokenizer = AutoTokenizer.from_pretrained('t5-base')
-    model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
+    #global tokenizer, model
+    #tokenizer = AutoTokenizer.from_pretrained('t5-base')
+    #model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
     """ NLP Based Application with Streamlit """
     st.markdown("""
     #### Description
@@ -179,10 +182,7 @@ def main():
         st.success(result_sentiment)
     if st.checkbox("Spell Corrections for English"):
         st.success(TextBlob(text).correct())
-    if st.checkbox("Text Generation"):
-        API_URL = "https://api-inference.huggingface.co/models/gpt2-large"
-        headers = {"Authorization": "Bearer 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"}
-
+    if st.checkbox("Text Generation"):
         def query(payload):
             response = requests.post(API_URL, headers=headers, json=payload)
             return response.json()
@@ -191,29 +191,12 @@
             "inputs": text,
         })
        st.success(output)
-    # tokenizer, model = load_models()
-    # input_ids = tokenizer(text, return_tensors='pt').input_ids
-    # st.text("Using Hugging Face Transformer, Contrastive Search ..")
-    # output = model.generate(input_ids, max_length=128)
-    #st.success(tokenizer.decode(output[0], skip_special_tokens=True))
-    # if st.checkbox("Mark here, Text Summarization for English or Bangla!"):
-    # st.subheader("Summarize Your Text for English and Bangla Texts!")
-    # message = st.text_area("Enter the Text","Type please ..")
-    # st.text("Using Gensim Summarizer ..")
-    # st.success(message)
-    # summary_result = summarize(text)
-    # st.success(summary_result)
     if st.checkbox("Mark to English Text Summarization!"):
-
-
-
-
-
-
-        summary_ids = model.generate(inputs, max_length=150, min_length=80, length_penalty=5., num_beams=2)
-        summary = tokenizer.decode(summary_ids[0])
-        st.success(summary)
-    #if st.button("refresh"):
-    #    st.experimental_rerun()
+        def query(payload):
+            response = requests.post(API_URL0, headers=headers0, json=payload)
+            return response.json()
+        output = query({
+            "inputs": text})
+        st.success(output)
 if __name__ == '__main__':
     main()
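Both checkbox handlers now follow the same hosted-inference pattern, sketched standalone below. This is a minimal illustration rather than the app's exact code: HF_TOKEN and the query(api_url, payload) signature are assumptions for this sketch (the commit itself hardcodes two bearer tokens and defines a separate no-URL-argument query() inside each checkbox).

    # Minimal sketch of the Inference API pattern the commit adopts.
    # Assumes a token in the HF_TOKEN environment variable; the model
    # URLs are the ones the commit uses.
    import os
    import requests

    API_URL = "https://api-inference.huggingface.co/models/gpt2-large"  # text generation
    API_URL0 = "https://api-inference.huggingface.co/models/t5-base"    # summarization
    HEADERS = {"Authorization": "Bearer " + os.environ["HF_TOKEN"]}

    def query(api_url, payload):
        # POST the payload as JSON; the API answers with JSON (a list of
        # results, or an {"error": ...} object, e.g. while the model is
        # still loading).
        response = requests.post(api_url, headers=HEADERS, json=payload)
        return response.json()

    if __name__ == "__main__":
        text = "Streamlit turns Python scripts into shareable web apps."
        print(query(API_URL, {"inputs": text}))   # gpt2-large continuation
        print(query(API_URL0, {"inputs": text}))  # t5-base output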
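One design note on the new module-level headers: both bearer tokens are committed in plain text, and a token pushed to a public repository should be treated as leaked and revoked. A common alternative in Streamlit apps is to keep the token in the deployment's secrets and read it at runtime, e.g. headers = {"Authorization": f"Bearer {st.secrets['HF_TOKEN']}"}, where HF_TOKEN is a placeholder secret name.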
|