Spaces:

mrsk1883
/

AAI-Assessment3

Runtime error

App Files Files Community

mrsk1883 committed on Dec 9, 2023

Commit

e12a777

1 Parent(s): b404b25

Upload 2 files

Browse files

Files changed (2) hide show

app (3).py +67 -0
requirements (3).txt +14 -0

app (3).py ADDED Viewed

	@@ -0,0 +1,67 @@

+import gradio as gr
+from PyPDF2 import PdfReader
+from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
+from gtts import gTTS
+from io import BytesIO
+import re
+import os
+# Load the LED-large model for summarization
+model_name = "pszemraj/led-large-book-summary"
+summarizer = pipeline("summarization", model=model_name, tokenizer=model_name)
+def extract_abstract_and_summarize(pdf_file):
+    try:
+        if pdf_file is None:
+            raise ValueError("PDF file is not provided.")
+        with open(pdf_file, "rb") as file:
+            pdf_reader = PdfReader(file)
+            abstract_text = ""
+            for page_num in range(len(pdf_reader.pages)):
+                page = pdf_reader.pages[page_num]
+                text = page.extract_text()
+                abstract_match = re.search(r"\bAbstract\b", text, re.IGNORECASE)
+                if abstract_match:
+                    start_index = abstract_match.end()
+                    introduction_match = re.search(r"\bIntroduction\b", text[start_index:], re.IGNORECASE)
+                    if introduction_match:
+                        end_index = start_index + introduction_match.start()
+                    else:
+                        end_index = None
+                    abstract_text = text[start_index:end_index]
+                    break
+            # Summarize the extracted abstract using the LED-large model with a specific max_length
+            result = summarizer(abstract_text, max_length=81)
+            # Extract only the first sentence from the summary
+            if result and isinstance(result, list) and len(result) > 0:
+                summary = result[0].get('summary_text', 'Summary not available.')
+                # Extracting the first sentence
+                first_sentence = summary.split('.')[0] + '.'
+            else:
+                first_sentence = "Summary not available."
+            # Generate audio
+            speech = gTTS(text=first_sentence, lang="en")
+            speech_bytes = BytesIO()
+            speech.write_to_fp(speech_bytes)
+            # Return individual output values
+            return first_sentence, speech_bytes.getvalue(), abstract_text.strip()
+    except Exception as e:
+        raise Exception(str(e))
+interface = gr.Interface(
+    fn=extract_abstract_and_summarize,
+    inputs=[gr.File(label="Upload PDF")],
+    outputs=[gr.Textbox(label="Summary"), gr.Audio()],
+    title="PDF Summarization & Audio Generation Tool",
+    description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it using the 'pszemraj/led-large-book-summary' model into one sentence summary, and generates an audio of it. Only upload PDFs with abstracts. Example
+    PDF's are given below, and please click on them to see the summarized text and audio generated. Please read the README.MD for more information about the app.""",
+    examples=[[os.path.join(os.path.dirname(__file__), "Article 11 Hidden Technical Debt in Machine Learning Systems.pdf")],[os.path.join(os.path.dirname(__file__), "Article 4 Experimental Evidence on the Productivity Effects of Generative Artificial Intelligence.pdf")]],cache_examples=True,
+)
+interface.launch()

requirements (3).txt ADDED Viewed

	@@ -0,0 +1,14 @@

+gradio
+transformers
+PyPDF2
+gtts
+torch
+numpy
+pytest
+sphinx
+huggingface-hub
+IPython
+torchvision
+torchaudio
+tensorflow
+flax