Spaces:

varl42
/

audio_abstract42

Running

varl42 committed on Dec 9, 2023

Commit

a05ebb5

1 Parent(s): 5e43307

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -19,20 +19,24 @@ def extract_text(pdf_file):
 # Function to summarize text
-# Defines a function to summarize the extracted text using facebook/bart-large-cnn
 def summarize_text(text):
-    sentences = text.split(". ")
-    for i, sentence in enumerate(sentences):
-        if "Abstract" in sentence:
-            start = i + 1
-            end = start + 6
-            break
-    if start is not None and end is not None:
-        abstract = ". ".join(sentences[start:end+1])
-        #print(abstract)
-    else:                                            #if the Abstract is not found
-        return("Abstract section not found")
   # Load BART model & tokenizer
     tokenizer = AutoTokenizer.from_pretrained("pszemraj/led-base-book-summary")

 # Function to summarize text
+# Defines a function to summarize the extracted text using pszemraj/led-base-book-summary
 def summarize_text(text):
+      sentences = text.split(". ")
+      # Find the start index of the Abstract section
+      for i, sentence in enumerate(sentences):
+          if "Abstract" in sentence:
+              start = i + 1
+      # Find the end index (6 sentences after start)
+      end = start + 6
+      if start is not None and end is not None:
+          # Join the sentences from start to end into the abstract
+          abstract = ". ".join(sentences[start:end+1])
+      else:
+          return("Abstract section not found")
   # Load BART model & tokenizer
     tokenizer = AutoTokenizer.from_pretrained("pszemraj/led-base-book-summary")