varl42 committed on
Commit
9fd4051
·
1 Parent(s): a05ebb5

Tried to fix the start/end index bug in summarize_text

Browse files
Files changed (1) hide show
  1. app.py +10 -17
app.py CHANGED
@@ -19,24 +19,17 @@ def extract_text(pdf_file):
19
 
20
 
21
  # Function to summarize text
22
- # Defines a function to summarize the extracted text using pszemraj/led-base-book-summary
23
  def summarize_text(text):
24
- sentences = text.split(". ")
25
-
26
- # Find the start index of the Abstract section
27
- for i, sentence in enumerate(sentences):
28
- if "Abstract" in sentence:
29
- start = i + 1
30
-
31
- # Find the end index (6 sentences after start)
32
- end = start + 6
33
-
34
- if start is not None and end is not None:
35
- # Join the sentences from start to end into the abstract
36
- abstract = ". ".join(sentences[start:end+1])
37
-
38
- else:
39
- return("Abstract section not found")
40
 
41
  # Load BART model & tokenizer
42
  tokenizer = AutoTokenizer.from_pretrained("pszemraj/led-base-book-summary")
 
19
 
20
 
21
  # Function to summarize text
22
+ # Defines a function to summarize the extracted text using facebook/bart-large-cnn
23
  def summarize_text(text):
24
+ sentences = text.split(". ")
25
+ for i, sentence in enumerate(sentences):
26
+ if "Abstract" in sentence:
27
+ start = i + 1
28
+ end = start + 6
29
+ break
30
+ abstract = ". ".join(sentences[start:end+1])
31
+ else:
32
+ return("Abstract section not found")
 
 
 
 
 
 
 
33
 
34
  # Load BART model & tokenizer
35
  tokenizer = AutoTokenizer.from_pretrained("pszemraj/led-base-book-summary")