varl42 committed on
Commit
5f6f8b5
1 Parent(s): a9c85ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -6,12 +6,14 @@ import numpy
6
  import scipy
7
  from gtts import gTTS
8
  from io import BytesIO
 
9
 
10
  def extract_text(pdf_file):
11
  pdfReader = PyPDF2.PdfReader(pdf_file)
12
  pageObj = pdfReader.pages[0]
13
  return pageObj.extract_text()
14
 
 
15
  def summarize_text(text):
16
  sentences = text.split(". ")
17
  for i, sentence in enumerate(sentences):
@@ -20,8 +22,10 @@ def summarize_text(text):
20
  end = start + 3
21
  break
22
  abstract = ". ".join(sentences[start:end+1])
23
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
24
- summary = summarizer(abstract, max_length=30, min_length=30,
 
 
25
  do_sample=False)
26
  return summary[0]['summary_text']
27
 
 
6
  import scipy
7
  from gtts import gTTS
8
  from io import BytesIO
9
+ from transformers import BartTokenizer
10
 
11
  def extract_text(pdf_file):
12
  pdfReader = PyPDF2.PdfReader(pdf_file)
13
  pageObj = pdfReader.pages[0]
14
  return pageObj.extract_text()
15
 
16
+
17
  def summarize_text(text):
18
  sentences = text.split(". ")
19
  for i, sentence in enumerate(sentences):
 
22
  end = start + 3
23
  break
24
  abstract = ". ".join(sentences[start:end+1])
25
+
26
+ tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
27
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn", tokenizer=tokenizer)
28
+ summary = summarizer(abstract, max_length=40, min_length=40,
29
  do_sample=False)
30
  return summary[0]['summary_text']
31