Ari committed on
Commit
b74e4b8
·
verified ·
1 Parent(s): 9d0e6a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -9,6 +9,7 @@ from docx import Document
9
  from reportlab.lib.pagesizes import letter
10
  from reportlab.pdfgen import canvas
11
 
 
12
  nltk.download('punkt')
13
 
14
  tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
@@ -16,7 +17,7 @@ model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
16
 
17
  # Function to split text into chunks
18
  def split_text(text, max_chunk_size=1024):
19
- sentences = nltk.sent_tokenize(text)
20
  chunks = []
21
  chunk = ""
22
 
 
9
  from reportlab.lib.pagesizes import letter
10
  from reportlab.pdfgen import canvas
11
 
12
+ # Ensure that the punkt tokenizer is downloaded
13
  nltk.download('punkt')
14
 
15
  tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
 
17
 
18
  # Function to split text into chunks
19
  def split_text(text, max_chunk_size=1024):
20
+ sentences = nltk.sent_tokenize(text) # Use NLTK's sentence tokenizer
21
  chunks = []
22
  chunk = ""
23