Spaces:
Sleeping
Sleeping
Ari
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -9,6 +9,7 @@ from docx import Document
|
|
9 |
from reportlab.lib.pagesizes import letter
|
10 |
from reportlab.pdfgen import canvas
|
11 |
|
|
|
12 |
nltk.download('punkt')
|
13 |
|
14 |
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
|
@@ -16,7 +17,7 @@ model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
|
|
16 |
|
17 |
# Function to split text into chunks
|
18 |
def split_text(text, max_chunk_size=1024):
|
19 |
-
sentences = nltk.sent_tokenize(text)
|
20 |
chunks = []
|
21 |
chunk = ""
|
22 |
|
|
|
9 |
from reportlab.lib.pagesizes import letter
|
10 |
from reportlab.pdfgen import canvas
|
11 |
|
12 |
+
# Ensure that the punkt tokenizer is downloaded
|
13 |
nltk.download('punkt')
|
14 |
|
15 |
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
|
|
|
17 |
|
18 |
# Function to split text into chunks
|
19 |
def split_text(text, max_chunk_size=1024):
|
20 |
+
sentences = nltk.sent_tokenize(text) # Use NLTK's sentence tokenizer
|
21 |
chunks = []
|
22 |
chunk = ""
|
23 |
|