Spaces:
Running
Running
Commit
·
6bc80f7
1
Parent(s):
23846ee
Update app.py
Browse files
app.py
CHANGED
@@ -23,11 +23,11 @@ from pdfminer.high_level import extract_text
|
|
23 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
24 |
nltk.download('punkt')
|
25 |
|
26 |
-
def pdf_to_text(PDF, Percent):
|
27 |
model_name = 'nlpaueb/legal-bert-base-uncased'
|
28 |
# The setup of huggingface.co
|
29 |
file_obj = PDF
|
30 |
-
n = int(Percent.replace('%', ''))
|
31 |
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
|
32 |
|
33 |
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
|
@@ -36,10 +36,9 @@ def pdf_to_text(PDF, Percent):
|
|
36 |
|
37 |
inputs = tokenizer([text], max_length=1024, return_tensors="pt")
|
38 |
|
39 |
-
|
40 |
-
less = (n-10)/100
|
41 |
# Generate Summary
|
42 |
-
summary_ids = model.generate(inputs["input_ids"], num_beams=2,
|
43 |
output_text = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
|
44 |
|
45 |
|
|
|
23 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
24 |
nltk.download('punkt')
|
25 |
|
26 |
+
def pdf_to_text(PDF, Min):
|
27 |
model_name = 'nlpaueb/legal-bert-base-uncased'
|
28 |
# The setup of huggingface.co
|
29 |
file_obj = PDF
|
30 |
+
#n = int(Percent.replace('%', ''))
|
31 |
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
|
32 |
|
33 |
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
|
|
|
36 |
|
37 |
inputs = tokenizer([text], max_length=1024, return_tensors="pt")
|
38 |
|
39 |
+
|
|
|
40 |
# Generate Summary
|
41 |
+
summary_ids = model.generate(inputs["input_ids"], num_beams=2,min_length=Min, max_length=Min+1000)
|
42 |
output_text = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
|
43 |
|
44 |
|