arithescientist committed on
Commit
6bc80f7
·
1 Parent(s): 23846ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -5
app.py CHANGED
@@ -23,11 +23,11 @@ from pdfminer.high_level import extract_text
23
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
24
  nltk.download('punkt')
25
 
26
- def pdf_to_text(PDF, Percent):
27
  model_name = 'nlpaueb/legal-bert-base-uncased'
28
  # The setup of huggingface.co
29
  file_obj = PDF
30
- n = int(Percent.replace('%', ''))
31
  tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
32
 
33
  model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
@@ -36,10 +36,9 @@ def pdf_to_text(PDF, Percent):
36
 
37
  inputs = tokenizer([text], max_length=1024, return_tensors="pt")
38
 
39
- more= (n)/100
40
- less = (n-10)/100
41
  # Generate Summary
42
- summary_ids = model.generate(inputs["input_ids"], num_beams=2, min_length= less, max_length= more)
43
  output_text = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
44
 
45
 
 
23
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
24
  nltk.download('punkt')
25
 
26
+ def pdf_to_text(PDF, Min):
27
  model_name = 'nlpaueb/legal-bert-base-uncased'
28
  # The setup of huggingface.co
29
  file_obj = PDF
30
+ #n = int(Percent.replace('%', ''))
31
  tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
32
 
33
  model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
 
36
 
37
  inputs = tokenizer([text], max_length=1024, return_tensors="pt")
38
 
39
+
 
40
  # Generate Summary
41
+ summary_ids = model.generate(inputs["input_ids"], num_beams=2,min_length=Min, max_length=Min+1000)
42
  output_text = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
43
 
44