Can Günen committed on
Commit
6a757a3
·
1 Parent(s): 5002d47

solved the uploading problem

Browse files
Files changed (1) hide show
  1. app.py +14 -10
app.py CHANGED
@@ -7,19 +7,22 @@ Created on Mon May 8 00:32:30 2023
7
  import pdfplumber
8
  import gradio as gr
9
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
10
 
11
-
12
- def respond(pdf_file):
13
- pdf_file_name = pdf_file.name
14
  all_text = ''
15
- with pdfplumber.open(pdf_file_name) as pdf:
16
  total_pages = len(pdf.pages)
17
  for idx, pdf_page in enumerate(pdf.pages):
18
- single_page_text = pdf_page.extract_text()
19
- all_text = all_text + '\n' + single_page_text
20
- print(idx/total_pages)
21
- if idx/total_pages >0.2:
22
- break
 
 
23
  tokenizer=AutoTokenizer.from_pretrained('Einmalumdiewelt/T5-Base_GNAD')
24
  model=AutoModelForSeq2SeqLM.from_pretrained('Einmalumdiewelt/T5-Base_GNAD', return_dict=True)
25
  inputs=tokenizer.encode("sumarize: " +all_text, return_tensors='pt', max_length=512, truncation=True)
@@ -34,10 +37,11 @@ with gr.Blocks() as demo:
34
  with gr.Row():
35
  with gr.Column():
36
  file_input = gr.File(label="PDF File", type="file")
 
37
  summarize = gr.Button("Summarize")
38
  text_output = gr.Textbox(label="Summarized text")
39
 
40
- summarize.click(fn=respond, inputs=file_input, outputs=text_output)
41
 
42
 
43
  demo.launch(debug=True)
 
7
  import pdfplumber
8
  import gradio as gr
9
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
10
+ from pathlib import Path
11
 
12
+ def respond(pdf_file, upper_page=0):
13
+ pdf_file = Path(pdf_file.name)
14
+ #pdf_file_name = pdf_file.name
15
  all_text = ''
16
+ with pdfplumber.open(pdf_file) as pdf:
17
  total_pages = len(pdf.pages)
18
  for idx, pdf_page in enumerate(pdf.pages):
19
+ if int(upper_page) !=0 and idx<int(upper_page):
20
+ single_page_text = pdf_page.extract_text()
21
+ all_text = all_text + '\n' + single_page_text
22
+ print(idx/total_pages)
23
+ else:
24
+ break
25
+
26
  tokenizer=AutoTokenizer.from_pretrained('Einmalumdiewelt/T5-Base_GNAD')
27
  model=AutoModelForSeq2SeqLM.from_pretrained('Einmalumdiewelt/T5-Base_GNAD', return_dict=True)
28
  inputs=tokenizer.encode("sumarize: " +all_text, return_tensors='pt', max_length=512, truncation=True)
 
37
  with gr.Row():
38
  with gr.Column():
39
  file_input = gr.File(label="PDF File", type="file")
40
+ page_input = gr.Text(label="Page Limit")
41
  summarize = gr.Button("Summarize")
42
  text_output = gr.Textbox(label="Summarized text")
43
 
44
+ summarize.click(fn=respond, inputs=[file_input, page_input], outputs=text_output)
45
 
46
 
47
  demo.launch(debug=True)