Can Günen committed on
Commit
dee6fb6
·
1 Parent(s): 6a757a3

fixed path redirection

Browse files
Files changed (1) hide show
  1. app.py +19 -24
app.py CHANGED
@@ -1,34 +1,26 @@
1
- # -*- coding: utf-8 -*-
2
- """
3
- Created on Mon May 8 00:32:30 2023
4
-
5
- @author: ahmet
6
- """
7
- import pdfplumber
8
- import gradio as gr
9
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
10
  from pathlib import Path
 
 
 
 
11
 
12
  def respond(pdf_file, upper_page=0):
13
  pdf_file = Path(pdf_file.name)
14
- #pdf_file_name = pdf_file.name
15
- all_text = ''
16
  with pdfplumber.open(pdf_file) as pdf:
17
  total_pages = len(pdf.pages)
18
  for idx, pdf_page in enumerate(pdf.pages):
19
- if int(upper_page) !=0 and idx<int(upper_page):
20
- single_page_text = pdf_page.extract_text()
21
- all_text = all_text + '\n' + single_page_text
22
- print(idx/total_pages)
23
- else:
24
- break
25
-
26
  tokenizer=AutoTokenizer.from_pretrained('Einmalumdiewelt/T5-Base_GNAD')
27
  model=AutoModelForSeq2SeqLM.from_pretrained('Einmalumdiewelt/T5-Base_GNAD', return_dict=True)
28
- inputs=tokenizer.encode("sumarize: " +all_text, return_tensors='pt', max_length=512, truncation=True)
29
  output = model.generate(inputs, min_length=70, max_length=80)
30
  summary=tokenizer.decode(output[0])
31
- return summary
32
 
33
 
34
  with gr.Blocks() as demo:
@@ -37,11 +29,14 @@ with gr.Blocks() as demo:
37
  with gr.Row():
38
  with gr.Column():
39
  file_input = gr.File(label="PDF File", type="file")
40
- page_input = gr.Text(label="Page Limit")
41
- summarize = gr.Button("Summarize")
42
- text_output = gr.Textbox(label="Summarized text")
 
 
 
43
 
44
- summarize.click(fn=respond, inputs=[file_input, page_input], outputs=text_output)
45
-
46
 
 
 
47
  demo.launch(debug=True)
 
 
 
 
 
 
 
 
 
1
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
  from pathlib import Path
3
+ import pdfplumber
4
+ import gradio as gr
5
+
6
+
7
 
8
  def respond(pdf_file, upper_page=0):
9
  pdf_file = Path(pdf_file.name)
10
+ all_text = ""
 
11
  with pdfplumber.open(pdf_file) as pdf:
12
  total_pages = len(pdf.pages)
13
  for idx, pdf_page in enumerate(pdf.pages):
14
+ single_page_text = pdf_page.extract_text()
15
+ all_text = all_text + "\n" + single_page_text
16
+ #print(idx / total_pages)
17
+
 
 
 
18
  tokenizer=AutoTokenizer.from_pretrained('Einmalumdiewelt/T5-Base_GNAD')
19
  model=AutoModelForSeq2SeqLM.from_pretrained('Einmalumdiewelt/T5-Base_GNAD', return_dict=True)
20
+ inputs=tokenizer.encode("summarize: " +all_text, return_tensors='pt', max_length=512, truncation=True)
21
  output = model.generate(inputs, min_length=70, max_length=80)
22
  summary=tokenizer.decode(output[0])
23
+ return summary, all_text
24
 
25
 
26
  with gr.Blocks() as demo:
 
29
  with gr.Row():
30
  with gr.Column():
31
  file_input = gr.File(label="PDF File", type="file")
32
+ page_input = gr.Textbox(label="Page Limit")
33
+ summarize_button = gr.Button(label="Summarize")
34
+ with gr.Column():
35
+ summary_output = gr.Textbox(label="Summarized Text")
36
+ with gr.Column():
37
+ text_output =gr.Textbox(label="Extracted Text")
38
 
 
 
39
 
40
+ summarize_button.click(respond, inputs=[file_input, page_input], outputs=[summary_output, text_output])
41
+
42
  demo.launch(debug=True)