arad1367 committed on
Commit
52d559e
·
verified ·
1 Parent(s): 5ca8666

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -95
app.py CHANGED
@@ -1,95 +1,96 @@
1
- import gradio as gr
2
- from transformers import pipeline
3
- import fitz
4
-
5
- # Initialize summarization pipeline
6
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
7
-
8
- # Function to summarize text
9
- def summarize_text(text, model, max_length=1024):
10
- # Split the input text into smaller chunks
11
- chunks = [text[i:i+max_length] for i in range(0, len(text), max_length)]
12
-
13
- # Summarize each chunk separately (You can use list comprehension)
14
- summaries = []
15
- for chunk in chunks:
16
- summary = model(chunk, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
17
- summaries.append(summary)
18
-
19
- # Concatenate the summaries into a single string
20
- summary = ' '.join(summaries)
21
-
22
- return summary
23
-
24
- # Function to read PDF and summarize
25
- def summarize_pdf(pdf_file, model):
26
- with fitz.open(pdf_file.name) as doc:
27
- text = ""
28
- for page in doc:
29
- text += page.get_text()
30
- return summarize_text(text, model)
31
-
32
- def summarize(input_type, input_text, uploaded_file):
33
- try:
34
- if input_type == "Text":
35
- summary = summarize_text(input_text, summarizer)
36
- else:
37
- summary = summarize_pdf(uploaded_file, summarizer)
38
- return summary
39
- except Exception as e:
40
- return "There was a problem summarizing the text. Please try again later."
41
-
42
- # Define the footer
43
- footer = """
44
- <div style="text-align: center; margin-top: 20px;">
45
- <a href="https://www.linkedin.com/in/pejman-ebrahimi-4a60151a7/" target="_blank">LinkedIn</a> |
46
- <a href="https://github.com/arad1367" target="_blank">GitHub</a> |
47
- <a href="https://arad1367.pythonanywhere.com/" target="_blank">Live demo of my PhD defense</a>
48
- <br>
49
- Made with 💖 by Pejman Ebrahimi
50
- </div>
51
- """
52
-
53
- # Define the inputs and outputs
54
- inputs = [
55
- gr.Radio(["Text", "PDF"], label="Input Type"),
56
- gr.Textbox(lines=10, label="Enter Text to Summarize", visible=False),
57
- gr.File(label="Upload PDF file", visible=False)
58
- ]
59
- outputs = [
60
- gr.Textbox(label="Summary"),
61
- gr.HTML(footer)
62
- ]
63
-
64
- # Define the submit button
65
- submit_btn = gr.Button("Submit")
66
-
67
- # Define the Gradio interface
68
- with gr.Blocks(theme='gradio/soft') as app:
69
- gr.Markdown("# Text and PDF Summarization App")
70
- gr.Markdown("Note: This model can handle a maximum of 1024 tokens. A token is a unit of text that the model can process at a time. When summarizing text, the input text is split into smaller chunks of up to 1024 tokens each, and each chunk is summarized separately. The summaries are then concatenated into a single summary.")
71
- with gr.Row():
72
- input_type = gr.Radio(["Text", "PDF"], label="Input Type")
73
- with gr.Row():
74
- input_text = gr.Textbox(lines=10, label="Enter Text to Summarize", visible=False)
75
- uploaded_file = gr.File(label="Upload PDF file", visible=False)
76
- with gr.Row():
77
- submit_btn = gr.Button("Submit")
78
- with gr.Row():
79
- summary = gr.Textbox(label="Summary")
80
- with gr.Row():
81
- footer = gr.HTML(footer)
82
-
83
- # Define the change event handler for the input type radio buttons
84
- def input_type_change(input_type):
85
- if input_type == "Text":
86
- return {input_text: gr.Textbox(visible=True), uploaded_file: gr.File(visible=False)}
87
- else:
88
- return {input_text: gr.Textbox(visible=False), uploaded_file: gr.File(visible=True)}
89
- input_type.change(fn=input_type_change, inputs=[input_type], outputs=[input_text, uploaded_file])
90
-
91
- # Define the click event handler for the submit button
92
- submit_btn.click(fn=summarize, inputs=[input_type, input_text, uploaded_file], outputs=[summary])
93
-
94
- # Launch the Gradio interface
95
- app.launch('share=True')
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import fitz
4
+
5
+ # Initialize summarization pipeline
6
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
7
+
8
+ # Function to summarize text
9
+ def summarize_text(text, model, max_length=1024):
10
+ # Split the input text into smaller chunks
11
+ chunks = [text[i:i+max_length] for i in range(0, len(text), max_length)]
12
+
13
+ # Summarize each chunk separately (You can use list comprehension)
14
+ summaries = []
15
+ for chunk in chunks:
16
+ summary = model(chunk, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
17
+ summaries.append(summary)
18
+
19
+ # Concatenate the summaries into a single string
20
+ summary = ' '.join(summaries)
21
+
22
+ return summary
23
+
24
+ # Function to read PDF and summarize
25
+ def summarize_pdf(pdf_file, model):
26
+ with fitz.open(pdf_file.name) as doc:
27
+ text = ""
28
+ for page in doc:
29
+ text += page.get_text()
30
+ return summarize_text(text, model)
31
+
32
+ def summarize(input_type, input_text, uploaded_file):
33
+ try:
34
+ if input_type == "Text":
35
+ summary = summarize_text(input_text, summarizer)
36
+ else:
37
+ summary = summarize_pdf(uploaded_file, summarizer)
38
+ return summary
39
+ except Exception as e:
40
+ return "There was a problem summarizing the text. Please try again later."
41
+
42
+ # Define the footer
43
+ footer = """
44
+ <div style="text-align: center; margin-top: 20px;">
45
+ <a href="https://www.linkedin.com/in/pejman-ebrahimi-4a60151a7/" target="_blank">LinkedIn</a> |
46
+ <a href="https://github.com/arad1367" target="_blank">GitHub</a> |
47
+ <a href="https://arad1367.pythonanywhere.com/" target="_blank">Live demo of my PhD defense</a>
48
+ <br>
49
+ Made with 💖 by Pejman Ebrahimi
50
+ </div>
51
+ """
52
+
53
+ # Define the inputs and outputs
54
+ inputs = [
55
+ gr.Radio(["Text", "PDF"], label="Input Type"),
56
+ gr.Textbox(lines=10, label="Enter Text to Summarize", visible=False),
57
+ gr.File(label="Upload PDF file", visible=False)
58
+ ]
59
+ outputs = [
60
+ gr.Textbox(label="Summary"),
61
+ gr.HTML(footer)
62
+ ]
63
+
64
+ # Define the submit button
65
+ submit_btn = gr.Button("Submit")
66
+
67
+ # Define the Gradio interface
68
+ with gr.Blocks(theme='gradio/soft') as app:
69
+ gr.Markdown("# Text and PDF Summarization App")
70
+ gr.Markdown("Note: This model can handle a maximum of 1024 tokens. A token is a unit of text that the model can process at a time. When summarizing text, the input text is split into smaller chunks of up to 1024 tokens each, and each chunk is summarized separately. The summaries are then concatenated into a single summary.")
71
+ with gr.Row():
72
+ input_type = gr.Radio(["Text", "PDF"], label="Input Type")
73
+ with gr.Row():
74
+ input_text = gr.Textbox(lines=10, label="Enter Text to Summarize", visible=False)
75
+ uploaded_file = gr.File(label="Upload PDF file", visible=False)
76
+ with gr.Row():
77
+ submit_btn = gr.Button("Submit")
78
+ with gr.Row():
79
+ summary = gr.Textbox(label="Summary")
80
+ with gr.Row():
81
+ footer = gr.HTML(footer)
82
+
83
+ # Define the change event handler for the input type radio buttons
84
+ def input_type_change(input_type):
85
+ if input_type == "Text":
86
+ return {input_text: gr.Textbox(visible=True), uploaded_file: gr.File(visible=False)}
87
+ else:
88
+ return {input_text: gr.Textbox(visible=False), uploaded_file: gr.File(visible=True)}
89
+ input_type.change(fn=input_type_change, inputs=[input_type], outputs=[input_text, uploaded_file])
90
+
91
+ # Define the click event handler for the submit button
92
+ submit_btn.click(fn=summarize, inputs=[input_type, input_text, uploaded_file], outputs=[summary])
93
+
94
+ # Launch the Gradio interface
95
+ if __name__ == "__main__":
96
+ app.launch()