Spaces:
Running
Running
html UI updates
Browse files
Signed-off-by: peter szemraj <peterszemraj@gmail.com>
app.py
CHANGED
@@ -36,7 +36,12 @@ from doctr.models import ocr_predictor
|
|
36 |
|
37 |
from pdf2text import convert_PDF_to_Text
|
38 |
from summarize import load_model_and_tokenizer, summarize_via_tokenbatches
|
39 |
-
from utils import
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
_here = Path(__file__).parent
|
42 |
|
@@ -179,15 +184,13 @@ def proc_submission(
|
|
179 |
token_batch_length=token_batch_length,
|
180 |
**settings,
|
181 |
)
|
182 |
-
sum_text = [
|
183 |
-
f"Batch {i}:\n\t" + s["summary"][0] for i, s in enumerate(_summaries, start=1)
|
184 |
-
]
|
185 |
sum_scores = [
|
186 |
f" - Batch Summary {i}: {round(s['summary_score'],4)}"
|
187 |
for i, s in enumerate(_summaries)
|
188 |
]
|
189 |
|
190 |
-
|
191 |
history["Summary Scores"] = "<br><br>"
|
192 |
scores_out = "\n".join(sum_scores)
|
193 |
rt = round((time.perf_counter() - st) / 60, 2)
|
@@ -203,7 +206,7 @@ def proc_submission(
|
|
203 |
settings["model_name"] = model_name
|
204 |
saved_file = saves_summary(summarize_output=_summaries, outpath=None, **settings)
|
205 |
|
206 |
-
return html,
|
207 |
|
208 |
|
209 |
def load_single_example_text(
|
@@ -356,22 +359,26 @@ if __name__ == "__main__":
|
|
356 |
|
357 |
output_text = gr.HTML("<p><em>Output will appear below:</em></p>")
|
358 |
gr.Markdown("### Summary Output")
|
359 |
-
summary_text = gr.
|
360 |
-
label="Summary",
|
361 |
-
)
|
362 |
-
gr.Markdown(
|
363 |
-
"The summary scores can be thought of as representing the quality of the summary. less-negative numbers (closer to 0) are better:"
|
364 |
-
)
|
365 |
-
summary_scores = gr.Textbox(
|
366 |
-
label="Summary Scores", placeholder="Summary scores will appear here"
|
367 |
-
)
|
368 |
-
|
369 |
-
text_file = gr.File(
|
370 |
-
label="Download as Text File",
|
371 |
-
file_count="single",
|
372 |
-
type="file",
|
373 |
-
interactive=False,
|
374 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
375 |
|
376 |
gr.Markdown("---")
|
377 |
with gr.Column():
|
|
|
36 |
|
37 |
from pdf2text import convert_PDF_to_Text
|
38 |
from summarize import load_model_and_tokenizer, summarize_via_tokenbatches
|
39 |
+
from utils import (
|
40 |
+
load_example_filenames,
|
41 |
+
saves_summary,
|
42 |
+
textlist2html,
|
43 |
+
truncate_word_count,
|
44 |
+
)
|
45 |
|
46 |
_here = Path(__file__).parent
|
47 |
|
|
|
184 |
token_batch_length=token_batch_length,
|
185 |
**settings,
|
186 |
)
|
187 |
+
# Iterate the entries directly: each entry is indexed by string key (as in the
# score list built from enumerate(_summaries)), so tuple-unpacking it into
# `i, s` without enumerate() would fail or bind the wrong values.
sum_text = [s["summary"][0].strip() + "\n" for s in _summaries]
|
|
|
|
|
188 |
sum_scores = [
|
189 |
f" - Batch Summary {i}: {round(s['summary_score'],4)}"
|
190 |
for i, s in enumerate(_summaries)
|
191 |
]
|
192 |
|
193 |
+
full_summary = textlist2html(sum_text)
|
194 |
history["Summary Scores"] = "<br><br>"
|
195 |
scores_out = "\n".join(sum_scores)
|
196 |
rt = round((time.perf_counter() - st) / 60, 2)
|
|
|
206 |
settings["model_name"] = model_name
|
207 |
saved_file = saves_summary(summarize_output=_summaries, outpath=None, **settings)
|
208 |
|
209 |
+
return html, full_summary, scores_out, saved_file
|
210 |
|
211 |
|
212 |
def load_single_example_text(
|
|
|
359 |
|
360 |
output_text = gr.HTML("<p><em>Output will appear below:</em></p>")
|
361 |
gr.Markdown("### Summary Output")
|
362 |
+
summary_text = gr.HTML(
|
363 |
+
label="Summary", value="<i>Summary will appear here!</i>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
364 |
)
|
365 |
+
with gr.Column():
|
366 |
+
gr.Markdown("Export & Summary Scores")
|
367 |
+
with gr.Row(variant="panel"):
|
368 |
+
text_file = gr.File(
|
369 |
+
label="Download as Text File",
|
370 |
+
file_count="single",
|
371 |
+
type="file",
|
372 |
+
interactive=False,
|
373 |
+
)
|
374 |
+
with gr.Row(variant="panel"):
|
375 |
+
gr.Markdown(
|
376 |
+
"The summary scores can be thought of as representing the quality of the summary. less-negative numbers (closer to 0) are better:"
|
377 |
+
)
|
378 |
+
summary_scores = gr.Textbox(
|
379 |
+
label="Summary Scores",
|
380 |
+
placeholder="Summary scores will appear here",
|
381 |
+
)
|
382 |
|
383 |
gr.Markdown("---")
|
384 |
with gr.Column():
|
utils.py
CHANGED
@@ -106,6 +106,28 @@ def load_example_filenames(example_path: str or Path):
|
|
106 |
return examples
|
107 |
|
108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
def extract_keywords(
|
110 |
text: str, num_keywords: int = 3, window_size: int = 5, kw_max_len: int = 20
|
111 |
) -> List[str]:
|
|
|
106 |
return examples
|
107 |
|
108 |
|
109 |
+
def textlist2html(text_batches):
    """
    Render a sequence of text batches as a stack of styled HTML cards.

    Each batch becomes one ``<div>`` holding a "Batch N:" heading (numbered
    from 1) and a paragraph with the batch text. The cards are joined with
    newlines; an empty input yields an empty string.

    The batch text is HTML-escaped before interpolation so that ``<``, ``>``
    and ``&`` in the text are displayed literally instead of being parsed as
    markup.

    :param text_batches: iterable of text items, one per batch
    :return: a single HTML string containing one card per batch
    """
    # Local import keeps this block self-contained without touching the
    # module's import section.
    from html import escape

    html_list = [
        f"""
        <div style="
            margin-bottom: 20px;
            font-size: 18px;
            line-height: 1.5em;
            color: #333;
            background-color: #f9f9f9;
            border: 1px solid #ddd;
            border-radius: 5px;
            padding: 20px;
        ">
            <h2 style="font-size: 22px; color: #555;">Batch {i}:</h2>
            <p style="white-space: pre-line;">{escape(str(s), quote=False)}</p>
        </div>
        """
        for i, s in enumerate(text_batches, start=1)
    ]
    return "\n".join(html_list)
|
129 |
+
|
130 |
+
|
131 |
def extract_keywords(
|
132 |
text: str, num_keywords: int = 3, window_size: int = 5, kw_max_len: int = 20
|
133 |
) -> List[str]:
|