document-summarization

Build error

App Files Files Community

pszemraj committed on Apr 30, 2023

Commit

c430753

1 Parent(s): 32939cb

💄 html UI updates

Browse files

Signed-off-by: peter szemraj <peterszemraj@gmail.com>

Files changed (2) hide show

app.py +28 -21
utils.py +22 -0

app.py CHANGED Viewed

@@ -36,7 +36,12 @@ from doctr.models import ocr_predictor
 from pdf2text import convert_PDF_to_Text
 from summarize import load_model_and_tokenizer, summarize_via_tokenbatches
-from utils import load_example_filenames, saves_summary, truncate_word_count
 _here = Path(__file__).parent
@@ -179,15 +184,13 @@ def proc_submission(
         token_batch_length=token_batch_length,
         **settings,
     )
-    sum_text = [
-        f"Batch {i}:\n\t" + s["summary"][0] for i, s in enumerate(_summaries, start=1)
-    ]
     sum_scores = [
         f" - Batch Summary {i}: {round(s['summary_score'],4)}"
         for i, s in enumerate(_summaries)
     ]
-    sum_text_out = "\n".join(sum_text)
     history["Summary Scores"] = "<br><br>"
     scores_out = "\n".join(sum_scores)
     rt = round((time.perf_counter() - st) / 60, 2)
@@ -203,7 +206,7 @@ def proc_submission(
     settings["model_name"] = model_name
     saved_file = saves_summary(summarize_output=_summaries, outpath=None, **settings)
-    return html, sum_text_out, scores_out, saved_file
 def load_single_example_text(
@@ -356,22 +359,26 @@ if __name__ == "__main__":
             output_text = gr.HTML("<p><em>Output will appear below:</em></p>")
             gr.Markdown("### Summary Output")
-            summary_text = gr.Textbox(
-                label="Summary", placeholder="The generated summary will appear here"
-            )
-            gr.Markdown(
-                "The summary scores can be thought of as representing the quality of the summary. less-negative numbers (closer to 0) are better:"
-            )
-            summary_scores = gr.Textbox(
-                label="Summary Scores", placeholder="Summary scores will appear here"
-            )
-            text_file = gr.File(
-                label="Download as Text File",
-                file_count="single",
-                type="file",
-                interactive=False,
             )
         gr.Markdown("---")
         with gr.Column():

 from pdf2text import convert_PDF_to_Text
 from summarize import load_model_and_tokenizer, summarize_via_tokenbatches
+from utils import (
+    load_example_filenames,
+    saves_summary,
+    textlist2html,
+    truncate_word_count,
+)
 _here = Path(__file__).parent
         token_batch_length=token_batch_length,
         **settings,
     )
+    sum_text = [s["summary"][0].strip() + "\n" for i, s in _summaries]
     sum_scores = [
         f" - Batch Summary {i}: {round(s['summary_score'],4)}"
         for i, s in enumerate(_summaries)
     ]
+    full_summary = textlist2html(sum_text)
     history["Summary Scores"] = "<br><br>"
     scores_out = "\n".join(sum_scores)
     rt = round((time.perf_counter() - st) / 60, 2)
     settings["model_name"] = model_name
     saved_file = saves_summary(summarize_output=_summaries, outpath=None, **settings)
+    return html, full_summary, scores_out, saved_file
 def load_single_example_text(
             output_text = gr.HTML("<p><em>Output will appear below:</em></p>")
             gr.Markdown("### Summary Output")
+            summary_text = gr.HTML(
+                label="Summary", value="<i>Summary will appear here!</i>"
             )
+            with gr.Column():
+                gr.Markdown("Export & Summary Scores")
+                with gr.Row(variant="panel"):
+                    text_file = gr.File(
+                        label="Download as Text File",
+                        file_count="single",
+                        type="file",
+                        interactive=False,
+                    )
+                with gr.Row(variant="panel"):
+                    gr.Markdown(
+                        "The summary scores can be thought of as representing the quality of the summary. less-negative numbers (closer to 0) are better:"
+                    )
+                    summary_scores = gr.Textbox(
+                        label="Summary Scores",
+                        placeholder="Summary scores will appear here",
+                    )
         gr.Markdown("---")
         with gr.Column():

utils.py CHANGED Viewed

@@ -106,6 +106,28 @@ def load_example_filenames(example_path: str or Path):
     return examples
 def extract_keywords(
     text: str, num_keywords: int = 3, window_size: int = 5, kw_max_len: int = 20
 ) -> List[str]:

     return examples
def textlist2html(text_batches: List[str]) -> str:
    """Render a list of batch summaries as a sequence of styled HTML cards.

    Each entry becomes one ``<div>`` holding a "Batch N:" heading (numbered
    from 1) and the entry's text in a ``<p>`` that preserves line breaks.

    Args:
        text_batches: the summary text for each batch, in display order.

    Returns:
        The HTML for all batches joined by newlines; an empty string when
        ``text_batches`` is empty.
    """
    # Local import keeps this block self-contained.
    from html import escape

    html_list = [
        f"""
        <div style="
            margin-bottom: 20px;
            font-size: 18px;
            line-height: 1.5em;
            color: #333;
            background-color: #f9f9f9;
            border: 1px solid #ddd;
            border-radius: 5px;
            padding: 20px;
        ">
            <h2 style="font-size: 22px; color: #555;">Batch {i}:</h2>
            <p style="white-space: pre-line;">{escape(str(s), quote=False)}</p>
        </div>
        """
        # The text is generated from arbitrary documents, so it is escaped:
        # a stray "<", ">" or "&" must show up literally instead of being
        # parsed as markup. quote=False leaves quotes alone because the text
        # lands in element content, not in an attribute value.
        for i, s in enumerate(text_batches, start=1)
    ]
    return "\n".join(html_list)
 def extract_keywords(
     text: str, num_keywords: int = 3, window_size: int = 5, kw_max_len: int = 20
 ) -> List[str]: