Update app.py
app.py CHANGED
@@ -21,6 +21,7 @@ logging.basicConfig(
 def proc_submission(
     input_text: str,
     model_type: str,
+    summary_type: str,
     num_beams,
     token_batch_length,
     length_penalty,
@@ -42,7 +43,7 @@ def proc_submission(
         max_input_length (int, optional): the maximum input length to use. Defaults to 768.
 
     Returns:
-        str in HTML format, string of the summary, str of
+        str in HTML format, string of the summary, str of compression rate in %
     """
 
     settings = {
@@ -73,20 +74,22 @@ def proc_submission(
 
     _summaries = summarize_via_tokenbatches(
         tr_in,
-
-
+        model_led_det if (model_type == "LED" and summary_type == "detailed") else model_det,
+        tokenizer_led_det if (model_type == "LED" and summary_type == "detailed") else tokenizer_det,
+        model_led_tldr if (model_type == "LED" and summary_type == "tldr") else model_tldr,
+        tokenizer_led_tldr if (model_type == "LED" and summary_type == "tldr") else tokenizer_tldr,
         batch_length=token_batch_length,
         **settings,
     )
     sum_text = [f"Section {i}: " + s["summary"][0] for i, s in enumerate(_summaries)]
-
-    f" - Section {i}: {round(s['
+    compression_rate = [
+        f" - Section {i}: {round(s['compression_rate'],3)}"
         for i, s in enumerate(_summaries)
     ]
 
     sum_text_out = "\n".join(sum_text)
-    history["
-
+    history["compression_rate"] = "<br><br>"
+    rate_out = "\n".join(compression_rate)
     rt = round((time.perf_counter() - st) / 60, 2)
     print(f"Runtime: {rt} minutes")
     html = ""
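The four conditional arguments route generation to one of four checkpoints; note the conditions need Python's `and` rather than `&`, since `&` binds more tightly than `==` and would attempt a bitwise AND of two strings. If `summarize_via_tokenbatches` accepts a single model and tokenizer (an assumption, since its signature is not shown in this diff), the routing collapses into one lookup. A minimal sketch, with `pick_model_and_tokenizer` as a hypothetical helper:

```python
# Sketch only: assumes summarize_via_tokenbatches(text, model, tokenizer, ...)
# takes one model and one tokenizer; the hunk above passes four conditionals.
def pick_model_and_tokenizer(model_type: str, summary_type: str):
    """Map (architecture, summary style) to one loaded checkpoint pair."""
    registry = {
        ("LongT5", "detailed"): (model_det, tokenizer_det),
        ("LongT5", "tldr"): (model_tldr, tokenizer_tldr),
        ("LED", "detailed"): (model_led_det, tokenizer_led_det),
        ("LED", "tldr"): (model_led_tldr, tokenizer_led_tldr),
    }
    return registry[(model_type, summary_type)]


# model, tokenizer = pick_model_and_tokenizer(model_type, summary_type)
# _summaries = summarize_via_tokenbatches(
#     tr_in, model, tokenizer, batch_length=token_batch_length, **settings
# )
```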
@@ -96,7 +99,7 @@ def proc_submission(
 
     html += ""
 
-    return html, sum_text_out,
+    return html, sum_text_out, rate_out
 
 
 def load_single_example_text(
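Each entry in `_summaries` is expected to carry a `compression_rate` key, which this diff never computes directly; it presumably comes from `summarize_via_tokenbatches`. Assuming the usual definition, the ratio of summary length to input length, a standalone sketch (word counts standing in for token counts):

```python
def compression_rate(input_text: str, summary_text: str) -> float:
    """Hypothetical helper: ratio of summary length to input length, in percent.

    The real value is produced inside summarize_via_tokenbatches, whose
    internals are not part of this diff; word counts stand in for tokens.
    """
    n_in = len(input_text.split())
    n_out = len(summary_text.split())
    return round(100 * n_out / max(n_in, 1), 3)


# compression_rate("one two three four five six seven eight", "one two")  # 25.0
```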
@@ -144,8 +147,10 @@ def load_uploaded_file(file_obj):
 
 if __name__ == "__main__":
 
-
+    model_det, tokenizer_det = load_model_and_tokenizer("Blaise-g/longt5_tglobal_large_sumpubmed")
     model_tldr, tokenizer_tldr = load_model_and_tokenizer("Blaise-g/longt5_tglobal_large_scitldr")
+    model_led_det, tokenizer_led_det = load_model_and_tokenizer("Blaise-g/led_pubmed_sumpubmed_1")
+    model_led_tldr, tokenizer_led_tldr = load_model_and_tokenizer("Blaise-g/led_large_sumpbumed_scitldr")
 
     name_to_path = load_example_filenames(_here / "examples")
     logging.info(f"Loaded {len(name_to_path)} examples")
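`load_model_and_tokenizer` is defined elsewhere in app.py. Assuming it wraps the standard transformers loaders (a sketch, not the file's actual definition), it would look roughly like the following; with `lru_cache`, moving the calls inside `proc_submission` would defer each of the four large checkpoint loads to first use rather than paying for all of them at startup:

```python
from functools import lru_cache

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer


@lru_cache(maxsize=4)
def load_model_and_tokenizer(repo_id: str):
    """Load a seq2seq checkpoint and its tokenizer from the Hugging Face Hub.

    Sketch under assumptions: the real helper may also handle device
    placement or dtype. lru_cache deduplicates repeated loads per repo_id.
    """
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(repo_id)
    return model, tokenizer
```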
@@ -168,7 +173,7 @@ if __name__ == "__main__":
                 choices=["tldr", "detailed"], label="Summary type", value="detailed"
             )
             model_type = gr.Radio(
-                choices=["LongT5", "LED"], label="Model
+                choices=["LongT5", "LED"], label="Model architecture", value="LongT5"
             )
             num_beams = gr.Radio(
                 choices=[2, 3, 4],
@@ -176,7 +181,7 @@ if __name__ == "__main__":
                 value=2,
             )
             gr.Markdown(
-                "_The LED model is less performant than the LongT5 model, but it's smaller in terms of size and therefore all other parameters being equal allows for a
+                "_The LED model is less performant than the LongT5 model, but it is smaller, so, all other parameters being equal, it allows for a longer input sequence._"
             )
             with gr.Row():
                 length_penalty = gr.inputs.Slider(
@@ -245,9 +250,9 @@ if __name__ == "__main__":
                 label="Summary", placeholder="The generated summary will appear here"
             )
             gr.Markdown(
-                "The
+                "The compression rate indicates the ratio between the length of the machine-generated summary and the length of the input text (from 0% to 100%). The higher the compression rate, the more extreme the summary is."
             )
-
+            compression_rate = gr.Textbox(
                 label="Compression rate π", placeholder="π will appear here"
             )
 
@@ -261,9 +266,6 @@ if __name__ == "__main__":
             gr.Markdown(
                 "- The two most important parameters-empirically-are the `num_beams` and `token_batch_length`. However, increasing these will also increase the amount of time it takes to generate a summary. The `length_penalty` and `repetition_penalty` parameters are also important for the model to generate good summaries."
             )
-            gr.Markdown(
-                "- The model can be "
-            )
             gr.Markdown("---")
 
             load_examples_button.click(
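The note above on `num_beams`, `token_batch_length`, `length_penalty`, and `repetition_penalty` maps onto the `settings` dict assembled near the top of `proc_submission`. A hypothetical shape, using only standard transformers `generate()` kwargs (the concrete values are illustrative, not taken from the diff):

```python
# Illustrative settings: every key is a standard transformers generate() kwarg.
settings = {
    "num_beams": 2,              # wider beam search: better summaries, slower
    "length_penalty": 0.8,       # exponential length penalty applied in beam search
    "repetition_penalty": 2.5,   # discourages repeating tokens
    "no_repeat_ngram_size": 3,   # hard-blocks repeated trigrams
    "early_stopping": True,      # stop each beam once it emits EOS
}

# summary_ids = model.generate(**batch_inputs, **settings)  # batch_inputs: a tokenized batch
```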
@@ -278,14 +280,15 @@ if __name__ == "__main__":
                 fn=proc_submission,
                 inputs=[
                     input_text,
-
+                    model_type,
+                    summary_type,
                     num_beams,
                     token_batch_length,
                     length_penalty,
                     repetition_penalty,
                     no_repeat_ngram_size,
                 ],
-                outputs=[output_text, summary_text,
+                outputs=[output_text, summary_text, compression_rate],
             )
 
     demo.launch(enable_queue=True, share=False)
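Gradio binds `inputs` to the target function's parameters positionally, so the list must mirror `proc_submission`'s signature (`input_text, model_type, summary_type, num_beams, ...`), and `outputs` must mirror the returned tuple `(html, sum_text_out, rate_out)`. A minimal self-contained illustration of the pattern (hypothetical widgets, gradio 3.x-era API to match the `gr.inputs.Slider` usage above):

```python
import gradio as gr


def echo(text: str, flavor: str) -> str:
    """Stand-in for proc_submission: position, not name, links widget to param."""
    return f"{flavor}: {text}"


with gr.Blocks() as demo:
    text = gr.Textbox(label="Input")
    flavor = gr.Radio(choices=["tldr", "detailed"], value="detailed", label="Flavor")
    out = gr.Textbox(label="Output")
    btn = gr.Button("Run")
    # inputs[0] -> text, inputs[1] -> flavor; reordering the list swaps arguments.
    btn.click(fn=echo, inputs=[text, flavor], outputs=[out])

# demo.launch()
```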