Blaise-g committed
Commit de9b441 • 1 Parent(s): c6727a8

Update app.py

Files changed (1)
  1. app.py +7 -7
app.py CHANGED
@@ -10,7 +10,7 @@ from utils import load_example_filenames, truncate_word_count
 
 _here = Path(__file__).parent
 
-nltk.download("stopwords") # TODO=find where this requirement originates from
+nltk.download("stopwords")
 
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -23,7 +23,7 @@ def proc_submission(
     num_beams,
     token_batch_length,
     length_penalty,
-    max_input_length: int = 2048,
+    max_input_length: int = 3060,
 ):
     """
     proc_submission - a helper function for the gradio module to process submissions
@@ -167,7 +167,7 @@ if __name__ == "__main__":
 
 gr.Markdown("# Automatic summarization of biomedical research papers with neural abstractive methods into a long and comprehensive synopsis or extreme TLDR summary version")
 gr.Markdown(
-    "A rather simple demo developed for my Master Thesis project using ad-hoc fine-tuned abstractive summarization models to summarize long biomedical articles (or any scientific text related to the biomedical domain) into a detailed, explanatory synopsis or extreme TLDR summary."
+    "A demo developed for my Master Thesis project using ad-hoc fine-tuned abstractive summarization models to summarize long biomedical articles (or any scientific text related to the biomedical domain) into a detailed, explanatory synopsis or extreme TLDR summary."
 )
 with gr.Column():
 
@@ -185,7 +185,7 @@ if __name__ == "__main__":
     value=2,
 )
 gr.Markdown(
-    "_The tldr model variant takes less time to produce the summaries and accepts a longer input sequence all other parameters being equal._"
+    "_The input text is divided into batches of the selected token lengths to fit within the memory constraints, pre-processed and fed into the model of choice. For optimal results use a GPU as the hosted CPU inference is lacking at times and hinders the models' output summary quality._"
 )
 with gr.Row():
     length_penalty = gr.inputs.Slider(
@@ -196,9 +196,9 @@ if __name__ == "__main__":
     step=0.05,
 )
 token_batch_length = gr.Radio(
-    choices=[768, 1024, 2048],
+    choices=[1024, 2048, 3060],
     label="token batch length",
-    value=1024,
+    value=2048,
 )
 with gr.Row():
     example_name = gr.Dropdown(
@@ -248,7 +248,7 @@ if __name__ == "__main__":
 gr.Markdown("---")
 
 with gr.Column():
-    gr.Markdown("## About the Model")
+    gr.Markdown("## About the Models")
 gr.Markdown(
     "- [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) is a fine-tuned checkpoint of [Stancld/longt5-tglobal-large-16384-pubmed-3k_steps](https://huggingface.co/Stancld/longt5-tglobal-large-16384-pubmed-3k_steps) on the [SumPubMed dataset](https://aclanthology.org/2021.acl-srw.30/). [Blaise-g/longt5_tglobal_large_scitldr](https://huggingface.co/Blaise-g/longt5_tglobal_large_scitldr) is a fine-tuned checkpoint of [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) on the [Scitldr dataset](https://arxiv.org/abs/2004.15011). The goal was to create two models capable of handling the complex information contained in long biomedical documents and subsequently producing scientific summaries according to one of the two possible levels of conciseness: 1) A long explanatory synopsis that retains the majority of domain-specific language used in the original source text. 2)A one sentence long, TLDR style summary."
 )
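
How the new defaults fit together: the commit raises max_input_length to 3060 and shifts the token batch choices to 1024/2048/3060 (default 2048), which suggests the submitted text is first truncated to max_input_length tokens and then handed to the selected model in chunks of the chosen batch length. The snippet below is a minimal, hypothetical sketch of that truncate-then-batch idea only, not the Space's actual code: truncate_and_batch is an illustrative helper, and whitespace splitting stands in for the real model tokenizer.

from typing import List


def truncate_and_batch(
    text: str,
    token_batch_length: int = 2048,  # default radio choice after this commit
    max_input_length: int = 3060,    # raised from 2048 in this commit
) -> List[str]:
    """Hypothetical helper: truncate the input, then split it into fixed-size chunks.

    Whitespace tokens approximate the model tokenizer here, so the counts are
    only indicative of how the real app would batch its input.
    """
    tokens = text.split()[:max_input_length]  # truncate overly long submissions
    return [
        " ".join(tokens[i : i + token_batch_length])  # one chunk per model call
        for i in range(0, len(tokens), token_batch_length)
    ]


if __name__ == "__main__":
    sample = "lorem ipsum dolor " * 2000  # roughly 6000 whitespace tokens
    batches = truncate_and_batch(sample)
    print(f"{len(batches)} batch(es), largest has {max(len(b.split()) for b in batches)} tokens")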