Blaise-g committed
Commit de9b441 • 1 Parent(s): c6727a8

Update app.py

Files changed (1)
  1. app.py +7 -7
app.py CHANGED
@@ -10,7 +10,7 @@ from utils import load_example_filenames, truncate_word_count
 
 _here = Path(__file__).parent
 
-nltk.download("stopwords") # TODO=find where this requirement originates from
+nltk.download("stopwords")
 
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -23,7 +23,7 @@ def proc_submission(
     num_beams,
     token_batch_length,
     length_penalty,
-    max_input_length: int = 2048,
+    max_input_length: int = 3060,
 ):
     """
     proc_submission - a helper function for the gradio module to process submissions
@@ -167,7 +167,7 @@ if __name__ == "__main__":
 
 gr.Markdown("# Automatic summarization of biomedical research papers with neural abstractive methods into a long and comprehensive synopsis or extreme TLDR summary version")
 gr.Markdown(
-    "A rather simple demo developed for my Master Thesis project using ad-hoc fine-tuned abstractive summarization models to summarize long biomedical articles (or any scientific text related to the biomedical domain) into a detailed, explanatory synopsis or extreme TLDR summary."
+    "A demo developed for my Master Thesis project using ad-hoc fine-tuned abstractive summarization models to summarize long biomedical articles (or any scientific text related to the biomedical domain) into a detailed, explanatory synopsis or extreme TLDR summary."
 )
 with gr.Column():
 
@@ -185,7 +185,7 @@ if __name__ == "__main__":
     value=2,
 )
 gr.Markdown(
-    "_The tldr model variant takes less time to produce the summaries and accepts a longer input sequence all other parameters being equal._"
+    "_The input text is divided into batches of the selected token lengths to fit within the memory constraints, pre-processed and fed into the model of choice. For optimal results use a GPU as the hosted CPU inference is lacking at times and hinders the models' output summary quality._"
 )
 with gr.Row():
     length_penalty = gr.inputs.Slider(
@@ -196,9 +196,9 @@ if __name__ == "__main__":
     step=0.05,
 )
 token_batch_length = gr.Radio(
-    choices=[768, 1024, 2048],
+    choices=[1024, 2048, 3060],
     label="token batch length",
-    value=1024,
+    value=2048,
 )
 with gr.Row():
     example_name = gr.Dropdown(
@@ -248,7 +248,7 @@ if __name__ == "__main__":
 gr.Markdown("---")
 
 with gr.Column():
-    gr.Markdown("## About the Model")
+    gr.Markdown("## About the Models")
 gr.Markdown(
     "- [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) is a fine-tuned checkpoint of [Stancld/longt5-tglobal-large-16384-pubmed-3k_steps](https://huggingface.co/Stancld/longt5-tglobal-large-16384-pubmed-3k_steps) on the [SumPubMed dataset](https://aclanthology.org/2021.acl-srw.30/). [Blaise-g/longt5_tglobal_large_scitldr](https://huggingface.co/Blaise-g/longt5_tglobal_large_scitldr) is a fine-tuned checkpoint of [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) on the [Scitldr dataset](https://arxiv.org/abs/2004.15011). The goal was to create two models capable of handling the complex information contained in long biomedical documents and subsequently producing scientific summaries according to one of the two possible levels of conciseness: 1) A long explanatory synopsis that retains the majority of domain-specific language used in the original source text. 2)A one sentence long, TLDR style summary."
 )
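
How the new defaults fit together: the commit raises max_input_length to 3060 and shifts the token batch choices to 1024/2048/3060 (default 2048), which suggests the submitted text is first truncated to max_input_length tokens and then handed to the selected model in chunks of the chosen batch length. The snippet below is a minimal, hypothetical sketch of that truncate-then-batch idea only, not the Space's actual code: truncate_and_batch is an illustrative helper, and whitespace splitting stands in for the real model tokenizer.

from typing import List


def truncate_and_batch(
    text: str,
    token_batch_length: int = 2048,  # default radio choice after this commit
    max_input_length: int = 3060,    # raised from 2048 in this commit
) -> List[str]:
    """Hypothetical helper: truncate the input, then split it into fixed-size chunks.

    Whitespace tokens approximate the model tokenizer here, so the counts are
    only indicative of how the real app would batch its input.
    """
    tokens = text.split()[:max_input_length]  # truncate overly long submissions
    return [
        " ".join(tokens[i : i + token_batch_length])  # one chunk per model call
        for i in range(0, len(tokens), token_batch_length)
    ]


if __name__ == "__main__":
    sample = "lorem ipsum dolor " * 2000  # roughly 6000 whitespace tokens
    batches = truncate_and_batch(sample)
    print(f"{len(batches)} batch(es), largest has {max(len(b.split()) for b in batches)} tokens")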