Blaise-g committed on
Commit
d1685df
·
1 Parent(s): d19b184

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -18
app.py CHANGED
@@ -21,6 +21,7 @@ logging.basicConfig(
21
  def proc_submission(
22
  input_text: str,
23
  model_type: str,
 
24
  num_beams,
25
  token_batch_length,
26
  length_penalty,
@@ -42,7 +43,7 @@ def proc_submission(
42
  max_input_length (int, optional): the maximum input length to use. Defaults to 768.
43
 
44
  Returns:
45
- str in HTML format, string of the summary, str of score
46
  """
47
 
48
  settings = {
@@ -73,20 +74,22 @@ def proc_submission(
73
 
74
  _summaries = summarize_via_tokenbatches(
75
  tr_in,
76
- model_tldr if model_type == "tldr" else model,
77
- tokenizer_tldr if model_type == "tldr" else tokenizer,
 
 
78
  batch_length=token_batch_length,
79
  **settings,
80
  )
81
  sum_text = [f"Section {i}: " + s["summary"][0] for i, s in enumerate(_summaries)]
82
- sum_scores = [
83
- f" - Section {i}: {round(s['summary_score'],4)}"
84
  for i, s in enumerate(_summaries)
85
  ]
86
 
87
  sum_text_out = "\n".join(sum_text)
88
- history["Summary Scores"] = "<br><br>"
89
- scores_out = "\n".join(sum_scores)
90
  rt = round((time.perf_counter() - st) / 60, 2)
91
  print(f"Runtime: {rt} minutes")
92
  html = ""
@@ -96,7 +99,7 @@ def proc_submission(
96
 
97
  html += ""
98
 
99
- return html, sum_text_out, scores_out
100
 
101
 
102
  def load_single_example_text(
@@ -144,8 +147,10 @@ def load_uploaded_file(file_obj):
144
 
145
  if __name__ == "__main__":
146
 
147
- model, tokenizer = load_model_and_tokenizer("Blaise-g/longt5_tglobal_large_sumpubmed")
148
  model_tldr, tokenizer_tldr = load_model_and_tokenizer("Blaise-g/longt5_tglobal_large_scitldr")
 
 
149
 
150
  name_to_path = load_example_filenames(_here / "examples")
151
  logging.info(f"Loaded {len(name_to_path)} examples")
@@ -168,7 +173,7 @@ if __name__ == "__main__":
168
  choices=["tldr", "detailed"], label="Summary type", value="detailed"
169
  )
170
  model_type = gr.Radio(
171
- choices=["LongT5", "LED"], label="Model type", value="LongT5"
172
  )
173
  num_beams = gr.Radio(
174
  choices=[2, 3, 4],
@@ -176,7 +181,7 @@ if __name__ == "__main__":
176
  value=2,
177
  )
178
  gr.Markdown(
179
- "_The LED model is less performant than the LongT5 model, but it's smaller in terms of size and therefore all other parameters being equal allows for a larger _"
180
  )
181
  with gr.Row():
182
  length_penalty = gr.inputs.Slider(
@@ -245,9 +250,9 @@ if __name__ == "__main__":
245
  label="Summary", placeholder="The generated summary will appear here"
246
  )
247
  gr.Markdown(
248
- "The summary scores can be thought of as representing the quality of the summary. less-negative numbers (closer to 0) are better:"
249
  )
250
- summary_scores = gr.Textbox(
251
  label="Compression rate πŸ—œ", placeholder="πŸ—œ will appear here"
252
  )
253
 
@@ -261,9 +266,6 @@ if __name__ == "__main__":
261
  gr.Markdown(
262
  "- The two most important parameters-empirically-are the `num_beams` and `token_batch_length`. However, increasing these will also increase the amount of time it takes to generate a summary. The `length_penalty` and `repetition_penalty` parameters are also important for the model to generate good summaries."
263
  )
264
- gr.Markdown(
265
- "- The model can be "
266
- )
267
  gr.Markdown("---")
268
 
269
  load_examples_button.click(
@@ -278,14 +280,15 @@ if __name__ == "__main__":
278
  fn=proc_submission,
279
  inputs=[
280
  input_text,
281
- model_size,
 
282
  num_beams,
283
  token_batch_length,
284
  length_penalty,
285
  repetition_penalty,
286
  no_repeat_ngram_size,
287
  ],
288
- outputs=[output_text, summary_text, summary_scores],
289
  )
290
 
291
  demo.launch(enable_queue=True, share=False)
 
21
  def proc_submission(
22
  input_text: str,
23
  model_type: str,
24
+ summary_type: str,
25
  num_beams,
26
  token_batch_length,
27
  length_penalty,
 
43
  max_input_length (int, optional): the maximum input length to use. Defaults to 768.
44
 
45
  Returns:
46
+ str in HTML format, string of the summary, str of compression rate in %
47
  """
48
 
49
  settings = {
 
74
 
75
  _summaries = summarize_via_tokenbatches(
76
  tr_in,
77
+ model_led_det if (model_type == "LED" & summary_type == "detailed") else model_det,
78
+ tokenizer_led_det if (model_type == "LED" & summary_type == "detailed") else tokenizer_det,
79
+ model_led_tldr if (model_type == "LED" & summary_type == "tldr") else model_tldr,
80
+ tokenizer_led_tldr if (model_type == "LED" & summary_type == "tldr") else tokenizer_tldr,
81
  batch_length=token_batch_length,
82
  **settings,
83
  )
84
  sum_text = [f"Section {i}: " + s["summary"][0] for i, s in enumerate(_summaries)]
85
+ compression_rate = [
86
+ f" - Section {i}: {round(s['compression_rate'],3)}"
87
  for i, s in enumerate(_summaries)
88
  ]
89
 
90
  sum_text_out = "\n".join(sum_text)
91
+ history["compression_rate"] = "<br><br>"
92
+ rate_out = "\n".join(compression_rate)
93
  rt = round((time.perf_counter() - st) / 60, 2)
94
  print(f"Runtime: {rt} minutes")
95
  html = ""
 
99
 
100
  html += ""
101
 
102
+ return html, sum_text_out, rate_out
103
 
104
 
105
  def load_single_example_text(
 
147
 
148
  if __name__ == "__main__":
149
 
150
+ model_det, tokenizer_det = load_model_and_tokenizer("Blaise-g/longt5_tglobal_large_sumpubmed")
151
  model_tldr, tokenizer_tldr = load_model_and_tokenizer("Blaise-g/longt5_tglobal_large_scitldr")
152
+ model_led_det, tokenizer_led_det = load_model_and_tokenizer("Blaise-g/led_pubmed_sumpubmed_1")
153
+ model_led_tldr, tokenizer_led_tldr = load_model_and_tokenizer("Blaise-g/led_large_sumpbumed_scitldr")
154
 
155
  name_to_path = load_example_filenames(_here / "examples")
156
  logging.info(f"Loaded {len(name_to_path)} examples")
 
173
  choices=["tldr", "detailed"], label="Summary type", value="detailed"
174
  )
175
  model_type = gr.Radio(
176
+ choices=["LongT5", "LED"], label="Model architecture", value="LongT5"
177
  )
178
  num_beams = gr.Radio(
179
  choices=[2, 3, 4],
 
181
  value=2,
182
  )
183
  gr.Markdown(
184
+ "_The LED model is less performant than the LongT5 model, but it's smaller in terms of size and therefore all other parameters being equal allows for a longer input sequence._"
185
  )
186
  with gr.Row():
187
  length_penalty = gr.inputs.Slider(
 
250
  label="Summary", placeholder="The generated summary will appear here"
251
  )
252
  gr.Markdown(
253
+ "The compression rate indicates the ratio between the machine-generated summary length and the input text (from 0% to 100%). The higher the compression rate the more extreme the summary is."
254
  )
255
+ compression_rate = gr.Textbox(
256
  label="Compression rate πŸ—œ", placeholder="πŸ—œ will appear here"
257
  )
258
 
 
266
  gr.Markdown(
267
  "- The two most important parameters-empirically-are the `num_beams` and `token_batch_length`. However, increasing these will also increase the amount of time it takes to generate a summary. The `length_penalty` and `repetition_penalty` parameters are also important for the model to generate good summaries."
268
  )
 
 
 
269
  gr.Markdown("---")
270
 
271
  load_examples_button.click(
 
280
  fn=proc_submission,
281
  inputs=[
282
  input_text,
283
+ summary_type,
284
+ model_type,
285
  num_beams,
286
  token_batch_length,
287
  length_penalty,
288
  repetition_penalty,
289
  no_repeat_ngram_size,
290
  ],
291
+ outputs=[output_text, summary_text, compression_rate],
292
  )
293
 
294
  demo.launch(enable_queue=True, share=False)