Update app.py
app.py CHANGED
@@ -21,6 +21,7 @@ logging.basicConfig(
 def proc_submission(
     input_text: str,
     model_type: str,
+    summary_type: str,
     num_beams,
     token_batch_length,
     length_penalty,
@@ -42,7 +43,7 @@ def proc_submission(
         max_input_length (int, optional): the maximum input length to use. Defaults to 768.
 
     Returns:
-        str in HTML format, string of the summary, str of
+        str in HTML format, string of the summary, str of compression rate in %
     """
 
     settings = {
@@ -73,20 +74,22 @@ def proc_submission(
 
     _summaries = summarize_via_tokenbatches(
         tr_in,
-
-
+        model_led_det if (model_type == "LED" and summary_type == "detailed") else model_det,
+        tokenizer_led_det if (model_type == "LED" and summary_type == "detailed") else tokenizer_det,
+        model_led_tldr if (model_type == "LED" and summary_type == "tldr") else model_tldr,
+        tokenizer_led_tldr if (model_type == "LED" and summary_type == "tldr") else tokenizer_tldr,
         batch_length=token_batch_length,
         **settings,
     )
     sum_text = [f"Section {i}: " + s["summary"][0] for i, s in enumerate(_summaries)]
-
-    f" - Section {i}: {round(s['
+    compression_rate = [
+        f" - Section {i}: {round(s['compression_rate'],3)}"
         for i, s in enumerate(_summaries)
     ]
 
     sum_text_out = "\n".join(sum_text)
-    history["
-
+    history["compression_rate"] = "<br><br>"
+    rate_out = "\n".join(compression_rate)
     rt = round((time.perf_counter() - st) / 60, 2)
     print(f"Runtime: {rt} minutes")
     html = ""
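The four conditional arguments route generation to one of four checkpoints; note the conditions need Python's `and` rather than `&`, since `&` binds more tightly than `==` and would attempt a bitwise AND of two strings. If `summarize_via_tokenbatches` accepts a single model and tokenizer (an assumption, since its signature is not shown in this diff), the routing collapses into one lookup. A minimal sketch, with `pick_model_and_tokenizer` as a hypothetical helper:

```python
# Sketch only: assumes summarize_via_tokenbatches(text, model, tokenizer, ...)
# takes one model and one tokenizer; the hunk above passes four conditionals.
def pick_model_and_tokenizer(model_type: str, summary_type: str):
    """Map (architecture, summary style) to one loaded checkpoint pair."""
    registry = {
        ("LongT5", "detailed"): (model_det, tokenizer_det),
        ("LongT5", "tldr"): (model_tldr, tokenizer_tldr),
        ("LED", "detailed"): (model_led_det, tokenizer_led_det),
        ("LED", "tldr"): (model_led_tldr, tokenizer_led_tldr),
    }
    return registry[(model_type, summary_type)]


# model, tokenizer = pick_model_and_tokenizer(model_type, summary_type)
# _summaries = summarize_via_tokenbatches(
#     tr_in, model, tokenizer, batch_length=token_batch_length, **settings
# )
```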
@@ -96,7 +99,7 @@ def proc_submission(
 
     html += ""
 
-    return html, sum_text_out,
+    return html, sum_text_out, rate_out
 
 
 def load_single_example_text(
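Each entry in `_summaries` is expected to carry a `compression_rate` key, which this diff never computes directly; it presumably comes from `summarize_via_tokenbatches`. Assuming the usual definition, the ratio of summary length to input length, a standalone sketch (word counts standing in for token counts):

```python
def compression_rate(input_text: str, summary_text: str) -> float:
    """Hypothetical helper: ratio of summary length to input length, in percent.

    The real value is produced inside summarize_via_tokenbatches, whose
    internals are not part of this diff; word counts stand in for tokens.
    """
    n_in = len(input_text.split())
    n_out = len(summary_text.split())
    return round(100 * n_out / max(n_in, 1), 3)


# compression_rate("one two three four five six seven eight", "one two")  # 25.0
```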
@@ -144,8 +147,10 @@ def load_uploaded_file(file_obj):
 
 if __name__ == "__main__":
 
-
+    model_det, tokenizer_det = load_model_and_tokenizer("Blaise-g/longt5_tglobal_large_sumpubmed")
     model_tldr, tokenizer_tldr = load_model_and_tokenizer("Blaise-g/longt5_tglobal_large_scitldr")
+    model_led_det, tokenizer_led_det = load_model_and_tokenizer("Blaise-g/led_pubmed_sumpubmed_1")
+    model_led_tldr, tokenizer_led_tldr = load_model_and_tokenizer("Blaise-g/led_large_sumpbumed_scitldr")
 
     name_to_path = load_example_filenames(_here / "examples")
     logging.info(f"Loaded {len(name_to_path)} examples")
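`load_model_and_tokenizer` is defined elsewhere in app.py. Assuming it wraps the standard transformers loaders (a sketch, not the file's actual definition), it would look roughly like the following; with `lru_cache`, moving the calls inside `proc_submission` would defer each of the four large checkpoint loads to first use rather than paying for all of them at startup:

```python
from functools import lru_cache

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer


@lru_cache(maxsize=4)
def load_model_and_tokenizer(repo_id: str):
    """Load a seq2seq checkpoint and its tokenizer from the Hugging Face Hub.

    Sketch under assumptions: the real helper may also handle device
    placement or dtype. lru_cache deduplicates repeated loads per repo_id.
    """
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(repo_id)
    return model, tokenizer
```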
@@ -168,7 +173,7 @@ if __name__ == "__main__":
                 choices=["tldr", "detailed"], label="Summary type", value="detailed"
             )
             model_type = gr.Radio(
-                choices=["LongT5", "LED"], label="Model
+                choices=["LongT5", "LED"], label="Model architecture", value="LongT5"
             )
             num_beams = gr.Radio(
                 choices=[2, 3, 4],
@@ -176,7 +181,7 @@ if __name__ == "__main__":
                 value=2,
             )
             gr.Markdown(
-                "_The LED model is less performant than the LongT5 model, but it's smaller in terms of size and therefore all other parameters being equal allows for a
+                "_The LED model is less performant than the LongT5 model, but it is smaller, so, all other parameters being equal, it allows for a longer input sequence._"
             )
             with gr.Row():
                 length_penalty = gr.inputs.Slider(
@@ -245,9 +250,9 @@ if __name__ == "__main__":
                 label="Summary", placeholder="The generated summary will appear here"
             )
             gr.Markdown(
-                "The
+                "The compression rate indicates the ratio between the length of the machine-generated summary and the length of the input text (from 0% to 100%). The higher the compression rate, the more extreme the summary is."
             )
-
+            compression_rate = gr.Textbox(
                 label="Compression rate π", placeholder="π will appear here"
             )
 
@@ -261,9 +266,6 @@ if __name__ == "__main__":
             gr.Markdown(
                 "- The two most important parameters-empirically-are the `num_beams` and `token_batch_length`. However, increasing these will also increase the amount of time it takes to generate a summary. The `length_penalty` and `repetition_penalty` parameters are also important for the model to generate good summaries."
             )
-            gr.Markdown(
-                "- The model can be "
-            )
             gr.Markdown("---")
 
             load_examples_button.click(
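The note above on `num_beams`, `token_batch_length`, `length_penalty`, and `repetition_penalty` maps onto the `settings` dict assembled near the top of `proc_submission`. A hypothetical shape, using only standard transformers `generate()` kwargs (the concrete values are illustrative, not taken from the diff):

```python
# Illustrative settings: every key is a standard transformers generate() kwarg.
settings = {
    "num_beams": 2,              # wider beam search: better summaries, slower
    "length_penalty": 0.8,       # exponential length penalty applied in beam search
    "repetition_penalty": 2.5,   # discourages repeating tokens
    "no_repeat_ngram_size": 3,   # hard-blocks repeated trigrams
    "early_stopping": True,      # stop each beam once it emits EOS
}

# summary_ids = model.generate(**batch_inputs, **settings)  # batch_inputs: a tokenized batch
```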
@@ -278,14 +280,15 @@ if __name__ == "__main__":
                 fn=proc_submission,
                 inputs=[
                     input_text,
-
+                    model_type,
+                    summary_type,
                     num_beams,
                     token_batch_length,
                     length_penalty,
                     repetition_penalty,
                     no_repeat_ngram_size,
                 ],
-                outputs=[output_text, summary_text,
+                outputs=[output_text, summary_text, compression_rate],
             )
 
     demo.launch(enable_queue=True, share=False)
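Gradio binds `inputs` to the target function's parameters positionally, so the list must mirror `proc_submission`'s signature (`input_text, model_type, summary_type, num_beams, ...`), and `outputs` must mirror the returned tuple `(html, sum_text_out, rate_out)`. A minimal self-contained illustration of the pattern (hypothetical widgets, gradio 3.x-era API to match the `gr.inputs.Slider` usage above):

```python
import gradio as gr


def echo(text: str, flavor: str) -> str:
    """Stand-in for proc_submission: position, not name, links widget to param."""
    return f"{flavor}: {text}"


with gr.Blocks() as demo:
    text = gr.Textbox(label="Input")
    flavor = gr.Radio(choices=["tldr", "detailed"], value="detailed", label="Flavor")
    out = gr.Textbox(label="Output")
    btn = gr.Button("Run")
    # inputs[0] -> text, inputs[1] -> flavor; reordering the list swaps arguments.
    btn.click(fn=echo, inputs=[text, flavor], outputs=[out])

# demo.launch()
```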