Update app.py
Browse files
app.py
CHANGED
@@ -10,7 +10,7 @@ from utils import load_example_filenames, truncate_word_count
|
|
10 |
|
11 |
_here = Path(__file__).parent
|
12 |
|
13 |
-
nltk.download("stopwords")
|
14 |
|
15 |
logging.basicConfig(
|
16 |
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
@@ -23,7 +23,7 @@ def proc_submission(
|
|
23 |
num_beams,
|
24 |
token_batch_length,
|
25 |
length_penalty,
|
26 |
-
max_input_length: int =
|
27 |
):
|
28 |
"""
|
29 |
proc_submission - a helper function for the gradio module to process submissions
|
@@ -167,7 +167,7 @@ if __name__ == "__main__":
|
|
167 |
|
168 |
gr.Markdown("# Automatic summarization of biomedical research papers with neural abstractive methods into a long and comprehensive synopsis or extreme TLDR summary version")
|
169 |
gr.Markdown(
|
170 |
-
"A
|
171 |
)
|
172 |
with gr.Column():
|
173 |
|
@@ -185,7 +185,7 @@ if __name__ == "__main__":
|
|
185 |
value=2,
|
186 |
)
|
187 |
gr.Markdown(
|
188 |
-
"_The
|
189 |
)
|
190 |
with gr.Row():
|
191 |
length_penalty = gr.inputs.Slider(
|
@@ -196,9 +196,9 @@ if __name__ == "__main__":
|
|
196 |
step=0.05,
|
197 |
)
|
198 |
token_batch_length = gr.Radio(
|
199 |
-
choices=[
|
200 |
label="token batch length",
|
201 |
-
value=
|
202 |
)
|
203 |
with gr.Row():
|
204 |
example_name = gr.Dropdown(
|
@@ -248,7 +248,7 @@ if __name__ == "__main__":
|
|
248 |
gr.Markdown("---")
|
249 |
|
250 |
with gr.Column():
|
251 |
-
gr.Markdown("## About the
|
252 |
gr.Markdown(
|
253 |
"- [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) is a fine-tuned checkpoint of [Stancld/longt5-tglobal-large-16384-pubmed-3k_steps](https://huggingface.co/Stancld/longt5-tglobal-large-16384-pubmed-3k_steps) on the [SumPubMed dataset](https://aclanthology.org/2021.acl-srw.30/). [Blaise-g/longt5_tglobal_large_scitldr](https://huggingface.co/Blaise-g/longt5_tglobal_large_scitldr) is a fine-tuned checkpoint of [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) on the [Scitldr dataset](https://arxiv.org/abs/2004.15011). The goal was to create two models capable of handling the complex information contained in long biomedical documents and subsequently producing scientific summaries according to one of the two possible levels of conciseness: 1) A long explanatory synopsis that retains the majority of domain-specific language used in the original source text. 2) A one sentence long, TLDR style summary."
|
254 |
)
|
|
|
10 |
|
11 |
_here = Path(__file__).parent
|
12 |
|
13 |
+
nltk.download("stopwords")
|
14 |
|
15 |
logging.basicConfig(
|
16 |
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
|
|
23 |
num_beams,
|
24 |
token_batch_length,
|
25 |
length_penalty,
|
26 |
+
max_input_length: int = 3060,
|
27 |
):
|
28 |
"""
|
29 |
proc_submission - a helper function for the gradio module to process submissions
|
|
|
167 |
|
168 |
gr.Markdown("# Automatic summarization of biomedical research papers with neural abstractive methods into a long and comprehensive synopsis or extreme TLDR summary version")
|
169 |
gr.Markdown(
|
170 |
+
"A demo developed for my Master Thesis project using ad-hoc fine-tuned abstractive summarization models to summarize long biomedical articles (or any scientific text related to the biomedical domain) into a detailed, explanatory synopsis or extreme TLDR summary."
|
171 |
)
|
172 |
with gr.Column():
|
173 |
|
|
|
185 |
value=2,
|
186 |
)
|
187 |
gr.Markdown(
|
188 |
+
"_The input text is divided into batches of the selected token lengths to fit within the memory constraints, pre-processed and fed into the model of choice. For optimal results use a GPU as the hosted CPU inference is lacking at times and hinders the models' output summary quality._"
|
189 |
)
|
190 |
with gr.Row():
|
191 |
length_penalty = gr.inputs.Slider(
|
|
|
196 |
step=0.05,
|
197 |
)
|
198 |
token_batch_length = gr.Radio(
|
199 |
+
choices=[1024, 2048, 3060],
|
200 |
label="token batch length",
|
201 |
+
value=2048,
|
202 |
)
|
203 |
with gr.Row():
|
204 |
example_name = gr.Dropdown(
|
|
|
248 |
gr.Markdown("---")
|
249 |
|
250 |
with gr.Column():
|
251 |
+
gr.Markdown("## About the Models")
|
252 |
gr.Markdown(
|
253 |
"- [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) is a fine-tuned checkpoint of [Stancld/longt5-tglobal-large-16384-pubmed-3k_steps](https://huggingface.co/Stancld/longt5-tglobal-large-16384-pubmed-3k_steps) on the [SumPubMed dataset](https://aclanthology.org/2021.acl-srw.30/). [Blaise-g/longt5_tglobal_large_scitldr](https://huggingface.co/Blaise-g/longt5_tglobal_large_scitldr) is a fine-tuned checkpoint of [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) on the [Scitldr dataset](https://arxiv.org/abs/2004.15011). The goal was to create two models capable of handling the complex information contained in long biomedical documents and subsequently producing scientific summaries according to one of the two possible levels of conciseness: 1) A long explanatory synopsis that retains the majority of domain-specific language used in the original source text. 2) A one sentence long, TLDR style summary."
|
254 |
)
|