ArneBinder committed on
Commit
a8529ac
1 Parent(s): 16d7871

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -25
app.py CHANGED
@@ -155,7 +155,7 @@ def inject_relation_data(html: str, sorted_entities, binary_relations) -> str:
155
  return str(soup)
156
 
157
 
158
- def predict(text, render_as, render_kwargs_json):
159
  document = TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions(text=text)
160
 
161
  # add single partition from the whole text (the model only considers text in partitions)
@@ -164,6 +164,15 @@ def predict(text, render_as, render_kwargs_json):
164
  # execute prediction pipeline
165
  pipeline(document)
166
 
 
 
 
 
 
 
 
 
 
167
  render_kwargs = json.loads(render_kwargs_json)
168
  if render_as == "Pretty Table":
169
  html = render_pretty_table(document, **render_kwargs)
@@ -175,12 +184,22 @@ def predict(text, render_as, render_kwargs_json):
175
  return html
176
 
177
 
 
 
 
 
 
 
 
 
178
  if __name__ == "__main__":
179
 
180
  model_name_or_path = "ArneBinder/sam-pointer-bart-base-v0.3"
181
  # local path
182
  # model_name_or_path = "models/dataset-sciarg/task-ner_re/v0.3/2024-03-01_18-25-32"
183
 
 
 
184
  pipeline = AutoPipeline.from_pretrained(model_name_or_path, device=-1, num_workers=0)
185
  re_pipeline = AutoPipeline.from_pretrained(
186
  model_name_or_path,
@@ -201,27 +220,48 @@ if __name__ == "__main__":
201
  },
202
  }
203
 
204
- iface = gr.Interface(
205
- fn=predict,
206
- inputs=[
207
- gr.Textbox(
208
- lines=20,
209
- value="Scholarly Argumentation Mining (SAM) has recently gained attention due to its potential to help scholars with the rapid growth of published scientific literature. It comprises two subtasks: argumentative discourse unit recognition (ADUR) and argumentative relation extraction (ARE), both of which are challenging since they require e.g. the integration of domain knowledge, the detection of implicit statements, and the disambiguation of argument structure. While previous work focused on dataset construction and baseline methods for specific document sections, such as abstract or results, full-text scholarly argumentation mining has seen little progress. In this work, we introduce a sequential pipeline model combining ADUR and ARE for full-text SAM, and provide a first analysis of the performance of pretrained language models (PLMs) on both subtasks. We establish a new SotA for ADUR on the Sci-Arg corpus, outperforming the previous best reported result by a large margin (+7% F1). We also present the first results for ARE, and thus for the full AM pipeline, on this benchmark dataset. Our detailed error analysis reveals that non-contiguous ADUs as well as the interpretation of discourse connectors pose major challenges and that data annotation needs to be more consistent.",
210
- ),
211
- ],
212
- additional_inputs=[
213
- gr.Dropdown(
214
- label="Render as",
215
- choices=["Pretty Table", "spaCy"],
216
- value="spaCy",
217
- ),
218
- gr.Textbox(
219
- label="Render Arguments",
220
- lines=5,
221
- value=json.dumps(default_render_kwargs, indent=2),
222
- ),
223
- ],
224
- additional_inputs_accordion=gr.Accordion(label="Render Options", open=False),
225
- outputs=["html"],
226
- )
227
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  return str(soup)
156
 
157
 
158
+ def predict(text: str) -> Tuple[dict, str]:
159
  document = TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions(text=text)
160
 
161
  # add single partition from the whole text (the model only considers text in partitions)
 
164
  # execute prediction pipeline
165
  pipeline(document)
166
 
167
+ document_dict = document.asdict()
168
+ return document_dict, json.dumps(document_dict)
169
+
170
+
171
+ def render(document_txt: str, render_as: str, render_kwargs_json: str) -> str:
172
+ document_dict = json.loads(document_txt)
173
+ document = TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions.fromdict(
174
+ document_dict
175
+ )
176
  render_kwargs = json.loads(render_kwargs_json)
177
  if render_as == "Pretty Table":
178
  html = render_pretty_table(document, **render_kwargs)
 
184
  return html
185
 
186
 
187
+ def open_accordion():
188
+ return gr.Accordion(open=True)
189
+
190
+
191
+ def close_accordion():
192
+ return gr.Accordion(open=False)
193
+
194
+
195
  if __name__ == "__main__":
196
 
197
  model_name_or_path = "ArneBinder/sam-pointer-bart-base-v0.3"
198
  # local path
199
  # model_name_or_path = "models/dataset-sciarg/task-ner_re/v0.3/2024-03-01_18-25-32"
200
 
201
+ example_text = "Scholarly Argumentation Mining (SAM) has recently gained attention due to its potential to help scholars with the rapid growth of published scientific literature. It comprises two subtasks: argumentative discourse unit recognition (ADUR) and argumentative relation extraction (ARE), both of which are challenging since they require e.g. the integration of domain knowledge, the detection of implicit statements, and the disambiguation of argument structure. While previous work focused on dataset construction and baseline methods for specific document sections, such as abstract or results, full-text scholarly argumentation mining has seen little progress. In this work, we introduce a sequential pipeline model combining ADUR and ARE for full-text SAM, and provide a first analysis of the performance of pretrained language models (PLMs) on both subtasks. We establish a new SotA for ADUR on the Sci-Arg corpus, outperforming the previous best reported result by a large margin (+7% F1). We also present the first results for ARE, and thus for the full AM pipeline, on this benchmark dataset. Our detailed error analysis reveals that non-contiguous ADUs as well as the interpretation of discourse connectors pose major challenges and that data annotation needs to be more consistent."
202
+
203
  pipeline = AutoPipeline.from_pretrained(model_name_or_path, device=-1, num_workers=0)
204
  re_pipeline = AutoPipeline.from_pretrained(
205
  model_name_or_path,
 
220
  },
221
  }
222
 
223
+ with gr.Blocks() as demo:
224
+ with gr.Row():
225
+ with gr.Column(scale=1):
226
+ text = gr.Textbox(
227
+ label="Input Text",
228
+ lines=20,
229
+ value=example_text,
230
+ )
231
+
232
+ predict_btn = gr.Button("Predict")
233
+
234
+ output_txt = gr.Textbox(visible=False)
235
+
236
+ with gr.Column(scale=1):
237
+
238
+ with gr.Accordion("See plain result ...", open=False) as output_accordion:
239
+ output_json = gr.JSON(label="Model Output")
240
+
241
+ with gr.Accordion("Render Options", open=False):
242
+ render_as = gr.Dropdown(
243
+ label="Render as",
244
+ choices=["Pretty Table", "spaCy"],
245
+ value="spaCy",
246
+ )
247
+ render_kwargs = gr.Textbox(
248
+ label="Render Arguments",
249
+ lines=5,
250
+ value=json.dumps(default_render_kwargs, indent=2),
251
+ )
252
+ render_btn = gr.Button("Re-render")
253
+
254
+ rendered_output = gr.HTML(label="Rendered Output")
255
+
256
+ render_button_kwargs = dict(
257
+ fn=render, inputs=[output_txt, render_as, render_kwargs], outputs=rendered_output
258
+ )
259
+ predict_btn.click(open_accordion, inputs=[], outputs=[output_accordion]).then(
260
+ fn=predict, inputs=text, outputs=[output_json, output_txt], api_name="predict"
261
+ ).success(**render_button_kwargs).success(
262
+ close_accordion, inputs=[], outputs=[output_accordion]
263
+ )
264
+ render_btn.click(**render_button_kwargs, api_name="render")
265
+
266
+ demo.launch()
267
+