# note-ner-demo / app.py
# Author: andrewgleave — WIP (commit 24d9d43, 1.62 kB)
import json
from collections import defaultdict
import gradio as gr
import pandas as pd
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
# Biomedical NER model from the Hugging Face hub; downloaded on first run.
tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")

# Map example note text -> its reference label, loaded from examples.json.
# NOTE(review): assumes examples.json is a list of {"text": ..., "label": ...}
# objects — verify against the bundled file.
EXAMPLE_MAP = {}
with open("examples.json", "r") as f:
    example_json = json.load(f)
    EXAMPLE_MAP = {x["text"]: x["label"] for x in example_json}

# "simple" aggregation merges word-piece tokens into whole-word entity spans,
# yielding dicts with entity_group/word/score/start/end keys.
pipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
def group_by_entity(raw):
    """Tally NER pipeline results by entity group.

    Returns a dict mapping each entity_group name to its occurrence count,
    plus a "total" key holding the sum of all counts.
    """
    counts = {}
    for item in raw:
        group = item["entity_group"]
        counts[group] = counts.get(group, 0) + 1
    counts["total"] = sum(counts.values())
    return counts
def ner(text):
    """Run biomedical NER over *text* and build the four Gradio outputs.

    Returns a tuple of:
      - dict for HighlightedText: {"text": ..., "entities": [...]}
      - dict of per-entity-group counts (plus a "total" key) for the JSON panel
      - the example's reference label, or None for free-form input
      - a matplotlib Figure bar-charting the entity counts
    """
    raw = pipe(text)
    ner_content = {
        "text": text,
        "entities": [
            {
                "entity": x["entity_group"],
                "word": x["word"],
                # Pipeline scores are numpy float32 — cast so the payload
                # is plain-JSON serializable.
                "score": float(x["score"]),
                "start": x["start"],
                "end": x["end"],
            }
            for x in raw
        ],
    }
    grouped = group_by_entity(raw)
    df = pd.DataFrame({"Entity": list(grouped.keys()), "Count": list(grouped.values())})
    # df.hist() returned a numpy array of Axes (not a Figure, which the
    # "plot" output expects) and a histogram is the wrong chart for
    # categorical counts — draw a bar chart and hand back its Figure.
    fig = df.plot.bar(x="Entity", y="Count", legend=False).get_figure()
    label = EXAMPLE_MAP.get(text, None)
    return (ner_content, grouped, label, fig)
# Gradio UI: one note-text box in; highlighted entities, the count JSON,
# the reference rating label, and a counts plot out.
output_panels = [
    gr.HighlightedText(label="NER", combine_adjacent=True),
    gr.JSON(label="Entity Counts"),
    gr.Label(label="Rating"),
    "plot",
]
interface = gr.Interface(
    fn=ner,
    inputs=gr.Textbox(label="Note text", value=""),
    outputs=output_panels,
    examples=list(EXAMPLE_MAP.keys()),
)
interface.launch()