# Spaces: Running
import json
from collections import Counter, defaultdict

import gradio as gr
import pandas as pd
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline
# Load the tokenizer and token-classification model from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")

# EXAMPLE_MAP maps each example's "text" to its "label"; it feeds both the
# example list shown in the UI and the label lookup done per request.
# The encoding is explicit so decoding does not depend on the platform locale.
with open("examples.json", "r", encoding="utf-8") as f:
    example_json = json.load(f)
EXAMPLE_MAP = {x["text"]: x["label"] for x in example_json}

# NER pipeline shared by every request; its results are read through the
# "entity_group", "word", "score", "start" and "end" keys elsewhere in this file.
pipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
def group_by_entity(raw):
    """Count how many entities fall into each entity group.

    Args:
        raw: Iterable of mappings, each with an ``"entity_group"`` key.

    Returns:
        A ``defaultdict(int)`` mapping each entity group to its number of
        occurrences, in first-seen order, followed by a ``"total"`` entry
        holding the sum of all counts.

    Raises:
        KeyError: If an item has no ``"entity_group"`` key.
    """
    tally = Counter(ent["entity_group"] for ent in raw)
    # Hand the counts back in the mapping type callers already receive.
    out = defaultdict(int, tally)
    out["total"] = sum(out.values())
    return out
def ner(text):
    """Run the NER pipeline on *text* and build the four UI outputs.

    Args:
        text: Note text to analyse.

    Returns:
        A 4-tuple ``(ner_content, grouped, label, figure)``:

        * ``ner_content`` -- dict with the input ``"text"`` and an
          ``"entities"`` list; each entry has ``entity``, ``word``,
          ``score``, ``start`` and ``end`` copied from the pipeline result.
        * ``grouped`` -- per-entity-group counts from ``group_by_entity``.
        * ``label`` -- the ``EXAMPLE_MAP`` entry for *text*, or ``None``
          when *text* is not one of the known examples.
        * ``figure`` -- the Figure holding the histogram drawn from the
          counts table.
    """
    raw = pipe(text)
    ner_content = {
        "text": text,
        "entities": [
            {
                "entity": x["entity_group"],
                "word": x["word"],
                "score": x["score"],
                "start": x["start"],
                "end": x["end"],
            }
            for x in raw
        ],
    }

    grouped = group_by_entity(raw)
    # Materialize the dict views so the frame is built from plain lists.
    df = pd.DataFrame(
        {"Entity": list(grouped.keys()), "Count": list(grouped.values())}
    )
    label = EXAMPLE_MAP.get(text)

    # DataFrame.hist() hands back Axes (normally an array of them), not a
    # Figure; the plot output needs the figure object itself, so return the
    # Figure that owns the drawn axes.
    axes = df.hist()
    first_axes = axes.flat[0] if hasattr(axes, "flat") else axes
    figure = first_axes.get_figure()
    # NOTE(review): each call creates a new figure that is never closed here;
    # confirm whether the UI layer releases it or it should be closed explicitly.
    return (ner_content, grouped, label, figure)
# Build the UI: one text box in, four outputs that line up positionally with
# the tuple returned by `ner` (highlighted entities, counts, label, plot).
# Every example text known to EXAMPLE_MAP is offered as a clickable example.
interface = gr.Interface(
    fn=ner,
    inputs=gr.Textbox(value="", label="Note text"),
    outputs=[
        gr.HighlightedText(combine_adjacent=True, label="NER"),
        gr.JSON(label="Entity Counts"),
        gr.Label(label="Rating"),
        "plot",
    ],
    examples=list(EXAMPLE_MAP),
)

# Start serving the app.
interface.launch()