Spaces:
Running
Running
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
`spacyfishing` entity linking to Wikidata
<https://github.com/Lucaterre/spacyfishing>
"""
from icecream import ic # pylint: disable=E0401 | |
import spacy # pylint: disable=E0401 | |
# Sample sentence run through the pipeline; surrounding newlines are
# stripped just before processing.
SRC_TEXT: str = """
Werner Herzog is a remarkable filmmaker and an intellectual originally from Germany, the son of Dietrich Herzog, although they never spoke after the war.
"""

# Load the small English model without its built-in "ner" component,
# since the "span_marker" component added next is used in its place.
nlp = spacy.load(
    "en_core_web_sm",
    exclude=["ner"],
)

# Named entity recognition via the SpanMarker component.
nlp.add_pipe(
    "span_marker",
    config={
        "model": "tomaarsen/span-marker-roberta-large-ontonotes5",
    },
)

# Entity linking through the entity-fishing service at `api_ef_base`.
# NOTE(review): `extra_info` presumably populates the `description` and
# `other_ids` extensions read below -- confirm against the spacyfishing docs.
nlp.add_pipe(
    "entityfishing",
    config={
        "api_ef_base": "https://cloud.science-miner.com/nerd/service",
        "extra_info": True,
        "filter_statements": [],
    },
)

# Runs last, after the entities have been recognized and linked.
nlp.add_pipe(
    "merge_entities",
)

doc = nlp(SRC_TEXT.strip())

# Dump each entity with its label and the linking attributes exposed
# through the `ent._` extension namespace.
for ent in doc.ents:
    ic(
        ent.text,
        ent.label_,
        ent._.nerd_score,
        ent._.url_wikidata,
        ent._.description,
        ent._.other_ids,
    )