# This code is mainly taken from
# https://github.com/explosion/spaCy/blob/master/spacy/displacy/templates.py, and from
# https://github.com/explosion/spaCy/blob/master/spacy/displacy/render.py.
# Setting explicit height and max-width: none on the SVG is required for
# Jupyter to render it properly in a cell
# NOTE(review): the four dependency-parse templates below contain no SVG
# markup, only their bare placeholders. Nothing visible in this module formats
# them, so they are left exactly as found -- confirm against the upstream
# displaCy templates whether they are still needed and restore the markup
# there if so.
TPL_DEP_SVG = """
"""
TPL_DEP_WORDS = """
{text}{tag}
"""
TPL_DEP_WORDS_LEMMA = """
{text}{lemma}{tag}
"""
TPL_DEP_ARCS = """
{label}
"""

# Templates used by the entity renderer. Every placeholder that the renderer
# passes to ``str.format`` must appear here, otherwise the value is silently
# dropped from the output. Nested lines are indented with four spaces so that
# ``minify_html`` can strip the indentation again.

# Wrapper around one rendered document when a full page is requested.
# Placeholders: {content}.
TPL_FIGURE = """
<figure style="margin-bottom: 6rem">{content}</figure>
"""

# Optional document title, rendered above the entities. Placeholders: {title}.
TPL_TITLE = """
<h2 style="margin: 0">{title}</h2>
"""

# Container for the text of one document. Placeholders: {content}, {dir}.
TPL_ENTS = """
<div class="entities" style="line-height: 2.5; direction: {dir}">{content}</div>
"""

# One highlighted entity (left-to-right text): the label follows the entity
# text. Placeholders: {text}, {label}, {bg}.
TPL_ENT = """
<mark class="entity" style="background: {bg}; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
    {text}
    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">{label}</span>
</mark>
"""

# One highlighted entity (right-to-left text): same as TPL_ENT, but the gap
# between text and label sits on the other side. Placeholders: {text},
# {label}, {bg}.
TPL_ENT_RTL = """
<mark class="entity" style="background: {bg}; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em">
    {text}
    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-right: 0.5rem">{label}</span>
</mark>
"""

# Stand-alone HTML page around all rendered documents.
# Placeholders: {content}, {lang}, {dir}.
TPL_PAGE = """
<!DOCTYPE html>
<html lang="{lang}">
    <head>
        <title>displaCy</title>
    </head>

    <body style="font-size: 16px; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; padding: 4rem 2rem; direction: {dir}">{content}</body>
</html>
"""

# Fallback language code and text direction, used when the parsed input
# carries no explicit settings.
DEFAULT_LANG = "en"
DEFAULT_DIR = "ltr"
def minify_html(html):
    """Perform a template-specific, rudimentary HTML minification for displaCy.

    Disclaimer: NOT a general-purpose solution, only removes indentation and
    newlines. Indentation is removed in units of four spaces, so a run of four
    spaces inside the content itself is dropped as well.

    html (unicode): Markup to minify.
    RETURNS (unicode): "Minified" HTML.
    """
    # Strip four-space indentation units instead of every single space;
    # removing all spaces would glue words and tag attributes together.
    return html.strip().replace("    ", "").replace("\n", "")
def escape_html(text):
    """Replace <, >, &, " with their HTML encoded representation.

    Intended to prevent HTML errors in rendered displaCy markup.

    text (unicode): The original text.
    RETURNS (unicode): Equivalent text to be safely used within HTML.
    """
    # The ampersand must be handled first; otherwise the "&" introduced by the
    # other replacements would be escaped a second time.
    text = text.replace("&", "&amp;")
    text = text.replace("<", "&lt;")
    text = text.replace(">", "&gt;")
    text = text.replace('"', "&quot;")
    return text
class EntityRenderer(object):
    """Render named entities as HTML."""

    style = "ent"

    def __init__(self, options=None):
        """Initialise the entity renderer.

        options (dict): Visualiser-specific options. Keys read here:
            "colors" (dict mapping upper-case labels to background colours,
            merged over the built-in palette), "ents" (collection of
            upper-case labels to highlight; None highlights every label) and
            "template" (custom per-entity template string).
        """
        # Use None instead of a shared mutable default argument.
        options = {} if options is None else options
        colors = {
            "ORG": "#7aecec",
            "PRODUCT": "#bfeeb7",
            "GPE": "#feca74",
            "LOC": "#ff9561",
            "PERSON": "#aa9cfc",
            "NORP": "#c887fb",
            "FACILITY": "#9cc9cc",
            "EVENT": "#ffeb80",
            "LAW": "#ff8197",
            "LANGUAGE": "#ff8197",
            "WORK_OF_ART": "#f0d0ff",
            "DATE": "#bfe1d9",
            "TIME": "#bfe1d9",
            "MONEY": "#e4e7d2",
            "QUANTITY": "#e4e7d2",
            "ORDINAL": "#e4e7d2",
            "CARDINAL": "#e4e7d2",
            "PERCENT": "#e4e7d2",
        }
        # Disabled in this copy (no registry available here):
        # user_colors = registry.displacy_colors.get_all()
        # for user_color in user_colors.values():
        #     colors.update(user_color)
        colors.update(options.get("colors", {}))
        self.default_color = "#ddd"
        self.colors = colors
        self.ents = options.get("ents", None)
        self.direction = DEFAULT_DIR
        self.lang = DEFAULT_LANG
        template = options.get("template")
        # Remember whether the caller supplied a template, so that render()
        # never replaces a custom template with a built-in one.
        self._template_is_default = not template
        if template:
            self.ent_template = template
        else:
            self.ent_template = self._builtin_template()

    def _builtin_template(self):
        """Return the built-in entity template for the current direction."""
        return TPL_ENT_RTL if self.direction == "rtl" else TPL_ENT

    @staticmethod
    def _render_plain(text):
        """Escape text outside of entities and mark its line breaks."""
        # Newlines carry no meaning in HTML, so each one becomes an explicit
        # line break between the escaped fragments.
        return "<br>".join(escape_html(part) for part in text.split("\n"))

    def render(self, parsed, page=False, minify=False):
        """Render complete markup.

        parsed (list): Documents to render. Each item is a dict with the keys
            "text" and "ents" and optionally "title"; the "settings" dict of
            the first item ("direction", "lang") applies to the whole output.
        page (bool): Render the documents wrapped as a full HTML page.
        minify (bool): Minify HTML markup.
        RETURNS (unicode): Rendered HTML markup.
        """
        rendered = []
        for i, p in enumerate(parsed):
            if i == 0:
                settings = p.get("settings", {})
                self.direction = settings.get("direction", DEFAULT_DIR)
                self.lang = settings.get("lang", DEFAULT_LANG)
                # The direction is only known now, so the built-in template
                # has to be chosen here; in __init__ it is always "ltr". A
                # template set by the caller is left alone.
                if self._template_is_default and self.ent_template in (
                    TPL_ENT,
                    TPL_ENT_RTL,
                ):
                    self.ent_template = self._builtin_template()
            rendered.append(self.render_ents(p["text"], p["ents"], p.get("title")))
        if page:
            docs = "".join([TPL_FIGURE.format(content=doc) for doc in rendered])
            markup = TPL_PAGE.format(content=docs, lang=self.lang, dir=self.direction)
        else:
            markup = "".join(rendered)
        if minify:
            return minify_html(markup)
        return markup

    def render_ents(self, text, spans, title):
        """Render entities in text.

        text (unicode): Original text.
        spans (list): Entity spans as dicts with "start", "end" and "label",
            plus optional "params" (extra template fields). Spans are
            processed in the given order and are not validated; presumably
            they are sorted by start offset and do not overlap.
        title (unicode or None): Document title set in Doc.user_data['title'].
        RETURNS (unicode): Markup for the text with its entities highlighted.
        """
        parts = []
        offset = 0
        for span in spans:
            label = span["label"]
            start = span["start"]
            end = span["end"]
            additional_params = span.get("params", {})
            entity = escape_html(text[start:end])
            # Plain text between the previous entity and this one.
            parts.append(self._render_plain(text[offset:start]))
            label_key = label.upper()
            if self.ents is None or label_key in self.ents:
                color = self.colors.get(label_key, self.default_color)
                ent_settings = {"label": label, "text": entity, "bg": color}
                ent_settings.update(additional_params)
                parts.append(self.ent_template.format(**ent_settings))
            else:
                # Filtered-out labels are emitted as ordinary escaped text.
                parts.append(entity)
            offset = end
        # Remaining text after the last entity (or all of it if there is none).
        parts.append(self._render_plain(text[offset:]))
        markup = TPL_ENTS.format(content="".join(parts), dir=self.direction)
        if title:
            markup = TPL_TITLE.format(title=title) + markup
        return markup