# This code is mainly taken from
# https://github.com/explosion/spaCy/blob/master/spacy/displacy/templates.py, and from
# https://github.com/explosion/spaCy/blob/master/spacy/displacy/render.py.

# NOTE(review): the template literals below contain only their placeholders.
# The surrounding HTML/SVG tags appear to have been stripped from this copy of
# the file; the literals are kept as found. Restore the markup from the
# upstream templates before relying on the rendered output as HTML.

# Setting explicit height and max-width: none on the SVG is required for
# Jupyter to render it properly in a cell
TPL_DEP_SVG = """ {content} """

TPL_DEP_WORDS = """ {text} {tag} """

TPL_DEP_WORDS_LEMMA = """ {text} {lemma} {tag} """

TPL_DEP_ARCS = """ {label} """

TPL_FIGURE = """
{content}
"""

TPL_TITLE = """

{title}

"""

TPL_ENTS = """
{content}
"""

TPL_ENT = """ {text} {label} """

TPL_ENT_RTL = """ {text} {label} """

TPL_PAGE = """ displaCy {content} """

DEFAULT_LANG = "en"
DEFAULT_DIR = "ltr"

# Separator emitted where the source text contains a newline.
# NOTE(review): this literal was garbled in the copy under review (a raw line
# break inside the quotes); an HTML line break is assumed. Confirm upstream.
_LINE_BREAK = "<br>"


def minify_html(html):
    """Perform a template-specific, rudimentary HTML minification for displaCy.

    Disclaimer: NOT a general-purpose solution, only removes indentation and
    newlines.

    html (unicode): Markup to minify.
    RETURNS (unicode): "Minified" HTML.
    """
    # Only four-space indentation runs are dropped; single spaces between
    # words and attributes must survive.
    return html.strip().replace("    ", "").replace("\n", "")


def escape_html(text):
    """Replace <, >, &, " with their HTML encoded representation.

    Intended to prevent HTML errors in rendered displaCy markup.

    text (unicode): The original text.
    RETURNS (unicode): Equivalent text to be safely used within HTML.
    """
    # "&" must be replaced first, otherwise the ampersands introduced by the
    # other entities would be escaped a second time.
    text = text.replace("&", "&amp;")
    text = text.replace("<", "&lt;")
    text = text.replace(">", "&gt;")
    text = text.replace('"', "&quot;")
    return text


def _default_ent_template(direction):
    """Return the built-in entity template matching a text direction."""
    return TPL_ENT_RTL if direction == "rtl" else TPL_ENT


def _render_plain_text(text):
    """Escape text that lies outside entities, joining its lines with a break."""
    return _LINE_BREAK.join(escape_html(line) for line in text.split("\n"))


class EntityRenderer(object):
    """Render named entities as HTML."""

    style = "ent"

    def __init__(self, options=None):
        """Initialise entity renderer.

        options (dict): Visualiser-specific options. Recognised keys:
            "colors" (dict mapping upper-case label to colour, merged over the
            defaults), "ents" (labels to highlight; None highlights all) and
            "template" (format string used instead of the built-in one).
        """
        # Avoid a shared mutable default argument.
        options = {} if options is None else options
        colors = {
            "ORG": "#7aecec",
            "PRODUCT": "#bfeeb7",
            "GPE": "#feca74",
            "LOC": "#ff9561",
            "PERSON": "#aa9cfc",
            "NORP": "#c887fb",
            "FACILITY": "#9cc9cc",
            "EVENT": "#ffeb80",
            "LAW": "#ff8197",
            "LANGUAGE": "#ff8197",
            "WORK_OF_ART": "#f0d0ff",
            "DATE": "#bfe1d9",
            "TIME": "#bfe1d9",
            "MONEY": "#e4e7d2",
            "QUANTITY": "#e4e7d2",
            "ORDINAL": "#e4e7d2",
            "CARDINAL": "#e4e7d2",
            "PERCENT": "#e4e7d2",
        }
        # user_colors = registry.displacy_colors.get_all()
        # for user_color in user_colors.values():
        #     colors.update(user_color)
        colors.update(options.get("colors", {}))
        self.default_color = "#ddd"
        self.colors = colors
        self.ents = options.get("ents", None)
        self.direction = DEFAULT_DIR
        self.lang = DEFAULT_LANG
        template = options.get("template")
        if template:
            self.ent_template = template
        else:
            self.ent_template = _default_ent_template(self.direction)

    def render(self, parsed, page=False, minify=False):
        """Render complete markup.

        parsed (list): Parsed documents to render; each is a dict with "text"
            and "ents", and optionally "title" and "settings".
        page (bool): Render parses wrapped as full HTML page.
        minify (bool): Minify HTML markup.
        RETURNS (unicode): Rendered HTML markup.
        """
        rendered = []
        for i, p in enumerate(parsed):
            if i == 0:
                # Direction and language are taken from the first document only.
                settings = p.get("settings", {})
                self.direction = settings.get("direction", DEFAULT_DIR)
                self.lang = settings.get("lang", DEFAULT_LANG)
                # The direction is only known here, so the built-in template
                # is re-selected now. A custom template is left alone.
                if self.ent_template in (TPL_ENT, TPL_ENT_RTL):
                    self.ent_template = _default_ent_template(self.direction)
            rendered.append(self.render_ents(p["text"], p["ents"], p.get("title")))
        if page:
            docs = "".join(TPL_FIGURE.format(content=doc) for doc in rendered)
            markup = TPL_PAGE.format(content=docs, lang=self.lang, dir=self.direction)
        else:
            markup = "".join(rendered)
        if minify:
            return minify_html(markup)
        return markup

    def render_ents(self, text, spans, title):
        """Render entities in text.

        text (unicode): Original text.
        spans (list): Individual entity spans and their start, end and label.
            A span may carry a "params" dict of extra template fields.
        title (unicode or None): Document title; prepended when truthy.
        RETURNS (unicode): Markup for the text with its entities.
        """
        pieces = []
        offset = 0
        for span in spans:
            label = span["label"]
            start = span["start"]
            end = span["end"]
            additional_params = span.get("params", {})
            entity = escape_html(text[start:end])
            # Text between the previous entity and this one.
            pieces.append(_render_plain_text(text[offset:start]))
            if self.ents is None or label.upper() in self.ents:
                color = self.colors.get(label.upper(), self.default_color)
                ent_settings = {"label": label, "text": entity, "bg": color}
                ent_settings.update(additional_params)
                pieces.append(self.ent_template.format(**ent_settings))
            else:
                # Label filtered out: emit the entity text without highlighting.
                pieces.append(entity)
            offset = end
        # Remaining text after the last entity.
        pieces.append(_render_plain_text(text[offset:]))
        markup = TPL_ENTS.format(content="".join(pieces), dir=self.direction)
        if title:
            markup = TPL_TITLE.format(title=title) + markup
        return markup