Spaces:

spacy
/

gradio_pipeline_visualizer

Runtime error

File size: 3,696 Bytes

bc565d4

import spacy
from spacy import displacy
import random
from spacy.tokens import Span
import gradio as gr

# Name of the spaCy pipeline loaded as `nlp` below.
DEFAULT_MODEL = "en_core_web_sm"
# Sentence pre-filled in the input text box.
DEFAULT_TEXT = "David Bowie moved to the US in 1974, initially staying in New York City before settling in Los Angeles."
# Token attribute names offered (and pre-selected) in the "Tokens" tab.
DEFAULT_TOK_ATTR = ['idx', 'text', 'pos_', 'lemma_', 'shape_', 'dep_']
# Entity labels offered (and pre-selected) in the "Entity" tab.
DEFAULT_ENTS = ['CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC', 'MONEY', 'NORP', 'ORDINAL', 'ORG', 'PERCENT', 'PERSON', 'PRODUCT', 'QUANTITY', 'TIME', 'WORK_OF_ART']

# Load through the constant so the model name is defined in exactly one place.
nlp = spacy.load(DEFAULT_MODEL)
# Second pipeline, used only by the similarity callback (`vectors`).
nlp2 = spacy.load("en_core_web_md")

def dependency(text, col_punct, col_phrase, compact):
  """Render the dependency parse of ``text`` with displaCy.

  ``col_punct``, ``col_phrase`` and ``compact`` are forwarded unchanged as
  the displaCy options ``collapse_punct``, ``collapse_phrases`` and
  ``compact``. Returns the value produced by ``displacy.render`` for
  ``style="dep"``.
  """
  render_options = dict(
      compact=compact,
      collapse_phrases=col_phrase,
      collapse_punct=col_punct,
  )
  return displacy.render(nlp(text), style="dep", options=render_options)

def entity(text, ents):
  """Render the named entities of ``text`` with displaCy.

  ``ents`` is passed to displaCy as the ``ents`` option. Returns the value
  produced by ``displacy.render`` for ``style="ent"``.
  """
  parsed = nlp(text)
  return displacy.render(parsed, style="ent", options={"ents": ents})

def text(default):
  """Return ``default`` when it is truthy; otherwise return ``None``."""
  return default or None

def token(text, attributes):
  """Build a table of token attributes for ``text``.

  Returns a list with one row per token of the parsed text; each row holds
  the values of the attributes named in ``attributes``, in the given order.
  """
  return [
      [getattr(tok, name) for name in attributes]
      for tok in nlp(text)
  ]

def vectors(text):
  """Pick two items from ``text`` at random and score their similarity.

  Candidates are the document's noun chunks plus every token that is not a
  stop word and whose ``pos_`` is neither ``PUNCT`` nor ``PROPN``.

  Returns a 3-tuple ``(score, first_text, second_text)`` where ``score`` is
  the similarity rounded to two decimals. If the text yields no candidates
  at all (e.g. empty input), ``(0.0, "", "")`` is returned instead of
  raising.
  """
  doc = nlp2(text)
  candidates = list(doc.noun_chunks)
  candidates.extend(
      tok for tok in doc
      if not tok.is_stop and tok.pos_ not in ("PUNCT", "PROPN")
  )

  # random.choices() raises IndexError on an empty population.
  if not candidates:
    return 0.0, "", ""

  if len(candidates) == 1:
    # Only one candidate exists, so it can only be compared with itself.
    first = second = candidates[0]
  else:
    # sample() draws without replacement, so the two picks are distinct
    # candidates rather than possibly the same item compared with itself.
    first, second = random.sample(candidates, 2)

  return round(first.similarity(second), 2), first.text, second.text

def span(text, span1, span2, label1, label2):
  """Render two labelled spans of ``text`` with displaCy's span style.

  ``span1`` and ``span2`` are indexed with ``[0]`` and ``[-1]`` and those
  elements are compared against token texts to locate each span's first and
  last token.
  NOTE(review): presumably each is a sequence of token strings -- confirm
  against the caller (no caller is visible in this file).

  When the same token text occurs several times, the last occurrence wins.
  A boundary that is never matched stays at 0.

  Returns the value produced by ``displacy.render`` for ``style="span"``.
  """
  doc = nlp(text)
  start1 = end1 = 0
  start2 = end2 = 0

  # Span() expects *token* indices (Token.i) with an exclusive end, not the
  # character offsets stored in Token.idx.
  for tok in doc:
    if span1[0] == tok.text:
      start1 = tok.i
    if span1[-1] == tok.text:
      end1 = tok.i + 1
    if span2[0] == tok.text:
      start2 = tok.i
    if span2[-1] == tok.text:
      end2 = tok.i + 1

  # "sc" is the spans key displaCy's span visualizer reads by default.
  doc.spans["sc"] = [
      Span(doc, start1, end1, label1),
      Span(doc, start2, end2, label2),
  ]

  html = displacy.render(doc, style="span")
  return html

# Noun-chunk texts of the default sentence, computed once at import time.
# NOTE(review): not referenced anywhere else in the visible file -- confirm
# whether it is still needed.
list_chunks = [chunk.text for chunk in nlp(DEFAULT_TEXT).noun_chunks]

# Top-level Gradio Blocks app that holds the UI defined below.
demo = gr.Blocks()

with demo:
    # gr.Markdown("Input text here!")
    # Single text box; its value is the first input of every tab's callback.
    text_input = gr.Textbox(value=DEFAULT_TEXT, interactive=True)
    with gr.Tabs():
        # Dependency tab: three display flags feeding dependency().
        with gr.TabItem("Dependency"):
            col_punct = gr.Checkbox(label="Collapse Punctuation", value=True)
            col_phrase = gr.Checkbox(label="Collapse Phrases", value=True) 
            compact = gr.Checkbox(label="Compact", value=True)
            depen_output = gr.HTML()
            depen_button = gr.Button("Generate")
        # Entity tab: choose which entity labels entity() should render.
        with gr.TabItem("Entity"):
            entity_input = gr.CheckboxGroup(DEFAULT_ENTS, value=DEFAULT_ENTS)
            entity_output = gr.HTML()
            entity_button = gr.Button("Generate")
        # Tokens tab: choose which token attributes token() should tabulate.
        with gr.TabItem("Tokens"):
            tok_input = gr.CheckboxGroup(DEFAULT_TOK_ATTR, value=DEFAULT_TOK_ATTR)
            tok_output = gr.Dataframe()
            tok_button = gr.Button("Generate")
        # Similarity tab: shows the two items picked by vectors() and their
        # score. Both text boxes carry the same "Chosen" label.
        with gr.TabItem("Similarity"):
            sim_text1 = gr.Textbox(label="Chosen")
            sim_text2 = gr.Textbox(label="Chosen")
            sim_output = gr.Textbox(label="Similarity Score")
            sim_button = gr.Button("Generate")

    # Wire each "Generate" button to its callback. The order of `inputs`
    # matches the callback's parameter order; the order of `outputs` matches
    # the order of the values the callback returns.
    depen_button.click(dependency, inputs=[text_input, col_punct, col_phrase, compact], outputs=depen_output)
    entity_button.click(entity, inputs=[text_input, entity_input], outputs=entity_output)
    tok_button.click(token, inputs=[text_input, tok_input], outputs=tok_output)
    # vectors() returns (score, first_text, second_text).
    sim_button.click(vectors, inputs=[text_input], outputs=[sim_output, sim_text1, sim_text2])

# Start the app as soon as the module is executed.
demo.launch()