# Page residue from the hosted file view, kept for provenance:
# Spaces status: Runtime error | File size: 3,696 Bytes | Revision: bc565d4
import spacy
from spacy import displacy
import random
from spacy.tokens import Span
import gradio as gr
# Name of the pipeline used for parsing, entities and token attributes.
DEFAULT_MODEL = "en_core_web_sm"

# Text pre-filled in the input box when the demo starts.
DEFAULT_TEXT = "David Bowie moved to the US in 1974, initially staying in New York City before settling in Los Angeles."

# Token attributes offered (and pre-selected) in the "Tokens" tab.
DEFAULT_TOK_ATTR = ['idx', 'text', 'pos_', 'lemma_', 'shape_', 'dep_']

# Entity labels offered (and pre-selected) in the "Entity" tab.
DEFAULT_ENTS = [
    'CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC',
    'MONEY', 'NORP', 'ORDINAL', 'ORG', 'PERCENT', 'PERSON', 'PRODUCT',
    'QUANTITY', 'TIME', 'WORK_OF_ART',
]

# Load through the constant so the model name lives in exactly one place.
nlp = spacy.load(DEFAULT_MODEL)

# Second pipeline, used only by the similarity function.
# NOTE(review): presumably chosen because the "md" package ships word
# vectors - confirm before swapping it for another model.
nlp2 = spacy.load("en_core_web_md")
def dependency(text, col_punct, col_phrase, compact):
    """Parse ``text`` and render its dependency tree with displaCy.

    Args:
        text: Raw text to run through the module-level ``nlp`` pipeline.
        col_punct: Forwarded as displaCy's ``collapse_punct`` option.
        col_phrase: Forwarded as displaCy's ``collapse_phrases`` option.
        compact: Forwarded as displaCy's ``compact`` option.

    Returns:
        Whatever ``displacy.render`` produces for the ``"dep"`` style.
    """
    render_options = dict(
        compact=compact,
        collapse_phrases=col_phrase,
        collapse_punct=col_punct,
    )
    return displacy.render(nlp(text), style="dep", options=render_options)
def entity(text, ents):
    """Parse ``text`` and render its named entities with displaCy.

    Args:
        text: Raw text to run through the module-level ``nlp`` pipeline.
        ents: Entity labels, forwarded as displaCy's ``ents`` option.

    Returns:
        Whatever ``displacy.render`` produces for the ``"ent"`` style.
    """
    parsed = nlp(text)
    return displacy.render(parsed, style="ent", options=dict(ents=ents))
def text(default):
    """Return ``default`` when it is truthy, otherwise ``None``."""
    return default or None
def token(text, attributes):
    """Build a table of token attributes for ``text``.

    Args:
        text: Raw text to run through the module-level ``nlp`` pipeline.
        attributes: Names of token attributes to read, in column order.

    Returns:
        A list with one row per token; each row holds the values of the
        requested attributes in the order given.
    """
    return [
        [getattr(tok, name) for name in attributes]
        for tok in nlp(text)
    ]
def vectors(text):
    """Score the similarity of two randomly picked pieces of ``text``.

    Candidates are the noun chunks of the parsed text plus every token that
    is neither a stop word nor tagged ``PUNCT`` / ``PROPN``. Two different
    candidates are drawn and compared with spaCy's ``similarity``.

    Args:
        text: Raw text to run through the module-level ``nlp2`` pipeline.

    Returns:
        A ``(score, first_text, second_text)`` tuple, with ``score`` rounded
        to two decimals. When the text yields fewer than two candidates
        there is nothing to compare and ``(0.0, "", "")`` is returned.
    """
    doc = nlp2(text)
    candidates = list(doc.noun_chunks)
    candidates.extend(
        tok for tok in doc
        if not tok.is_stop and tok.pos_ not in ("PUNCT", "PROPN")
    )

    # Guard short/empty input: sampling two items from fewer than two
    # candidates would raise instead of giving the UI something to show.
    if len(candidates) < 2:
        return 0.0, "", ""

    # sample() draws without replacement, so a candidate is never compared
    # with itself (which would always score a meaningless 1.0).
    first, second = random.sample(candidates, 2)
    return round(first.similarity(second), 2), first.text, second.text
def _span_token_bounds(doc, words):
    """Locate ``words`` in ``doc`` and return ``(start, end)`` token indices.

    ``start`` is the index of the first token whose text equals the first
    word; ``end`` is one past the first token at or after ``start`` whose
    text equals the last word. Returns ``None`` when either is not found.
    """
    # Accept a plain phrase ("New York City") as well as a word sequence.
    if isinstance(words, str):
        words = words.split()
    if not words:
        return None

    first_word, last_word = words[0], words[-1]
    start = next((tok.i for tok in doc if tok.text == first_word), None)
    if start is None:
        return None
    # Search from ``start`` onwards so the end can never precede the start.
    end = next(
        (tok.i + 1 for tok in doc[start:] if tok.text == last_word),
        None,
    )
    if end is None:
        return None
    return start, end


def span(text, span1, span2, label1, label2):
    """Render two labelled spans of ``text`` with displaCy's span style.

    Args:
        text: Raw text to run through the module-level ``nlp`` pipeline.
        span1: First phrase, as a sequence of token texts or a
            whitespace-separated string.
        span2: Second phrase, same format as ``span1``.
        label1: Label attached to the first span.
        label2: Label attached to the second span.

    Returns:
        Whatever ``displacy.render`` produces for the ``"span"`` style.
        A phrase that cannot be located in the text is left out rather
        than rendered as a bogus span.
    """
    doc = nlp(text)

    labelled = []
    for words, label in ((span1, label1), (span2, label2)):
        bounds = _span_token_bounds(doc, words)
        if bounds is None:
            continue
        start, end = bounds
        # Span takes token indices (Token.i) with an exclusive end, not
        # the character offsets stored in Token.idx.
        labelled.append(Span(doc, start, end, label))

    # "sc" is the spans key this function has always written to and the
    # one displacy.render is asked to draw below (no spans_key override).
    doc.spans["sc"] = labelled
    return displacy.render(doc, style="span")
# Noun chunks of the default text, computed once at import time.
list_chunks = [chunk.text for chunk in nlp(DEFAULT_TEXT).noun_chunks]

demo = gr.Blocks()

# Components and event wiring are created inside the Blocks context so
# they are registered on ``demo``.
with demo:
    # Single text box shared by every tab as the text to analyse.
    text_input = gr.Textbox(value=DEFAULT_TEXT, interactive=True)

    with gr.Tabs():
        with gr.TabItem("Dependency"):
            col_punct = gr.Checkbox(label="Collapse Punctuation", value=True)
            col_phrase = gr.Checkbox(label="Collapse Phrases", value=True)
            compact = gr.Checkbox(label="Compact", value=True)
            depen_output = gr.HTML()
            depen_button = gr.Button("Generate")

        with gr.TabItem("Entity"):
            entity_input = gr.CheckboxGroup(DEFAULT_ENTS, value=DEFAULT_ENTS)
            entity_output = gr.HTML()
            entity_button = gr.Button("Generate")

        with gr.TabItem("Tokens"):
            tok_input = gr.CheckboxGroup(DEFAULT_TOK_ATTR, value=DEFAULT_TOK_ATTR)
            tok_output = gr.Dataframe()
            tok_button = gr.Button("Generate")

        with gr.TabItem("Similarity"):
            sim_text1 = gr.Textbox(label="Chosen")
            sim_text2 = gr.Textbox(label="Chosen")
            sim_output = gr.Textbox(label="Similarity Score")
            sim_button = gr.Button("Generate")

    # Each "Generate" button feeds the shared text (plus the tab's own
    # options) to the matching analysis function.
    depen_button.click(
        dependency,
        inputs=[text_input, col_punct, col_phrase, compact],
        outputs=depen_output,
    )
    entity_button.click(
        entity,
        inputs=[text_input, entity_input],
        outputs=entity_output,
    )
    tok_button.click(
        token,
        inputs=[text_input, tok_input],
        outputs=tok_output,
    )
    # Output order matches the (score, first_text, second_text) return.
    sim_button.click(
        vectors,
        inputs=[text_input],
        outputs=[sim_output, sim_text1, sim_text2],
    )

demo.launch()