Spaces:
Runtime error
Runtime error
Victoria Slocum
committed on
Commit
•
c0dee52
1
Parent(s):
09414a9
Feat:Spans
Browse files
app.py
CHANGED
@@ -7,72 +7,83 @@ import gradio as gr
|
|
7 |
DEFAULT_MODEL = "en_core_web_sm"
|
8 |
DEFAULT_TEXT = "David Bowie moved to the US in 1974, initially staying in New York City before settling in Los Angeles."
|
9 |
DEFAULT_TOK_ATTR = ['idx', 'text', 'pos_', 'lemma_', 'shape_', 'dep_']
|
10 |
-
DEFAULT_ENTS = ['CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC', 'MONEY',
|
|
|
11 |
|
12 |
nlp = spacy.load("en_core_web_sm")
|
13 |
nlp2 = spacy.load("en_core_web_md")
|
14 |
|
|
|
15 |
def dependency(text, col_punct, col_phrase, compact):
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
|
|
|
|
20 |
|
21 |
def entity(text, ents):
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
|
27 |
-
def text(default):
|
28 |
-
if default:
|
29 |
-
return default
|
30 |
|
31 |
-
def
|
32 |
-
|
33 |
-
|
34 |
-
for tok in doc:
|
35 |
-
tok_data = []
|
36 |
-
for attr in attributes:
|
37 |
-
tok_data.append(getattr(tok, attr))
|
38 |
-
data.append(tok_data)
|
39 |
-
return data
|
40 |
|
41 |
-
def vectors(text):
|
42 |
-
doc = nlp2(text)
|
43 |
-
n_chunks = [chunk for chunk in doc.noun_chunks]
|
44 |
-
words = [tok for tok in doc if not tok.is_stop and tok.pos_ not in ['PUNCT', "PROPN"]]
|
45 |
-
str_list = n_chunks + words
|
46 |
-
choice = random.choices(str_list, k=2)
|
47 |
-
return round(choice[0].similarity(choice[1]), 2), choice[0].text, choice[1].text
|
48 |
|
49 |
-
def
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
55 |
|
56 |
-
for tok in doc:
|
57 |
-
if span1[0] == tok.text:
|
58 |
-
idx1_1 = tok.idx
|
59 |
-
if span1[-1] == tok.text:
|
60 |
-
idx1_2 = tok.idx
|
61 |
-
if span2[0] == tok.text:
|
62 |
-
idx2_1 = tok.idx
|
63 |
-
if span2[-1] == tok.text:
|
64 |
-
idx2_2 = tok.idx
|
65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
-
doc.spans["sc"] = [
|
68 |
-
Span(doc, idx1_1, idx1_2, label1),
|
69 |
-
Span(doc, idx2_1, idx2_2, label2),
|
70 |
-
]
|
71 |
|
72 |
-
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
-
list_chunks = [chunk.text for chunk in nlp(DEFAULT_TEXT).noun_chunks]
|
76 |
|
77 |
demo = gr.Blocks()
|
78 |
|
@@ -82,7 +93,7 @@ with demo:
|
|
82 |
with gr.Tabs():
|
83 |
with gr.TabItem("Dependency"):
|
84 |
col_punct = gr.Checkbox(label="Collapse Punctuation", value=True)
|
85 |
-
col_phrase = gr.Checkbox(label="Collapse Phrases", value=True)
|
86 |
compact = gr.Checkbox(label="Compact", value=True)
|
87 |
depen_output = gr.HTML()
|
88 |
depen_button = gr.Button("Generate")
|
@@ -91,18 +102,31 @@ with demo:
|
|
91 |
entity_output = gr.HTML()
|
92 |
entity_button = gr.Button("Generate")
|
93 |
with gr.TabItem("Tokens"):
|
94 |
-
tok_input = gr.CheckboxGroup(
|
|
|
95 |
tok_output = gr.Dataframe()
|
96 |
tok_button = gr.Button("Generate")
|
97 |
with gr.TabItem("Similarity"):
|
98 |
-
sim_text1 = gr.Textbox(label="Chosen")
|
99 |
-
sim_text2 = gr.Textbox(label="Chosen")
|
100 |
-
sim_output = gr.Textbox(label="Similarity Score")
|
101 |
sim_button = gr.Button("Generate")
|
102 |
-
|
103 |
-
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
tok_button.click(token, inputs=[text_input, tok_input], outputs=tok_output)
|
106 |
-
sim_button.click(vectors, inputs=[text_input], outputs=[
|
|
|
|
|
|
|
107 |
|
108 |
-
demo.launch()
|
|
|
7 |
# Name of the small English pipeline; loaded below as ``nlp``.
DEFAULT_MODEL = "en_core_web_sm"
# Sample sentence for the demo.
DEFAULT_TEXT = "David Bowie moved to the US in 1974, initially staying in New York City before settling in Los Angeles."
# Token attribute names offered (and pre-selected) in the "Tokens" tab.
DEFAULT_TOK_ATTR = ['idx', 'text', 'pos_', 'lemma_', 'shape_', 'dep_']
# Entity labels; presumably the choices for the entity tab's input -- the
# widget definition is not visible in this view, confirm against the UI code.
DEFAULT_ENTS = ['CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC', 'MONEY',
                'NORP', 'ORDINAL', 'ORG', 'PERCENT', 'PERSON', 'PRODUCT', 'QUANTITY', 'TIME', 'WORK_OF_ART']

# Pipeline used by dependency(), entity(), token() and span().
# Loaded through DEFAULT_MODEL so the model name lives in one place.
nlp = spacy.load(DEFAULT_MODEL)
# Second pipeline, used by vectors() for similarity scores.
nlp2 = spacy.load("en_core_web_md")
|
15 |
|
16 |
+
|
17 |
def dependency(text, col_punct, col_phrase, compact):
    """Render the dependency parse of ``text`` with displaCy.

    Args:
        text: Raw text, parsed with the module-level ``nlp`` pipeline.
        col_punct: Forwarded to displaCy as the ``collapse_punct`` option.
        col_phrase: Forwarded to displaCy as the ``collapse_phrases`` option.
        compact: Forwarded to displaCy as the ``compact`` option.

    Returns:
        The result of ``displacy.render`` for ``style="dep"``.
    """
    render_options = dict(
        compact=compact,
        collapse_phrases=col_phrase,
        collapse_punct=col_punct,
    )
    return displacy.render(nlp(text), style="dep", options=render_options)
|
23 |
+
|
24 |
|
25 |
def entity(text, ents):
    """Render the named entities of ``text`` with displaCy.

    Args:
        text: Raw text, parsed with the module-level ``nlp`` pipeline.
        ents: Forwarded to displaCy as the ``ents`` option.

    Returns:
        The result of ``displacy.render`` for ``style="ent"``.
    """
    parsed = nlp(text)
    return displacy.render(parsed, style="ent", options={"ents": ents})
|
30 |
|
|
|
|
|
|
|
31 |
|
32 |
+
def text(default):
    """Return ``default`` when it is truthy, otherwise ``None``.

    Args:
        default: Any value; it is only tested for truthiness.

    Returns:
        ``default`` unchanged if truthy, else ``None``.
    """
    # The falsy path is spelled out so both outcomes are explicit returns.
    return default if default else None
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
+
def token(text, attributes):
    """Build a table of token attributes, one row per token.

    Args:
        text: Raw text, parsed with the module-level ``nlp`` pipeline.
        attributes: Attribute names to read from each token with
            ``getattr``; their order defines the column order.

    Returns:
        A list of rows; each row lists the requested attribute values
        for one token, in document order.
    """
    return [
        [getattr(tok, name) for name in attributes]
        for tok in nlp(text)
    ]
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
+
def vectors(text):
    """Pick two random items from ``text`` and score their similarity.

    Candidates are the noun chunks of the parsed text plus every token
    that is neither a stop word nor tagged ``PUNCT``/``PROPN``.

    Args:
        text: Raw text, parsed with the module-level ``nlp2`` pipeline.

    Returns:
        A ``(score, first_text, second_text)`` tuple, with the score
        rounded to two decimals. When the text yields no candidates the
        tuple is ``(None, "", "")`` instead of raising.
    """
    doc = nlp2(text)
    noun_chunks = list(doc.noun_chunks)
    words = [
        tok for tok in doc
        if not tok.is_stop and tok.pos_ not in ("PUNCT", "PROPN")
    ]
    candidates = noun_chunks + words

    # random.choices raises IndexError on an empty population (e.g. empty
    # input or text made up only of stop words / punctuation).
    if not candidates:
        return None, "", ""

    # Sampling is with replacement, so the same item may be drawn twice.
    first, second = random.choices(candidates, k=2)
    return round(first.similarity(second), 2), first.text, second.text
|
56 |
|
|
|
|
|
|
|
|
|
57 |
|
58 |
+
def _find_token_run(token_texts, words):
    """Return ``(start, end)`` of the first contiguous run equal to ``words``.

    ``end`` is exclusive. Returns ``None`` when ``words`` is empty or
    does not occur in ``token_texts``.
    """
    width = len(words)
    if width == 0:
        return None
    for start in range(len(token_texts) - width + 1):
        if token_texts[start:start + width] == words:
            return start, start + width
    return None


def span(text, span1, span2, label1, label2):
    """Render two labelled spans of ``text`` with displaCy.

    Each phrase is split on whitespace and located as a contiguous run of
    token texts in the parsed document; the first occurrence is used.
    Start and end are resolved together, so a word that appears several
    times cannot produce a span whose start lies after its end.

    Args:
        text: Raw text, parsed with the module-level ``nlp`` pipeline.
        span1: Phrase to mark as the first span.
        span2: Phrase to mark as the second span.
        label1: Label attached to the first span.
        label2: Label attached to the second span.

    Returns:
        The result of ``displacy.render`` for ``style="span"``. Phrases
        that cannot be found in the text are left out of the rendering.
    """
    doc = nlp(text)
    # Collect token texts once instead of rebuilding list(doc) per index.
    token_texts = [tok.text for tok in doc]

    located = []
    for phrase, label in ((span1, label1), (span2, label2)):
        bounds = _find_token_run(token_texts, phrase.split())
        if bounds is not None:
            start, end = bounds
            located.append(Span(doc, start, end, label))

    # The spans are stored under the "sc" key of doc.spans.
    doc.spans["sc"] = located

    return displacy.render(doc, style="span")
|
86 |
|
|
|
87 |
|
88 |
demo = gr.Blocks()
|
89 |
|
|
|
93 |
with gr.Tabs():
|
94 |
with gr.TabItem("Dependency"):
|
95 |
col_punct = gr.Checkbox(label="Collapse Punctuation", value=True)
|
96 |
+
col_phrase = gr.Checkbox(label="Collapse Phrases", value=True)
|
97 |
compact = gr.Checkbox(label="Compact", value=True)
|
98 |
depen_output = gr.HTML()
|
99 |
depen_button = gr.Button("Generate")
|
|
|
102 |
entity_output = gr.HTML()
|
103 |
entity_button = gr.Button("Generate")
|
104 |
with gr.TabItem("Tokens"):
|
105 |
+
tok_input = gr.CheckboxGroup(
|
106 |
+
DEFAULT_TOK_ATTR, value=DEFAULT_TOK_ATTR)
|
107 |
tok_output = gr.Dataframe()
|
108 |
tok_button = gr.Button("Generate")
|
109 |
with gr.TabItem("Similarity"):
|
110 |
+
sim_text1 = gr.Textbox(value="David Bowie", label="Chosen")
|
111 |
+
sim_text2 = gr.Textbox(value="the US", label="Chosen")
|
112 |
+
sim_output = gr.Textbox(value="0.09", label="Similarity Score")
|
113 |
sim_button = gr.Button("Generate")
|
114 |
+
with gr.TabItem("Spans"):
|
115 |
+
span1 = gr.Textbox(value="David Bowie", label="Span 1")
|
116 |
+
label1 = gr.Textbox(value="Full Name", label="Label for Span 1")
|
117 |
+
span2 = gr.Textbox(value="David", label="Span 2")
|
118 |
+
label2 = gr.Textbox(value="First Name", label="Label for Span 2")
|
119 |
+
span_output = gr.HTML()
|
120 |
+
span_button = gr.Button("Generate")
|
121 |
+
|
122 |
+
depen_button.click(dependency, inputs=[
|
123 |
+
text_input, col_punct, col_phrase, compact], outputs=depen_output)
|
124 |
+
entity_button.click(
|
125 |
+
entity, inputs=[text_input, entity_input], outputs=entity_output)
|
126 |
tok_button.click(token, inputs=[text_input, tok_input], outputs=tok_output)
|
127 |
+
sim_button.click(vectors, inputs=[text_input], outputs=[
|
128 |
+
sim_output, sim_text1, sim_text2])
|
129 |
+
span_button.click(
|
130 |
+
span, inputs=[text_input, span1, span2, label1, label2], outputs=span_output)
|
131 |
|
132 |
+
demo.launch()
|