Spaces:
Runtime error
Runtime error
victorialslocum
committed on
Commit
•
cfe3fe5
1
Parent(s):
d56f03c
add comments
Browse files
app.py
CHANGED
@@ -22,28 +22,30 @@ texts = {"en": DEFAULT_TEXT, "ca": "Apple està buscant comprar una startup del
|
|
22 |
button_css = "float: right; --tw-border-opacity: 1; border-color: rgb(229 231 235 / var(--tw-border-opacity)); --tw-gradient-from: rgb(243 244 246 / 0.7); --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to, rgb(243 244 246 / 0)); --tw-gradient-to: rgb(229 231 235 / 0.8); --tw-text-opacity: 1; color: rgb(55 65 81 / var(--tw-text-opacity)); border-width: 1px; --tw-bg-opacity: 1; background-color: rgb(255 255 255 / var(--tw-bg-opacity)); background-image: linear-gradient(to bottom right, var(--tw-gradient-stops)); display: inline-flex; flex: 1 1 0%; align-items: center; justify-content: center; --tw-shadow: 0 1px 2px 0 rgb(0 0 0 / 0.05); --tw-shadow-colored: 0 1px 2px 0 var(--tw-shadow-color); box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow); -webkit-appearance: button; border-radius: 0.5rem; padding-top: 0.5rem; padding-bottom: 0.5rem; padding-left: 1rem; padding-right: 1rem; font-size: 1rem; line-height: 1.5rem; font-weight: 600;"
|
23 |
NOUN_ATTR = ['text', 'root.text', 'root.dep_', 'root.head.text']
|
24 |
|
|
|
25 |
def get_all_models():
|
26 |
with open("requirements.txt") as f:
|
27 |
content = f.readlines()
|
28 |
models = []
|
29 |
for line in content:
|
30 |
if "huggingface.co" in line:
|
|
|
31 |
model = "_".join(line.split("/")[4].split("_")[:3])
|
32 |
if model not in models:
|
33 |
models.append(model)
|
34 |
return models
|
35 |
|
36 |
-
|
37 |
models = get_all_models()
|
38 |
|
39 |
-
|
40 |
def download_svg(svg):
|
41 |
encode = base64.b64encode(bytes(svg, 'utf-8'))
|
42 |
img = 'data:image/svg+xml;base64,' + str(encode)[2:-1]
|
43 |
html = f'<a download="displacy.svg" href="{img}" style="{button_css}">Download as SVG</a>'
|
44 |
return html
|
45 |
|
46 |
-
|
|
|
47 |
def dependency(text, col_punct, col_phrase, compact, bg, font, model):
|
48 |
model_name = model + "_sm"
|
49 |
nlp = spacy.load(model_name)
|
@@ -51,10 +53,10 @@ def dependency(text, col_punct, col_phrase, compact, bg, font, model):
|
|
51 |
options = {"compact": compact, "collapse_phrases": col_phrase,
|
52 |
"collapse_punct": col_punct, "bg": bg, "color": font}
|
53 |
svg = displacy.render(doc, style="dep", options=options)
|
54 |
-
download = download_svg(svg)
|
55 |
return svg, download, model_name
|
56 |
|
57 |
-
|
58 |
def entity(text, ents, model):
|
59 |
model_name = model + "_sm"
|
60 |
nlp = spacy.load(model_name)
|
@@ -63,7 +65,7 @@ def entity(text, ents, model):
|
|
63 |
svg = displacy.render(doc, style="ent", options=options)
|
64 |
return svg, model_name
|
65 |
|
66 |
-
|
67 |
def token(text, attributes, model):
|
68 |
model_name = model + "_sm"
|
69 |
nlp = spacy.load(model_name)
|
@@ -77,7 +79,8 @@ def token(text, attributes, model):
|
|
77 |
data = pd.DataFrame(data, columns=attributes)
|
78 |
return data, model_name
|
79 |
|
80 |
-
|
|
|
81 |
def default_token(text, attributes, model):
|
82 |
model_name = model + "_sm"
|
83 |
nlp = spacy.load(model_name)
|
@@ -90,7 +93,7 @@ def default_token(text, attributes, model):
|
|
90 |
data.append(tok_data)
|
91 |
return data, model_name
|
92 |
|
93 |
-
|
94 |
def noun_chunks(text, model):
|
95 |
model_name = model + "_sm"
|
96 |
nlp = spacy.load(model_name)
|
@@ -102,7 +105,8 @@ def noun_chunks(text, model):
|
|
102 |
data = pd.DataFrame(data, columns=NOUN_ATTR)
|
103 |
return data, model_name
|
104 |
|
105 |
-
|
|
|
106 |
def default_noun_chunks(text, model):
|
107 |
model_name = model + "_sm"
|
108 |
nlp = spacy.load(model_name)
|
@@ -113,7 +117,7 @@ def default_noun_chunks(text, model):
|
|
113 |
chunk.root.head.text])
|
114 |
return data, model_name
|
115 |
|
116 |
-
|
117 |
def random_vectors(text, model):
|
118 |
model_name = model + "_md"
|
119 |
nlp = spacy.load(model_name)
|
@@ -125,13 +129,13 @@ def random_vectors(text, model):
|
|
125 |
choice = random.choices(str_list, k=2)
|
126 |
return round(choice[0].similarity(choice[1]), 2), choice[0].text, choice[1].text, model_name
|
127 |
|
128 |
-
|
129 |
def vectors(input1, input2, model):
|
130 |
model_name = model + "_md"
|
131 |
nlp = spacy.load(model_name)
|
132 |
return round(nlp(input1).similarity(nlp(input2)), 2), model_name
|
133 |
|
134 |
-
|
135 |
def span(text, span1, span2, label1, label2, model):
|
136 |
model_name = model + "_sm"
|
137 |
nlp = spacy.load(model_name)
|
@@ -174,20 +178,17 @@ def span(text, span1, span2, label1, label2, model):
|
|
174 |
svg = displacy.render(doc, style="span")
|
175 |
return svg, model_name
|
176 |
|
177 |
-
|
178 |
def get_text(model):
|
179 |
for i in range(len(models)):
|
180 |
model = model.split("_")[0]
|
181 |
new_text = texts[model]
|
182 |
-
|
183 |
return new_text
|
184 |
|
185 |
-
|
186 |
demo = gr.Blocks(css="scrollbar.css")
|
187 |
|
188 |
with demo:
|
189 |
with gr.Box():
|
190 |
-
|
191 |
with gr.Row():
|
192 |
with gr.Column():
|
193 |
gr.Markdown("# Pipeline Visualizer")
|
@@ -208,14 +209,12 @@ with demo:
|
|
208 |
gr.Markdown("")
|
209 |
with gr.Column():
|
210 |
gr.Markdown("")
|
211 |
-
|
212 |
with gr.Row():
|
213 |
with gr.Column():
|
214 |
text_input = gr.Textbox(
|
215 |
value=DEFAULT_TEXT, interactive=True, label="Input Text")
|
216 |
with gr.Column():
|
217 |
gr.Markdown("")
|
218 |
-
|
219 |
button = gr.Button("Update", variant="primary")
|
220 |
with gr.Box():
|
221 |
with gr.Column():
|
@@ -349,7 +348,7 @@ with demo:
|
|
349 |
with gr.Column():
|
350 |
gr.Markdown("")
|
351 |
sim_random_button = gr.Button("Update random words")
|
352 |
-
sim_button = gr.Button("Update similarity", variant="primary")
|
353 |
with gr.Box():
|
354 |
with gr.Column():
|
355 |
with gr.Row():
|
@@ -391,7 +390,10 @@ with demo:
|
|
391 |
DEFAULT_TEXT, "U.K. startup", "U.K.", "ORG", "GPE", DEFAULT_MODEL)[0])
|
392 |
span_button = gr.Button("Update Spans", variant="primary")
|
393 |
|
|
|
394 |
model_input.change(get_text, inputs=[model_input], outputs=text_input)
|
|
|
|
|
395 |
button.click(dependency, inputs=[
|
396 |
text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[dep_output, dep_download_button, dep_model])
|
397 |
button.click(
|
@@ -404,6 +406,8 @@ with demo:
|
|
404 |
sim_text2, model_input], outputs=[sim_output, sim_model])
|
405 |
button.click(
|
406 |
span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output, span_model])
|
|
|
|
|
407 |
dep_button.click(dependency, inputs=[
|
408 |
text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[dep_output, dep_download_button, dep_model])
|
409 |
ent_button.click(
|
@@ -418,4 +422,5 @@ with demo:
|
|
418 |
span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output, span_model])
|
419 |
sim_random_button.click(random_vectors, inputs=[text_input, model_input], outputs=[
|
420 |
sim_output, sim_text1, sim_text2, sim_model])
|
|
|
421 |
demo.launch()
|
|
|
22 |
button_css = "float: right; --tw-border-opacity: 1; border-color: rgb(229 231 235 / var(--tw-border-opacity)); --tw-gradient-from: rgb(243 244 246 / 0.7); --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to, rgb(243 244 246 / 0)); --tw-gradient-to: rgb(229 231 235 / 0.8); --tw-text-opacity: 1; color: rgb(55 65 81 / var(--tw-text-opacity)); border-width: 1px; --tw-bg-opacity: 1; background-color: rgb(255 255 255 / var(--tw-bg-opacity)); background-image: linear-gradient(to bottom right, var(--tw-gradient-stops)); display: inline-flex; flex: 1 1 0%; align-items: center; justify-content: center; --tw-shadow: 0 1px 2px 0 rgb(0 0 0 / 0.05); --tw-shadow-colored: 0 1px 2px 0 var(--tw-shadow-color); box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow); -webkit-appearance: button; border-radius: 0.5rem; padding-top: 0.5rem; padding-bottom: 0.5rem; padding-left: 1rem; padding-right: 1rem; font-size: 1rem; line-height: 1.5rem; font-weight: 600;"
|
23 |
NOUN_ATTR = ['text', 'root.text', 'root.dep_', 'root.head.text']
|
24 |
|
25 |
+
# get the huggingface models specified in the requirements.txt file
|
26 |
def get_all_models():
    """Return the spaCy model families listed in ``requirements.txt``.

    Each requirement line that points at huggingface.co is expected to look
    like ``...huggingface.co/<org>/<model_name>/...``, so the model name is
    the fifth ``/``-separated field. Only its first three ``_``-separated
    tokens are kept (e.g. ``en_core_web_sm`` -> ``en_core_web``).

    Returns:
        list[str]: unique model prefixes, in order of first appearance.
    """
    models = []
    with open("requirements.txt", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            # Commented-out requirements are not installed, so don't offer them.
            if line.startswith("#") or "huggingface.co" not in line:
                continue
            parts = line.split("/")
            # A mention of huggingface.co without a full model URL has no
            # model segment; skip it instead of raising IndexError.
            if len(parts) < 5 or not parts[4]:
                continue
            # the first three tokens in model, ex. en_core_web
            model = "_".join(parts[4].split("_")[:3])
            if model not in models:
                models.append(model)
    return models
|
37 |
|
|
|
38 |
models = get_all_models()
|
39 |
|
40 |
+
# when clicked, download as SVG. Rendered as HTML on the page
|
41 |
def download_svg(svg):
    """Build an HTML download link that embeds ``svg`` as a base64 data URI.

    Args:
        svg: SVG markup as a string.

    Returns:
        str: an ``<a download="displacy.svg">`` element, styled with the
        module-level ``button_css``, whose ``href`` is the encoded SVG.
    """
    # Decode the base64 bytes to text directly instead of slicing the
    # ``b'...'`` repr produced by ``str(bytes)``.
    encoded = base64.b64encode(bytes(svg, 'utf-8')).decode('ascii')
    img = 'data:image/svg+xml;base64,' + encoded
    html = f'<a download="displacy.svg" href="{img}" style="{button_css}">Download as SVG</a>'
    return html
|
46 |
|
47 |
+
# create dependency graph, inputs are text, collapse punctuation,
|
48 |
+
# collapse phrases, compact, background color, font color, and model
|
49 |
def dependency(text, col_punct, col_phrase, compact, bg, font, model):
|
50 |
model_name = model + "_sm"
|
51 |
nlp = spacy.load(model_name)
|
|
|
53 |
options = {"compact": compact, "collapse_phrases": col_phrase,
|
54 |
"collapse_punct": col_punct, "bg": bg, "color": font}
|
55 |
svg = displacy.render(doc, style="dep", options=options)
|
56 |
+
download = download_svg(svg) # download button for SVG
|
57 |
return svg, download, model_name
|
58 |
|
59 |
+
# returns the NER displacy, inputs are text, checked ents, and model
|
60 |
def entity(text, ents, model):
|
61 |
model_name = model + "_sm"
|
62 |
nlp = spacy.load(model_name)
|
|
|
65 |
svg = displacy.render(doc, style="ent", options=options)
|
66 |
return svg, model_name
|
67 |
|
68 |
+
# returns token attributes for the user inputs
|
69 |
def token(text, attributes, model):
|
70 |
model_name = model + "_sm"
|
71 |
nlp = spacy.load(model_name)
|
|
|
79 |
data = pd.DataFrame(data, columns=attributes)
|
80 |
return data, model_name
|
81 |
|
82 |
+
# returns token attributes in the default state
|
83 |
+
# the return value is not a pandas DataFrame
|
84 |
def default_token(text, attributes, model):
|
85 |
model_name = model + "_sm"
|
86 |
nlp = spacy.load(model_name)
|
|
|
93 |
data.append(tok_data)
|
94 |
return data, model_name
|
95 |
|
96 |
+
# returns noun chunks in text
|
97 |
def noun_chunks(text, model):
|
98 |
model_name = model + "_sm"
|
99 |
nlp = spacy.load(model_name)
|
|
|
105 |
data = pd.DataFrame(data, columns=NOUN_ATTR)
|
106 |
return data, model_name
|
107 |
|
108 |
+
# returns noun chunks for the default value
|
109 |
+
# the return value is not a pandas DataFrame
|
110 |
def default_noun_chunks(text, model):
|
111 |
model_name = model + "_sm"
|
112 |
nlp = spacy.load(model_name)
|
|
|
117 |
chunk.root.head.text])
|
118 |
return data, model_name
|
119 |
|
120 |
+
# Get similarity of two randomly selected vectors
|
121 |
def random_vectors(text, model):
|
122 |
model_name = model + "_md"
|
123 |
nlp = spacy.load(model_name)
|
|
|
129 |
choice = random.choices(str_list, k=2)
|
130 |
return round(choice[0].similarity(choice[1]), 2), choice[0].text, choice[1].text, model_name
|
131 |
|
132 |
+
# Get similarity of two inputted vectors
|
133 |
def vectors(input1, input2, model):
    """Compute the similarity between two input texts.

    Loads the ``<model>_md`` pipeline, runs it over both inputs and compares
    the resulting objects with ``similarity``.

    Args:
        input1: first text to compare.
        input2: second text to compare.
        model: model prefix; ``"_md"`` is appended to form the pipeline name.

    Returns:
        tuple: (similarity rounded to 2 decimal places, loaded model name).
    """
    model_name = model + "_md"
    pipeline = spacy.load(model_name)
    first_doc = pipeline(input1)
    second_doc = pipeline(input2)
    score = first_doc.similarity(second_doc)
    return round(score, 2), model_name
|
137 |
|
138 |
+
# display spans, inputs are text, spans, labels, and model
|
139 |
def span(text, span1, span2, label1, label2, model):
|
140 |
model_name = model + "_sm"
|
141 |
nlp = spacy.load(model_name)
|
|
|
178 |
svg = displacy.render(doc, style="span")
|
179 |
return svg, model_name
|
180 |
|
181 |
+
# get default text based on language model
|
182 |
def get_text(model):
    """Return the default sample text for the language of ``model``.

    The language code is the part of ``model`` before the first underscore
    (e.g. ``"en_core_web"`` -> ``"en"``) and is used as the key into the
    module-level ``texts`` mapping.

    Args:
        model: model name prefix such as ``"en_core_web"``.

    Returns:
        The entry of ``texts`` for that language code.

    Raises:
        KeyError: if ``texts`` has no entry for the language code.
    """
    # The previous loop over ``models`` recomputed the same value on every
    # pass and left the result unbound when ``models`` was empty; a single
    # lookup is equivalent and always defined.
    lang = model.split("_")[0]
    return texts[lang]
|
187 |
|
|
|
188 |
demo = gr.Blocks(css="scrollbar.css")
|
189 |
|
190 |
with demo:
|
191 |
with gr.Box():
|
|
|
192 |
with gr.Row():
|
193 |
with gr.Column():
|
194 |
gr.Markdown("# Pipeline Visualizer")
|
|
|
209 |
gr.Markdown("")
|
210 |
with gr.Column():
|
211 |
gr.Markdown("")
|
|
|
212 |
with gr.Row():
|
213 |
with gr.Column():
|
214 |
text_input = gr.Textbox(
|
215 |
value=DEFAULT_TEXT, interactive=True, label="Input Text")
|
216 |
with gr.Column():
|
217 |
gr.Markdown("")
|
|
|
218 |
button = gr.Button("Update", variant="primary")
|
219 |
with gr.Box():
|
220 |
with gr.Column():
|
|
|
348 |
with gr.Column():
|
349 |
gr.Markdown("")
|
350 |
sim_random_button = gr.Button("Update random words")
|
351 |
+
sim_button = gr.Button("Update similarity", variant="primary")
|
352 |
with gr.Box():
|
353 |
with gr.Column():
|
354 |
with gr.Row():
|
|
|
390 |
DEFAULT_TEXT, "U.K. startup", "U.K.", "ORG", "GPE", DEFAULT_MODEL)[0])
|
391 |
span_button = gr.Button("Update Spans", variant="primary")
|
392 |
|
393 |
+
# change text based on model input
|
394 |
model_input.change(get_text, inputs=[model_input], outputs=text_input)
|
395 |
+
|
396 |
+
# main button - update all components
|
397 |
button.click(dependency, inputs=[
|
398 |
text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[dep_output, dep_download_button, dep_model])
|
399 |
button.click(
|
|
|
406 |
sim_text2, model_input], outputs=[sim_output, sim_model])
|
407 |
button.click(
|
408 |
span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output, span_model])
|
409 |
+
|
410 |
+
# individual component buttons
|
411 |
dep_button.click(dependency, inputs=[
|
412 |
text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[dep_output, dep_download_button, dep_model])
|
413 |
ent_button.click(
|
|
|
422 |
span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output, span_model])
|
423 |
sim_random_button.click(random_vectors, inputs=[text_input, model_input], outputs=[
|
424 |
sim_output, sim_text1, sim_text2, sim_model])
|
425 |
+
|
426 |
demo.launch()
|