victorialslocum committed on
Commit
cfe3fe5
1 Parent(s): d56f03c

add comments

Browse files
Files changed (1) hide show
  1. app.py +24 -19
app.py CHANGED
@@ -22,28 +22,30 @@ texts = {"en": DEFAULT_TEXT, "ca": "Apple està buscant comprar una startup del
22
  button_css = "float: right; --tw-border-opacity: 1; border-color: rgb(229 231 235 / var(--tw-border-opacity)); --tw-gradient-from: rgb(243 244 246 / 0.7); --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to, rgb(243 244 246 / 0)); --tw-gradient-to: rgb(229 231 235 / 0.8); --tw-text-opacity: 1; color: rgb(55 65 81 / var(--tw-text-opacity)); border-width: 1px; --tw-bg-opacity: 1; background-color: rgb(255 255 255 / var(--tw-bg-opacity)); background-image: linear-gradient(to bottom right, var(--tw-gradient-stops)); display: inline-flex; flex: 1 1 0%; align-items: center; justify-content: center; --tw-shadow: 0 1px 2px 0 rgb(0 0 0 / 0.05); --tw-shadow-colored: 0 1px 2px 0 var(--tw-shadow-color); box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow); -webkit-appearance: button; border-radius: 0.5rem; padding-top: 0.5rem; padding-bottom: 0.5rem; padding-left: 1rem; padding-right: 1rem; font-size: 1rem; line-height: 1.5rem; font-weight: 600;"
23
  NOUN_ATTR = ['text', 'root.text', 'root.dep_', 'root.head.text']
24
 
 
25
  def get_all_models():
26
  with open("requirements.txt") as f:
27
  content = f.readlines()
28
  models = []
29
  for line in content:
30
  if "huggingface.co" in line:
 
31
  model = "_".join(line.split("/")[4].split("_")[:3])
32
  if model not in models:
33
  models.append(model)
34
  return models
35
 
36
-
37
  models = get_all_models()
38
 
39
-
40
  def download_svg(svg):
41
  encode = base64.b64encode(bytes(svg, 'utf-8'))
42
  img = 'data:image/svg+xml;base64,' + str(encode)[2:-1]
43
  html = f'<a download="displacy.svg" href="{img}" style="{button_css}">Download as SVG</a>'
44
  return html
45
 
46
-
 
47
  def dependency(text, col_punct, col_phrase, compact, bg, font, model):
48
  model_name = model + "_sm"
49
  nlp = spacy.load(model_name)
@@ -51,10 +53,10 @@ def dependency(text, col_punct, col_phrase, compact, bg, font, model):
51
  options = {"compact": compact, "collapse_phrases": col_phrase,
52
  "collapse_punct": col_punct, "bg": bg, "color": font}
53
  svg = displacy.render(doc, style="dep", options=options)
54
- download = download_svg(svg)
55
  return svg, download, model_name
56
 
57
-
58
  def entity(text, ents, model):
59
  model_name = model + "_sm"
60
  nlp = spacy.load(model_name)
@@ -63,7 +65,7 @@ def entity(text, ents, model):
63
  svg = displacy.render(doc, style="ent", options=options)
64
  return svg, model_name
65
 
66
-
67
  def token(text, attributes, model):
68
  model_name = model + "_sm"
69
  nlp = spacy.load(model_name)
@@ -77,7 +79,8 @@ def token(text, attributes, model):
77
  data = pd.DataFrame(data, columns=attributes)
78
  return data, model_name
79
 
80
-
 
81
  def default_token(text, attributes, model):
82
  model_name = model + "_sm"
83
  nlp = spacy.load(model_name)
@@ -90,7 +93,7 @@ def default_token(text, attributes, model):
90
  data.append(tok_data)
91
  return data, model_name
92
 
93
-
94
  def noun_chunks(text, model):
95
  model_name = model + "_sm"
96
  nlp = spacy.load(model_name)
@@ -102,7 +105,8 @@ def noun_chunks(text, model):
102
  data = pd.DataFrame(data, columns=NOUN_ATTR)
103
  return data, model_name
104
 
105
-
 
106
  def default_noun_chunks(text, model):
107
  model_name = model + "_sm"
108
  nlp = spacy.load(model_name)
@@ -113,7 +117,7 @@ def default_noun_chunks(text, model):
113
  chunk.root.head.text])
114
  return data, model_name
115
 
116
-
117
  def random_vectors(text, model):
118
  model_name = model + "_md"
119
  nlp = spacy.load(model_name)
@@ -125,13 +129,13 @@ def random_vectors(text, model):
125
  choice = random.choices(str_list, k=2)
126
  return round(choice[0].similarity(choice[1]), 2), choice[0].text, choice[1].text, model_name
127
 
128
-
129
  def vectors(input1, input2, model):
130
  model_name = model + "_md"
131
  nlp = spacy.load(model_name)
132
  return round(nlp(input1).similarity(nlp(input2)), 2), model_name
133
 
134
-
135
  def span(text, span1, span2, label1, label2, model):
136
  model_name = model + "_sm"
137
  nlp = spacy.load(model_name)
@@ -174,20 +178,17 @@ def span(text, span1, span2, label1, label2, model):
174
  svg = displacy.render(doc, style="span")
175
  return svg, model_name
176
 
177
-
178
  def get_text(model):
179
  for i in range(len(models)):
180
  model = model.split("_")[0]
181
  new_text = texts[model]
182
-
183
  return new_text
184
 
185
-
186
  demo = gr.Blocks(css="scrollbar.css")
187
 
188
  with demo:
189
  with gr.Box():
190
-
191
  with gr.Row():
192
  with gr.Column():
193
  gr.Markdown("# Pipeline Visualizer")
@@ -208,14 +209,12 @@ with demo:
208
  gr.Markdown("")
209
  with gr.Column():
210
  gr.Markdown("")
211
-
212
  with gr.Row():
213
  with gr.Column():
214
  text_input = gr.Textbox(
215
  value=DEFAULT_TEXT, interactive=True, label="Input Text")
216
  with gr.Column():
217
  gr.Markdown("")
218
-
219
  button = gr.Button("Update", variant="primary")
220
  with gr.Box():
221
  with gr.Column():
@@ -349,7 +348,7 @@ with demo:
349
  with gr.Column():
350
  gr.Markdown("")
351
  sim_random_button = gr.Button("Update random words")
352
- sim_button = gr.Button("Update similarity", variant="primary")
353
  with gr.Box():
354
  with gr.Column():
355
  with gr.Row():
@@ -391,7 +390,10 @@ with demo:
391
  DEFAULT_TEXT, "U.K. startup", "U.K.", "ORG", "GPE", DEFAULT_MODEL)[0])
392
  span_button = gr.Button("Update Spans", variant="primary")
393
 
 
394
  model_input.change(get_text, inputs=[model_input], outputs=text_input)
 
 
395
  button.click(dependency, inputs=[
396
  text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[dep_output, dep_download_button, dep_model])
397
  button.click(
@@ -404,6 +406,8 @@ with demo:
404
  sim_text2, model_input], outputs=[sim_output, sim_model])
405
  button.click(
406
  span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output, span_model])
 
 
407
  dep_button.click(dependency, inputs=[
408
  text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[dep_output, dep_download_button, dep_model])
409
  ent_button.click(
@@ -418,4 +422,5 @@ with demo:
418
  span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output, span_model])
419
  sim_random_button.click(random_vectors, inputs=[text_input, model_input], outputs=[
420
  sim_output, sim_text1, sim_text2, sim_model])
 
421
  demo.launch()
 
22
  button_css = "float: right; --tw-border-opacity: 1; border-color: rgb(229 231 235 / var(--tw-border-opacity)); --tw-gradient-from: rgb(243 244 246 / 0.7); --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to, rgb(243 244 246 / 0)); --tw-gradient-to: rgb(229 231 235 / 0.8); --tw-text-opacity: 1; color: rgb(55 65 81 / var(--tw-text-opacity)); border-width: 1px; --tw-bg-opacity: 1; background-color: rgb(255 255 255 / var(--tw-bg-opacity)); background-image: linear-gradient(to bottom right, var(--tw-gradient-stops)); display: inline-flex; flex: 1 1 0%; align-items: center; justify-content: center; --tw-shadow: 0 1px 2px 0 rgb(0 0 0 / 0.05); --tw-shadow-colored: 0 1px 2px 0 var(--tw-shadow-color); box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow); -webkit-appearance: button; border-radius: 0.5rem; padding-top: 0.5rem; padding-bottom: 0.5rem; padding-left: 1rem; padding-right: 1rem; font-size: 1rem; line-height: 1.5rem; font-weight: 600;"
23
  NOUN_ATTR = ['text', 'root.text', 'root.dep_', 'root.head.text']
24
 
25
+ # get the huggingface models specified in the requirements.txt file
26
  def get_all_models():
27
  with open("requirements.txt") as f:
28
  content = f.readlines()
29
  models = []
30
  for line in content:
31
  if "huggingface.co" in line:
32
+ # the first three tokens in model, ex. en_core_web
33
  model = "_".join(line.split("/")[4].split("_")[:3])
34
  if model not in models:
35
  models.append(model)
36
  return models
37
 
 
38
  models = get_all_models()
39
 
40
+ # when clicked, download as SVG. Rendered as HTML on the page
41
  def download_svg(svg):
42
  encode = base64.b64encode(bytes(svg, 'utf-8'))
43
  img = 'data:image/svg+xml;base64,' + str(encode)[2:-1]
44
  html = f'<a download="displacy.svg" href="{img}" style="{button_css}">Download as SVG</a>'
45
  return html
46
 
47
+ # create dependency graph, inputs are text, collapse punctuation,
48
+ # collapse phrases, compact, background color, font color, and model
49
  def dependency(text, col_punct, col_phrase, compact, bg, font, model):
50
  model_name = model + "_sm"
51
  nlp = spacy.load(model_name)
 
53
  options = {"compact": compact, "collapse_phrases": col_phrase,
54
  "collapse_punct": col_punct, "bg": bg, "color": font}
55
  svg = displacy.render(doc, style="dep", options=options)
56
+ download = download_svg(svg) # download button for SVG
57
  return svg, download, model_name
58
 
59
+ # returns the NER displacy, inputs are text, checked ents, and model
60
  def entity(text, ents, model):
61
  model_name = model + "_sm"
62
  nlp = spacy.load(model_name)
 
65
  svg = displacy.render(doc, style="ent", options=options)
66
  return svg, model_name
67
 
68
+ # returns token attributes for the user inputs
69
  def token(text, attributes, model):
70
  model_name = model + "_sm"
71
  nlp = spacy.load(model_name)
 
79
  data = pd.DataFrame(data, columns=attributes)
80
  return data, model_name
81
 
82
+ # returns token attributes in the default state
83
+ # the return value is not a pandas DataFrame
84
  def default_token(text, attributes, model):
85
  model_name = model + "_sm"
86
  nlp = spacy.load(model_name)
 
93
  data.append(tok_data)
94
  return data, model_name
95
 
96
+ # returns noun chunks in text
97
  def noun_chunks(text, model):
98
  model_name = model + "_sm"
99
  nlp = spacy.load(model_name)
 
105
  data = pd.DataFrame(data, columns=NOUN_ATTR)
106
  return data, model_name
107
 
108
+ # returns noun chunks for the default value
109
+ # the return value is not a pandas DataFrame
110
  def default_noun_chunks(text, model):
111
  model_name = model + "_sm"
112
  nlp = spacy.load(model_name)
 
117
  chunk.root.head.text])
118
  return data, model_name
119
 
120
+ # Get similarity of two randomly chosen words from the text
121
  def random_vectors(text, model):
122
  model_name = model + "_md"
123
  nlp = spacy.load(model_name)
 
129
  choice = random.choices(str_list, k=2)
130
  return round(choice[0].similarity(choice[1]), 2), choice[0].text, choice[1].text, model_name
131
 
132
+ # Get similarity of two user-provided texts
133
  def vectors(input1, input2, model):
134
  model_name = model + "_md"
135
  nlp = spacy.load(model_name)
136
  return round(nlp(input1).similarity(nlp(input2)), 2), model_name
137
 
138
+ # display spans, inputs are text, spans, labels, and model
139
  def span(text, span1, span2, label1, label2, model):
140
  model_name = model + "_sm"
141
  nlp = spacy.load(model_name)
 
178
  svg = displacy.render(doc, style="span")
179
  return svg, model_name
180
 
181
+ # get default text based on language model
182
  def get_text(model):
183
  for i in range(len(models)):
184
  model = model.split("_")[0]
185
  new_text = texts[model]
 
186
  return new_text
187
 
 
188
  demo = gr.Blocks(css="scrollbar.css")
189
 
190
  with demo:
191
  with gr.Box():
 
192
  with gr.Row():
193
  with gr.Column():
194
  gr.Markdown("# Pipeline Visualizer")
 
209
  gr.Markdown("")
210
  with gr.Column():
211
  gr.Markdown("")
 
212
  with gr.Row():
213
  with gr.Column():
214
  text_input = gr.Textbox(
215
  value=DEFAULT_TEXT, interactive=True, label="Input Text")
216
  with gr.Column():
217
  gr.Markdown("")
 
218
  button = gr.Button("Update", variant="primary")
219
  with gr.Box():
220
  with gr.Column():
 
348
  with gr.Column():
349
  gr.Markdown("")
350
  sim_random_button = gr.Button("Update random words")
351
+ sim_button = gr.Button("Update similarity", variant="primary")
352
  with gr.Box():
353
  with gr.Column():
354
  with gr.Row():
 
390
  DEFAULT_TEXT, "U.K. startup", "U.K.", "ORG", "GPE", DEFAULT_MODEL)[0])
391
  span_button = gr.Button("Update Spans", variant="primary")
392
 
393
+ # change text based on model input
394
  model_input.change(get_text, inputs=[model_input], outputs=text_input)
395
+
396
+ # main button - update all components
397
  button.click(dependency, inputs=[
398
  text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[dep_output, dep_download_button, dep_model])
399
  button.click(
 
406
  sim_text2, model_input], outputs=[sim_output, sim_model])
407
  button.click(
408
  span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output, span_model])
409
+
410
+ # individual component buttons
411
  dep_button.click(dependency, inputs=[
412
  text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[dep_output, dep_download_button, dep_model])
413
  ent_button.click(
 
422
  span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output, span_model])
423
  sim_random_button.click(random_vectors, inputs=[text_input, model_input], outputs=[
424
  sim_output, sim_text1, sim_text2, sim_model])
425
+
426
  demo.launch()