Victoria Slocum committed on
Commit
bc565d4
1 Parent(s): 6934a3d

Add application file

Browse files
Files changed (1) hide show
  1. app.py +108 -0
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spacy
2
+ from spacy import displacy
3
+ import random
4
+ from spacy.tokens import Span
5
+ import gradio as gr
6
+
7
# Name of the spaCy pipeline used for parsing, tagging and entity recognition.
DEFAULT_MODEL = "en_core_web_sm"
# Sentence pre-filled in the input textbox of the demo.
DEFAULT_TEXT = "David Bowie moved to the US in 1974, initially staying in New York City before settling in Los Angeles."
# Token attributes offered (and pre-selected) in the "Tokens" tab.
DEFAULT_TOK_ATTR = ['idx', 'text', 'pos_', 'lemma_', 'shape_', 'dep_']
# Entity labels offered (and pre-selected) in the "Entity" tab.
DEFAULT_ENTS = ['CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC', 'MONEY', 'NORP', 'ORDINAL', 'ORG', 'PERCENT', 'PERSON', 'PRODUCT', 'QUANTITY', 'TIME', 'WORK_OF_ART']

# Both pipelines are loaded once, at import time, and shared by every callback.
# Use the constant instead of repeating the model name as a literal.
nlp = spacy.load(DEFAULT_MODEL)
# Second pipeline, used only by `vectors` for similarity scores.
# NOTE(review): presumably chosen because the "md" package ships word vectors - confirm.
nlp2 = spacy.load("en_core_web_md")
14
+
15
def dependency(text, col_punct, col_phrase, compact):
    """Render the dependency parse of ``text`` with displaCy.

    Args:
        text: Raw input string, parsed with the module-level ``nlp`` pipeline.
        col_punct: Forwarded to displaCy as the ``collapse_punct`` option.
        col_phrase: Forwarded to displaCy as the ``collapse_phrases`` option.
        compact: Forwarded to displaCy as the ``compact`` option.

    Returns:
        The value produced by ``displacy.render`` for ``style="dep"``.
    """
    render_options = dict(
        compact=compact,
        collapse_phrases=col_phrase,
        collapse_punct=col_punct,
    )
    return displacy.render(nlp(text), style="dep", options=render_options)
20
+
21
def entity(text, ents):
    """Render the named entities found in ``text`` with displaCy.

    Args:
        text: Raw input string, parsed with the module-level ``nlp`` pipeline.
        ents: Entity labels forwarded to displaCy as the ``ents`` option.

    Returns:
        The value produced by ``displacy.render`` for ``style="ent"``.
    """
    parsed = nlp(text)
    return displacy.render(parsed, style="ent", options={"ents": ents})
26
+
27
def text(default):
    """Return ``default`` when it is truthy; otherwise return ``None``."""
    return default if default else None
30
+
31
def token(text, attributes):
    """Collect the requested attributes of every token in ``text``.

    Args:
        text: Raw input string, parsed with the module-level ``nlp`` pipeline.
        attributes: Iterable of attribute names looked up on each token
            via ``getattr``.

    Returns:
        A list with one inner list per token; each inner list holds the
        attribute values in the order given by ``attributes``.
    """
    return [
        [getattr(tok, name) for name in attributes]
        for tok in nlp(text)
    ]
40
+
41
def vectors(text):
    """Score the similarity of two randomly chosen pieces of ``text``.

    Candidates are the noun chunks of the parsed text plus every token that
    is not a stop word and whose part of speech is neither ``PUNCT`` nor
    ``PROPN``. The text is parsed with the module-level ``nlp2`` pipeline.

    Args:
        text: Raw input string.

    Returns:
        A ``(score, first_text, second_text)`` tuple: the similarity of the
        two picked items rounded to two decimals, followed by their texts.
        When the text yields no candidate at all, ``(None, "", "")`` is
        returned instead of raising.
    """
    doc = nlp2(text)
    candidates = list(doc.noun_chunks)
    candidates.extend(
        tok for tok in doc
        if not tok.is_stop and tok.pos_ not in ("PUNCT", "PROPN")
    )

    if not candidates:
        # Nothing to compare (e.g. empty input); random selection would raise.
        return None, "", ""

    if len(candidates) == 1:
        # Only one candidate exists, so it can only be compared with itself.
        first = second = candidates[0]
    else:
        # Sample without replacement so the same candidate is never picked
        # twice, which would always give a trivial score of 1.0. A noun chunk
        # and a token may still share the same text; they remain distinct
        # candidates.
        first, second = random.sample(candidates, k=2)

    return round(first.similarity(second), 2), first.text, second.text
48
+
49
def _token_range(doc, first_text, last_text):
    """Return ``(start, end)`` token indices for a span, or ``None``.

    ``start`` is the index of the first token whose text equals
    ``first_text``; ``end`` is one past the first token at or after ``start``
    whose text equals ``last_text`` (exclusive end).
    """
    start = next((tok.i for tok in doc if tok.text == first_text), None)
    if start is None:
        return None
    last = next((tok.i for tok in doc[start:] if tok.text == last_text), None)
    if last is None:
        return None
    return start, last + 1


def span(text, span1, span2, label1, label2):
    """Render two labelled spans of ``text`` with displaCy's span style.

    Args:
        text: Raw input string, parsed with the module-level ``nlp`` pipeline.
        span1: First span to highlight. Its first and last elements are
            compared with token texts.
            NOTE(review): this assumes a sequence of token strings; no caller
            is visible in this file, so confirm the expected type.
        span2: Second span to highlight, same format as ``span1``.
        label1: Label attached to the first span.
        label2: Label attached to the second span.

    Returns:
        The value produced by ``displacy.render`` for ``style="span"``.
        A span that is empty or cannot be located in the text is skipped.
    """
    doc = nlp(text)

    located = []
    for pieces, label in ((span1, label1), (span2, label2)):
        if not pieces:
            # Indexing an empty span with [0] would raise; nothing to mark.
            continue
        bounds = _token_range(doc, pieces[0], pieces[-1])
        if bounds is None:
            continue
        # Span takes token indices with an exclusive end, so use Token.i
        # (token position) rather than Token.idx (character offset).
        located.append(Span(doc, bounds[0], bounds[1], label))

    doc.spans["sc"] = located
    return displacy.render(doc, style="span")
74
+
75
# Texts of the noun chunks found in the default sentence, computed at import time.
list_chunks = [noun_chunk.text for noun_chunk in nlp(DEFAULT_TEXT).noun_chunks]
76
+
77
# Build the UI: one shared input textbox and one tab per visualisation.
with gr.Blocks() as demo:
    text_input = gr.Textbox(value=DEFAULT_TEXT, interactive=True)

    with gr.Tabs():
        with gr.TabItem("Dependency"):
            col_punct = gr.Checkbox(label="Collapse Punctuation", value=True)
            col_phrase = gr.Checkbox(label="Collapse Phrases", value=True)
            compact = gr.Checkbox(label="Compact", value=True)
            depen_output = gr.HTML()
            depen_button = gr.Button("Generate")

        with gr.TabItem("Entity"):
            entity_input = gr.CheckboxGroup(DEFAULT_ENTS, value=DEFAULT_ENTS)
            entity_output = gr.HTML()
            entity_button = gr.Button("Generate")

        with gr.TabItem("Tokens"):
            tok_input = gr.CheckboxGroup(DEFAULT_TOK_ATTR, value=DEFAULT_TOK_ATTR)
            tok_output = gr.Dataframe()
            tok_button = gr.Button("Generate")

        with gr.TabItem("Similarity"):
            sim_text1 = gr.Textbox(label="Chosen")
            sim_text2 = gr.Textbox(label="Chosen")
            sim_output = gr.Textbox(label="Similarity Score")
            sim_button = gr.Button("Generate")

    # Event wiring: each "Generate" button runs the callback of its own tab,
    # always reading the shared textbox as the first input.
    depen_button.click(
        fn=dependency,
        inputs=[text_input, col_punct, col_phrase, compact],
        outputs=depen_output,
    )
    entity_button.click(
        fn=entity,
        inputs=[text_input, entity_input],
        outputs=entity_output,
    )
    tok_button.click(
        fn=token,
        inputs=[text_input, tok_input],
        outputs=tok_output,
    )
    # `vectors` returns (score, first text, second text), matching this output order.
    sim_button.click(
        fn=vectors,
        inputs=[text_input],
        outputs=[sim_output, sim_text1, sim_text2],
    )

demo.launch()