"""Gradio demo that exposes several spaCy features on one input text.

Each tab runs one of the functions below on the shared text box:
dependency rendering, entity rendering, a token attribute table, a random
similarity comparison, and custom span rendering.
"""
import random

import gradio as gr
import spacy
from spacy import displacy
from spacy.tokens import Span

DEFAULT_MODEL = "en_core_web_sm"
DEFAULT_TEXT = "David Bowie moved to the US in 1974, initially staying in New York City before settling in Los Angeles."
DEFAULT_TOK_ATTR = ['idx', 'text', 'pos_', 'lemma_', 'shape_', 'dep_']
DEFAULT_ENTS = ['CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC', 'MONEY', 'NORP',
                'ORDINAL', 'ORG', 'PERCENT', 'PERSON', 'PRODUCT', 'QUANTITY', 'TIME', 'WORK_OF_ART']

# Pipeline used by the dependency, entity, token and span tabs.
nlp = spacy.load(DEFAULT_MODEL)
# Second pipeline used only by the similarity tab.
nlp2 = spacy.load("en_core_web_md")


def dependency(text, col_punct, col_phrase, compact):
    """Render the dependency parse of ``text`` with displaCy.

    Args:
        text: Input text to parse.
        col_punct: Value passed as displaCy's ``collapse_punct`` option.
        col_phrase: Value passed as displaCy's ``collapse_phrases`` option.
        compact: Value passed as displaCy's ``compact`` option.

    Returns:
        The result of ``displacy.render(..., style="dep")``, which the UI
        places in an HTML component.
    """
    options = {
        "compact": compact,
        "collapse_phrases": col_phrase,
        "collapse_punct": col_punct,
    }
    return displacy.render(nlp(text), style="dep", options=options)


def entity(text, ents):
    """Render the entities of ``text`` with displaCy.

    Args:
        text: Input text to analyse.
        ents: Entity labels passed as displaCy's ``ents`` option.

    Returns:
        The result of ``displacy.render(..., style="ent")``.
    """
    return displacy.render(nlp(text), style="ent", options={"ents": ents})


def text(default):
    """Return ``default`` when it is truthy, otherwise ``None``."""
    return default if default else None


def token(text, attributes):
    """Build a table of token attributes for ``text``.

    Args:
        text: Input text to tokenize.
        attributes: Names of token attributes to read, one per column.

    Returns:
        A list with one row per token; each row holds the requested
        attribute values in the order given by ``attributes``.
    """
    return [[getattr(tok, attr) for attr in attributes] for tok in nlp(text)]


def vectors(text):
    """Compare two randomly chosen pieces of ``text`` by similarity.

    Candidates are the noun chunks of the text plus every token that is not
    a stop word and whose part of speech is neither ``PUNCT`` nor ``PROPN``.

    Args:
        text: Input text to draw the two candidates from.

    Returns:
        A tuple ``(score, first_text, second_text)`` where ``score`` is the
        similarity rounded to two decimals. When the text yields no
        candidates, ``(None, "", "")`` is returned instead of raising.
    """
    doc = nlp2(text)
    candidates = list(doc.noun_chunks)
    candidates.extend(
        tok for tok in doc
        if not tok.is_stop and tok.pos_ not in ('PUNCT', "PROPN")
    )

    # random.choices() raises IndexError on an empty population, so bail out
    # with blank outputs when there is nothing to compare.
    if not candidates:
        return None, "", ""

    if len(candidates) >= 2:
        # Draw without replacement so an item is never compared with itself.
        first, second = random.sample(candidates, k=2)
    else:
        # Only one candidate exists; comparing it with itself is all we can do.
        first = second = candidates[0]

    return round(first.similarity(second), 2), first.text, second.text


def _find_token_span(tokens, words):
    """Return ``(start, end)`` of the first run of ``tokens`` equal to ``words``.

    ``end`` is exclusive. Returns ``None`` when ``words`` is empty or does
    not occur as a contiguous run in ``tokens``.
    """
    width = len(words)
    if width == 0:
        return None
    for start in range(len(tokens) - width + 1):
        if tokens[start:start + width] == words:
            return start, start + width
    return None


def span(text, span1, span2, label1, label2):
    """Render up to two labelled spans of ``text`` with displaCy.

    Each phrase is split on whitespace and located as a contiguous sequence
    of token texts; the first occurrence is used. A phrase that is empty or
    does not occur in the text is skipped rather than being rendered as an
    empty or invalid span.

    Args:
        text: Input text to tokenize.
        span1: Phrase for the first span.
        span2: Phrase for the second span.
        label1: Label attached to the first span.
        label2: Label attached to the second span.

    Returns:
        The result of ``displacy.render(..., style="span")``.
    """
    doc = nlp(text)
    # Collect the token texts once instead of rebuilding a list per lookup.
    token_texts = [tok.text for tok in doc]

    found = []
    for phrase, label in ((span1, label1), (span2, label2)):
        bounds = _find_token_span(token_texts, (phrase or "").split())
        if bounds is not None:
            start, end = bounds
            found.append(Span(doc, start, end, label))

    # "sc" is the spans key that displaCy's span style reads by default.
    doc.spans["sc"] = found
    return displacy.render(doc, style="span")


demo = gr.Blocks()

with demo:
    # gr.Markdown("Input text here!")
    text_input = gr.Textbox(value=DEFAULT_TEXT, interactive=True)
    with gr.Tabs():
        with gr.TabItem("Dependency"):
            col_punct = gr.Checkbox(label="Collapse Punctuation", value=True)
            col_phrase = gr.Checkbox(label="Collapse Phrases", value=True)
            compact = gr.Checkbox(label="Compact", value=True)
            depen_output = gr.HTML()
            depen_button = gr.Button("Generate")
        with gr.TabItem("Entity"):
            entity_input = gr.CheckboxGroup(DEFAULT_ENTS, value=DEFAULT_ENTS)
            entity_output = gr.HTML()
            entity_button = gr.Button("Generate")
        with gr.TabItem("Tokens"):
            tok_input = gr.CheckboxGroup(
                DEFAULT_TOK_ATTR, value=DEFAULT_TOK_ATTR)
            tok_output = gr.Dataframe()
            tok_button = gr.Button("Generate")
        with gr.TabItem("Similarity"):
            sim_text1 = gr.Textbox(value="David Bowie", label="Chosen")
            sim_text2 = gr.Textbox(value="the US", label="Chosen")
            sim_output = gr.Textbox(value="0.09", label="Similarity Score")
            sim_button = gr.Button("Generate")
        with gr.TabItem("Spans"):
            span1 = gr.Textbox(value="David Bowie", label="Span 1")
            label1 = gr.Textbox(value="Full Name", label="Label for Span 1")
            span2 = gr.Textbox(value="David", label="Span 2")
            label2 = gr.Textbox(value="First Name", label="Label for Span 2")
            span_output = gr.HTML()
            span_button = gr.Button("Generate")

    # Wire each tab's button to its handler; every handler reads the shared
    # text box plus the controls of its own tab.
    depen_button.click(dependency, inputs=[
        text_input, col_punct, col_phrase, compact], outputs=depen_output)
    entity_button.click(
        entity, inputs=[text_input, entity_input], outputs=entity_output)
    tok_button.click(token, inputs=[text_input, tok_input], outputs=tok_output)
    sim_button.click(vectors, inputs=[text_input], outputs=[
        sim_output, sim_text1, sim_text2])
    span_button.click(
        span, inputs=[text_input, span1, span2, label1, label2], outputs=span_output)

demo.launch()