Commit ba0e651 by khaerens · 1 parent: 376bc0c
.vscode/settings.json CHANGED
@@ -1,7 +1,7 @@
 {
     "workbench.colorCustomizations": {
-        "activityBar.background": "#630018",
-        "titleBar.activeBackground": "#8A0121",
-        "titleBar.activeForeground": "#FFFBFC"
+        "activityBar.background": "#09323E",
+        "titleBar.activeBackground": "#0C4656",
+        "titleBar.activeForeground": "#F6FCFE"
     }
 }
__pycache__/app.cpython-38.pyc CHANGED
Binary files a/__pycache__/app.cpython-38.pyc and b/__pycache__/app.cpython-38.pyc differ
 
__pycache__/rebel.cpython-38.pyc CHANGED
Binary files a/__pycache__/rebel.cpython-38.pyc and b/__pycache__/rebel.cpython-38.pyc differ
 
app.py CHANGED
@@ -14,7 +14,7 @@ network_filename = "test.html"
 
 state_variables = {
     'has_run':False,
-    'wiki_suggestions': "",
+    'wiki_suggestions': [],
     'wiki_text' : [],
     'nodes':[]
 }
@@ -23,11 +23,10 @@ for k, v in state_variables.items():
     if k not in st.session_state:
         st.session_state[k] = v
 
-def clip_text(t, lenght = 5):
+def clip_text(t, lenght = 10):
     return ".".join(t.split(".")[:lenght]) + "."
 
 
-
 def generate_graph():
     if 'wiki_text' not in st.session_state:
         return
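Note on `clip_text` (the `lenght` spelling is reproduced as committed): it truncates a summary to the first `lenght` period-delimited chunks, so abbreviations like "U.S." count as sentence breaks. A minimal usage sketch:

```python
# Minimal sketch of clip_text as committed; `lenght` is verbatim.
def clip_text(t, lenght=10):
    return ".".join(t.split(".")[:lenght]) + "."

summary = "One. Two. Three. Four. Five."
print(clip_text(summary, lenght=3))  # -> "One. Two. Three."
```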
@@ -42,12 +41,14 @@ def generate_graph():
     st.success('Done!')
 
 def show_suggestion():
-    reset_session()
+    st.session_state['wiki_suggestions'] = []
     with st.spinner(text="fetching wiki topics..."):
         if st.session_state['input_method'] == "wikipedia":
             text = st.session_state.text
             if text is not None:
-                st.session_state['wiki_suggestions'] = wikipedia.search(text, results = 3)
+                subjects = text.split(",")
+                for subj in subjects:
+                    st.session_state['wiki_suggestions'] += wikipedia.search(subj, results = 3)
 
 def show_wiki_text(page_title):
     with st.spinner(text="fetching wiki page..."):
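The hunk above replaces the single-term lookup with a comma-separated multi-subject search. A self-contained sketch of the same pattern, assuming the `wikipedia` PyPI package; the `.strip()` is an addition the committed code does not perform:

```python
import wikipedia

def suggest(query: str, per_subject: int = 3) -> list:
    """Collect up to `per_subject` page suggestions per comma-separated term."""
    suggestions = []
    for subject in query.split(","):
        subject = subject.strip()  # the commit's bare split(",") keeps spaces
        if subject:
            suggestions += wikipedia.search(subject, results=per_subject)
    return suggestions

print(suggest("Napoleon, Waterloo"))
```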
@@ -64,7 +65,8 @@ def add_text(term):
     try:
         extra_text = clip_text(wikipedia.page(title=term, auto_suggest=True).summary)
         st.session_state['wiki_text'].append(extra_text)
-    except wikipedia.DisambiguationError as e:
+    except wikipedia.WikipediaException:
+        st.error("Woops, no wikipedia page for this node")
         st.session_state["nodes"].remove(term)
 
 def reset_session():
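Switching the except clause from `DisambiguationError` to `wikipedia.WikipediaException` broadens the handler: in the `wikipedia` package, `WikipediaException` is the base class of `DisambiguationError` and `PageError`, so missing pages are now caught as well. A sketch of the resulting behavior:

```python
import wikipedia

def safe_summary(term: str):
    """Return a page summary, or None on any wikipedia-package error."""
    try:
        return wikipedia.page(title=term, auto_suggest=True).summary
    except wikipedia.WikipediaException:
        return None  # DisambiguationError, PageError, HTTP errors, ...
```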
@@ -74,6 +76,17 @@ def reset_session():
 st.title('REBELious knowledge graph generation')
 st.session_state['input_method'] = "wikipedia"
 
+st.sidebar.markdown(
+    """
+    # how to
+    - Enter wikipedia search terms, separated by comma's
+    - Choose one or more of the suggested pages
+    - Click generate!
+    """
+)
+
+st.sidebar.button("Reset", on_click=reset_session, key="reset_key")
+
 # st.selectbox(
 #     'input method',
 #     ('wikipedia', 'free text'), key="input_method")
@@ -82,13 +95,25 @@ if st.session_state['input_method'] != "wikipedia":
     # st.text_area("Your text", key="text")
     pass
 else:
-    st.text_input("wikipedia search term",on_change=show_suggestion, key="text")
+    cols = st.columns([8, 1])
+    with cols[0]:
+        st.text_input("wikipedia search term", on_change=show_suggestion, key="text")
+    with cols[1]:
+        st.text('')
+        st.text('')
+        st.button("Search", on_click=show_suggestion, key="show_suggestion_key")
 
 if len(st.session_state['wiki_suggestions']) != 0:
-    columns = st.columns([1] * len(st.session_state['wiki_suggestions']))
-    for i, (c, s) in enumerate(zip(columns, st.session_state['wiki_suggestions'])):
-        with c:
-            st.button(s, on_click=show_wiki_text, args=(s,), key=str(i)+s)
+
+    num_cols = 10
+    num_buttons = len(st.session_state['wiki_suggestions'])
+    columns = st.columns([1] * num_cols + [1])
+    print(st.session_state['wiki_suggestions'])
+
+    for q in range(1 + num_buttons//num_cols):
+        for i, (c, s) in enumerate(zip(columns, st.session_state['wiki_suggestions'][q*num_cols: (q+1)*num_cols])):
+            with c:
+                st.button(s, on_click=show_wiki_text, args=(s,), key=str(i)+s)
 
 if len(st.session_state['wiki_text']) != 0:
     for i, t in enumerate(st.session_state['wiki_text']):
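The suggestion buttons now render in rows of ten. Two details worth flagging: `st.columns([1] * num_cols + [1])` creates eleven equal-width columns, so the last one never receives a button, and the `print` is debug output left in the commit. A trimmed sketch of the row-chunking pattern under those observations:

```python
# Row-chunked button grid, assuming Streamlit and placeholder data.
# Columns are created once and reused, so later rows stack beneath
# earlier ones inside each column.
import streamlit as st

suggestions = ["Paris", "Paris Hilton", "Paris, Texas"]  # placeholder data
NUM_COLS = 10

columns = st.columns(NUM_COLS)
for start in range(0, len(suggestions), NUM_COLS):
    for i, (col, label) in enumerate(zip(columns, suggestions[start:start + NUM_COLS])):
        with col:
            st.button(label, key=f"{start + i}-{label}")  # unique key per button
```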
@@ -102,17 +127,26 @@ if st.session_state['input_method'] != "wikipedia":
     # st.button("generate", on_click=generate_graph, key="gen_graph")
     pass
 else:
-    st.button("generate", on_click=generate_graph, key="gen_graph")
+    if len(st.session_state['wiki_text']) > 0:
+        st.button("Generate", on_click=generate_graph, key="gen_graph")
 
 
 if st.session_state['has_run']:
-    cols = st.columns([4, 1])
+    st.sidebar.markdown(
+        """
+        # How to expand the graph
+        - Click a button on the right to expand that node
+        - Only nodes that have wiki pages will be expanded
+        - Hit the Generate button again to expand your graph!
+        """
+    )
+
+    cols = st.columns([5, 1])
     with cols[0]:
         HtmlFile = open(network_filename, 'r', encoding='utf-8')
         source_code = HtmlFile.read()
         components.html(source_code, height=2000,width=2000)
     with cols[1]:
-        st.text("expand")
         for i,s in enumerate(st.session_state["nodes"]):
             st.button(s, on_click=add_text, args=(s,), key=s+str(i))
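For the graph pane, the app inlines the pyvis-generated HTML file with `components.html`. A compact sketch of that embed step, using a context manager instead of the bare `open` in the committed code:

```python
# Embed a pyvis-generated HTML network in Streamlit, as the hunk above does.
import streamlit.components.v1 as components

network_filename = "test.html"  # written earlier by pyvis
with open(network_filename, "r", encoding="utf-8") as f:
    components.html(f.read(), height=2000, width=2000)
```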
rebel.py CHANGED
@@ -30,7 +30,7 @@ DEFAULT_LABEL_COLORS = {
 
 def generate_knowledge_graph(texts: List[str], filename: str):
     nlp = spacy.load("en_core_web_sm")
-    doc = nlp("\n".join(texts))
+    doc = nlp("\n".join(texts).lower())
     NERs = [ent.text for ent in doc.ents]
     NER_types = [ent.label_ for ent in doc.ents]
     for nr, nrt in zip(NERs, NER_types):
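Lowercasing the joined text before NER makes differently-cased mentions collapse into one node, but spaCy's statistical NER uses capitalization as a feature, so recall on lowercased input can drop. A hedged alternative (not what the commit does) is to run NER on the original casing and lowercase only the node keys:

```python
import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("Napoleon was exiled to Elba.")  # original casing kept for NER
node_types = {ent.text.lower(): ent.label_ for ent in doc.ents}
print(node_types)  # e.g. {'napoleon': 'PERSON', 'elba': 'GPE'}
```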
@@ -40,8 +40,8 @@ def generate_knowledge_graph(texts: List[str], filename: str):
     for triplet in texts:
         triplets.extend(generate_partial_graph(triplet))
     print(generate_partial_graph.cache_info())
-    heads = [ t["head"] for t in triplets]
-    tails = [ t["tail"] for t in triplets]
+    heads = [ t["head"].lower() for t in triplets]
+    tails = [ t["tail"].lower() for t in triplets]
 
     nodes = set(heads + tails)
     net = Network(directed=True)
@@ -55,10 +55,10 @@ def generate_knowledge_graph(texts: List[str], filename: str):
         net.add_node(n, shape="circle")
 
     unique_triplets = set()
-    stringify_trip = lambda x : x["tail"] + x["head"] + x["type"]
+    stringify_trip = lambda x : x["tail"] + x["head"] + x["type"].lower()
     for triplet in triplets:
         if stringify_trip(triplet) not in unique_triplets:
-            net.add_edge(triplet["tail"], triplet["head"], title=triplet["type"], label=triplet["type"])
+            net.add_edge(triplet["head"].lower(), triplet["tail"].lower(), title=triplet["type"], label=triplet["type"])
             unique_triplets.add(stringify_trip(triplet))
 
     net.repulsion(
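Two notes on this hunk: the edge direction flips from tail→head to head→tail, and the dedup key concatenates tail + head + type, which can collide ('ab'+'c' equals 'a'+'bc'). A tuple key, sketched below as a suggestion rather than the committed code, cannot collide:

```python
# Collision-free triplet dedup keys (a suggestion, not the committed code).
def trip_key(t: dict) -> tuple:
    return (t["head"].lower(), t["tail"].lower(), t["type"].lower())

seen, unique = set(), []
for t in [{"head": "Napoleon", "tail": "Elba", "type": "exiled to"},
          {"head": "napoleon", "tail": "elba", "type": "exiled to"}]:
    k = trip_key(t)
    if k not in seen:
        seen.add(k)
        unique.append(t)
print(len(unique))  # 1: the case-insensitive duplicate is dropped
```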
@@ -74,7 +74,8 @@ def generate_knowledge_graph(texts: List[str], filename: str):
 
 
 @lru_cache
-def generate_partial_graph(text):
+def generate_partial_graph(text: str):
+    print(text[0:20], hash(text))
     triplet_extractor = pipeline('text2text-generation', model='Babelscape/rebel-large', tokenizer='Babelscape/rebel-large')
     a = triplet_extractor(text, return_tensors=True, return_text=False)[0]["generated_token_ids"]["output_ids"]
     extracted_text = triplet_extractor.tokenizer.batch_decode(a)
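`@lru_cache` memoizes `generate_partial_graph` per input text (the bare, unparenthesized decorator form requires Python 3.8+, which matches the cpython-38 bytecode in this commit), so regenerating an expanded graph reuses earlier extractions. Because the commit constructs the HuggingFace pipeline inside the function, every cache miss reloads REBEL; a sketch that hoists the pipeline to module scope, which is an assumption rather than the committed layout:

```python
from functools import lru_cache
from transformers import pipeline

# Load REBEL once at import time instead of on every cache miss.
triplet_extractor = pipeline('text2text-generation',
                             model='Babelscape/rebel-large',
                             tokenizer='Babelscape/rebel-large')

@lru_cache(maxsize=None)
def generate_partial_graph_cached(text: str):
    # Same decoding path as the committed function, minus the model reload.
    ids = triplet_extractor(text, return_tensors=True,
                            return_text=False)[0]["generated_token_ids"]["output_ids"]
    return triplet_extractor.tokenizer.batch_decode(ids)
```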