Spaces: Build error

updates

- .vscode/settings.json +3 -3
- __pycache__/app.cpython-38.pyc +0 -0
- __pycache__/rebel.cpython-38.pyc +0 -0
- app.py +48 -14
- rebel.py +7 -6
.vscode/settings.json
CHANGED
@@ -1,7 +1,7 @@
 {
     "workbench.colorCustomizations": {
-        "activityBar.background": "#…",
-        "titleBar.activeBackground": "#…",
-        "titleBar.activeForeground": "#…"
+        "activityBar.background": "#09323E",
+        "titleBar.activeBackground": "#0C4656",
+        "titleBar.activeForeground": "#F6FCFE"
     }
 }
__pycache__/app.cpython-38.pyc
CHANGED
Binary files a/__pycache__/app.cpython-38.pyc and b/__pycache__/app.cpython-38.pyc differ

__pycache__/rebel.cpython-38.pyc
CHANGED
Binary files a/__pycache__/rebel.cpython-38.pyc and b/__pycache__/rebel.cpython-38.pyc differ
app.py
CHANGED
@@ -14,7 +14,7 @@ network_filename = "test.html"
 
 state_variables = {
     'has_run':False,
-    'wiki_suggestions': …,
+    'wiki_suggestions': [],
     'wiki_text' : [],
     'nodes':[]
 }
@@ -23,11 +23,10 @@ for k, v in state_variables.items():
     if k not in st.session_state:
         st.session_state[k] = v
 
-def clip_text(t, lenght = …):
+def clip_text(t, lenght = 10):
     return ".".join(t.split(".")[:lenght]) + "."
 
 
-
 def generate_graph():
     if 'wiki_text' not in st.session_state:
         return
@@ -42,12 +41,14 @@ def generate_graph():
     st.success('Done!')
 
 def show_suggestion():
-
+    st.session_state['wiki_suggestions'] = []
     with st.spinner(text="fetching wiki topics..."):
         if st.session_state['input_method'] == "wikipedia":
             text = st.session_state.text
             if text is not None:
-                …
+                subjects = text.split(",")
+                for subj in subjects:
+                    st.session_state['wiki_suggestions'] += wikipedia.search(subj, results = 3)
 
 def show_wiki_text(page_title):
     with st.spinner(text="fetching wiki page..."):
@@ -64,7 +65,8 @@ def add_text(term):
     try:
         extra_text = clip_text(wikipedia.page(title=term, auto_suggest=True).summary)
         st.session_state['wiki_text'].append(extra_text)
-    except wikipedia.…:
+    except wikipedia.WikipediaException:
+        st.error("Woops, no wikipedia page for this node")
         st.session_state["nodes"].remove(term)
 
 def reset_session():
@@ -74,6 +76,17 @@ def reset_session():
 st.title('REBELious knowledge graph generation')
 st.session_state['input_method'] = "wikipedia"
 
+st.sidebar.markdown(
+    """
+    # how to
+    - Enter wikipedia search terms, separated by comma's
+    - Choose one or more of the suggested pages
+    - Click generate!
+    """
+)
+
+st.sidebar.button("Reset", on_click=reset_session, key="reset_key")
+
 # st.selectbox(
 #     'input method',
 #     ('wikipedia', 'free text'), key="input_method")
@@ -82,13 +95,25 @@ if st.session_state['input_method'] != "wikipedia":
     # st.text_area("Your text", key="text")
     pass
 else:
-    st.…
+    cols = st.columns([8, 1])
+    with cols[0]:
+        st.text_input("wikipedia search term", on_change=show_suggestion, key="text")
+    with cols[1]:
+        st.text('')
+        st.text('')
+        st.button("Search", on_click=show_suggestion, key="show_suggestion_key")
 
 if len(st.session_state['wiki_suggestions']) != 0:
-    …
-    …
-    …
-    …
+
+    num_cols = 10
+    num_buttons = len(st.session_state['wiki_suggestions'])
+    columns = st.columns([1] * num_cols + [1])
+    print(st.session_state['wiki_suggestions'])
+
+    for q in range(1 + num_buttons//num_cols):
+        for i, (c, s) in enumerate(zip(columns, st.session_state['wiki_suggestions'][q*num_cols: (q+1)*num_cols])):
+            with c:
+                st.button(s, on_click=show_wiki_text, args=(s,), key=str(i)+s)
 
 if len(st.session_state['wiki_text']) != 0:
     for i, t in enumerate(st.session_state['wiki_text']):
@@ -102,17 +127,26 @@ if st.session_state['input_method'] != "wikipedia":
     # st.button("generate", on_click=generate_graph, key="gen_graph")
     pass
 else:
-    st.…
+    if len(st.session_state['wiki_text']) > 0:
+        st.button("Generate", on_click=generate_graph, key="gen_graph")
 
 
 if st.session_state['has_run']:
-    …
+    st.sidebar.markdown(
+        """
+        # How to expand the graph
+        - Click a button on the right to expand that node
+        - Only nodes that have wiki pages will be expanded
+        - Hit the Generate button again to expand your graph!
+        """
+    )
+
+    cols = st.columns([5, 1])
     with cols[0]:
         HtmlFile = open(network_filename, 'r', encoding='utf-8')
         source_code = HtmlFile.read()
         components.html(source_code, height=2000,width=2000)
     with cols[1]:
-        st.text("expand")
         for i,s in enumerate(st.session_state["nodes"]):
             st.button(s, on_click=add_text, args=(s,), key=s+str(i))
 
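For orientation: the reworked show_suggestion boils down to clearing the previous suggestions, splitting the search box input on commas, and collecting up to three wikipedia.search hits per term. A minimal sketch of that flow with Streamlit stripped out (assuming the wikipedia PyPI package; the "fox, owl" input is a made-up example):

    import wikipedia

    def suggestions_for(raw_input: str) -> list:
        # One wikipedia.search call per comma-separated term,
        # capped at 3 candidate page titles each, as in the commit.
        suggestions = []
        for subj in raw_input.split(","):
            suggestions += wikipedia.search(subj, results=3)
        return suggestions

    print(suggestions_for("fox, owl"))

Resetting st.session_state['wiki_suggestions'] to [] at the top of the callback is what keeps stale suggestions from accumulating across reruns.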
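One more note on app.py: the suggestion buttons are laid out by chunking a flat list of titles into rows of ten, one st.columns slot per button, with zip() absorbing a partial final row. The chunking arithmetic in isolation (plain Python; the titles are hypothetical stand-ins for search results):

    suggestions = ["page %d" % n for n in range(23)]  # hypothetical titles
    num_cols = 10

    for q in range(1 + len(suggestions) // num_cols):
        row = suggestions[q * num_cols : (q + 1) * num_cols]
        for i, title in enumerate(row):
            # app.py renders st.button(title, key=str(i)+title) in column i here
            print("row", q, "col", i, title)

A caveat: the widget key str(i)+s varies only with the position inside a row, so the same title landing at the same column index in two different rows would collide as a duplicate Streamlit key.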
rebel.py
CHANGED
@@ -30,7 +30,7 @@ DEFAULT_LABEL_COLORS = {
 
 def generate_knowledge_graph(texts: List[str], filename: str):
     nlp = spacy.load("en_core_web_sm")
-    doc = nlp("\n".join(texts))
+    doc = nlp("\n".join(texts).lower())
     NERs = [ent.text for ent in doc.ents]
     NER_types = [ent.label_ for ent in doc.ents]
     for nr, nrt in zip(NERs, NER_types):
@@ -40,8 +40,8 @@ def generate_knowledge_graph(texts: List[str], filename: str):
     for triplet in texts:
         triplets.extend(generate_partial_graph(triplet))
     print(generate_partial_graph.cache_info())
-    heads = [ t["head"] for t in triplets]
-    tails = [ t["tail"] for t in triplets]
+    heads = [ t["head"].lower() for t in triplets]
+    tails = [ t["tail"].lower() for t in triplets]
 
     nodes = set(heads + tails)
     net = Network(directed=True)
@@ -55,10 +55,10 @@ def generate_knowledge_graph(texts: List[str], filename: str):
         net.add_node(n, shape="circle")
 
     unique_triplets = set()
-    stringify_trip = lambda x : x["tail"] + x["head"] + x["type"]
+    stringify_trip = lambda x : x["tail"] + x["head"] + x["type"].lower()
     for triplet in triplets:
         if stringify_trip(triplet) not in unique_triplets:
-            net.add_edge(triplet["…
+            net.add_edge(triplet["head"].lower(), triplet["tail"].lower(), title=triplet["type"], label=triplet["type"])
             unique_triplets.add(stringify_trip(triplet))
 
     net.repulsion(
@@ -74,7 +74,8 @@ def generate_knowledge_graph(texts: List[str], filename: str):
 
 
 @lru_cache
-def generate_partial_graph(text):
+def generate_partial_graph(text: str):
+    print(text[0:20], hash(text))
     triplet_extractor = pipeline('text2text-generation', model='Babelscape/rebel-large', tokenizer='Babelscape/rebel-large')
     a = triplet_extractor(text, return_tensors=True, return_text=False)[0]["generated_token_ids"]["output_ids"]
     extracted_text = triplet_extractor.tokenizer.batch_decode(a)
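The .lower() calls threaded through generate_knowledge_graph all serve node identity: entity names, triplet heads, and triplet tails are normalised so that "Fox" and "fox" collapse into a single pyvis node. The neighbouring edge de-duplication, sketched in isolation (the triplet dicts are hypothetical REBEL-style outputs; stringify_trip mirrors the commit's lambda):

    # Hypothetical REBEL-style triplets; the second differs only in case.
    triplets = [
        {"head": "fox", "tail": "animal", "type": "subclass of"},
        {"head": "fox", "tail": "animal", "type": "Subclass Of"},
    ]

    # As in the commit: only the relation type is lowercased in the key.
    stringify_trip = lambda x: x["tail"] + x["head"] + x["type"].lower()

    unique_triplets = set()
    edges = []
    for t in triplets:
        if stringify_trip(t) not in unique_triplets:
            edges.append((t["head"].lower(), t["tail"].lower(), t["type"]))
            unique_triplets.add(stringify_trip(t))

    print(edges)  # one edge: the case-variant duplicate was dropped

Heads and tails enter the key unmodified, so triplets differing only in head or tail capitalisation would still produce two keys even though add_edge lowercases both endpoints.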
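Finally, the generate_partial_graph changes are about caching: @lru_cache keys on the exact input string, and the new print(text[0:20], hash(text)) fires only on a cache miss, making hits and misses visible in the logs alongside the existing cache_info() print. A runnable sketch of the mechanism, with a cheap stand-in for the REBEL pipeline call:

    from functools import lru_cache

    @lru_cache  # bare decorator form requires Python 3.8+
    def generate_partial_graph(text: str):
        print(text[0:20], hash(text))  # fires only on a cache miss
        return len(text)  # stand-in for the expensive pipeline() call

    generate_partial_graph("the quick brown fox")
    generate_partial_graph("the quick brown fox")  # cache hit: silent
    print(generate_partial_graph.cache_info())     # hits=1, misses=1

Because the pipeline(...) construction happens inside the function body, a cache hit also skips re-loading the REBEL model for a repeated text.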