Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -101,7 +101,7 @@ def get_rebel_infer_df(path="./rebel_inference_processed_ss.csv"):
|
|
101 |
|
102 |
# Data source 3: luke inference
|
103 |
# @st.cache_data(persist="disk")
|
104 |
-
def get_luke_infer_df(path="
|
105 |
luke_infer_df = pd.read_csv(path)
|
106 |
|
107 |
luke_infer_df = luke_infer_df.rename(columns={"source_mention": "source_en", "target_mention": "target_en", "pred": "relation"})
|
@@ -136,7 +136,7 @@ instance_df = build_instance_df()
|
|
136 |
|
137 |
# Get KG df
|
138 |
@st.cache_data(persist="disk")
|
139 |
-
def get_kg_df(path="
|
140 |
kg_df = pd.read_csv(path)
|
141 |
kg_df['kg_id'] = kg_df.index
|
142 |
kg_df = kg_df[kg_df.mode_relation.apply(lambda x: x in ['material_of', 'part_of', 'has_use'])]
|
@@ -297,8 +297,11 @@ if len(match_df) > 0:
|
|
297 |
|
298 |
prefix = edge_prefixes[k]
|
299 |
|
300 |
-
|
301 |
-
|
|
|
|
|
|
|
302 |
relation_url = "https://www.wikidata.org/wiki/Property:P" + str(int(relation_to_id[path[f'{prefix}_mode_relation']]))
|
303 |
|
304 |
source_en = path[f'{prefix}_source_en']
|
@@ -314,9 +317,14 @@ if len(match_df) > 0:
|
|
314 |
|
315 |
instance_ids = eval(rel_data[f'{relation}_instances'])
|
316 |
instances = instance_df.loc[instance_ids]
|
|
|
317 |
neg_instance_ids = list(chain.from_iterable([eval(rel_data[k]) for k in rel_data.keys() if '_instances' in k and relation not in k]))
|
318 |
neg_instances = instance_df.loc[neg_instance_ids]
|
319 |
|
|
|
|
|
|
|
|
|
320 |
st.write(f"**Total Number of Evidence Instances:** {int(rel_data['n_evidence'])}")
|
321 |
st.write(f"**Number of Instances that support the relation:** {int(rel_data['n_support'])}")
|
322 |
st.write(f"**Average Evidence Score:** {rel_data['avg_score']}")
|
@@ -329,6 +337,7 @@ if len(match_df) > 0:
|
|
329 |
|
330 |
count_dict = dict(Counter(instances.relation.to_list() + neg_instances.relation.to_list()))
|
331 |
count_df = pd.DataFrame.from_dict(count_dict, orient='index')
|
|
|
332 |
st.write("Evidence relation distribution for above edge")
|
333 |
count_df
|
334 |
|
|
|
101 |
|
102 |
# Data source 3: luke inference
|
103 |
# @st.cache_data(persist="disk")
|
104 |
+
def get_luke_infer_df(path="../kg_infer/en_wiki/luke_fulltext_ss_infer_20240112.csv"):
|
105 |
luke_infer_df = pd.read_csv(path)
|
106 |
|
107 |
luke_infer_df = luke_infer_df.rename(columns={"source_mention": "source_en", "target_mention": "target_en", "pred": "relation"})
|
|
|
136 |
|
137 |
# Get KG df
|
138 |
@st.cache_data(persist="disk")
|
139 |
+
def get_kg_df(path="../knowledge_platform/kg_data/kg_master_ss_sample_20240215.csv"):
|
140 |
kg_df = pd.read_csv(path)
|
141 |
kg_df['kg_id'] = kg_df.index
|
142 |
kg_df = kg_df[kg_df.mode_relation.apply(lambda x: x in ['material_of', 'part_of', 'has_use'])]
|
|
|
297 |
|
298 |
prefix = edge_prefixes[k]
|
299 |
|
300 |
+
source_wikidata = int(path[f'{prefix}_source_wikidata'])
|
301 |
+
target_wikidata = int(path[f'{prefix}_target_wikidata'])
|
302 |
+
|
303 |
+
source_url = "https://www.wikidata.org/wiki/Q" + str(source_wikidata)
|
304 |
+
target_url = "https://www.wikidata.org/wiki/Q" + str(target_wikidata)
|
305 |
relation_url = "https://www.wikidata.org/wiki/Property:P" + str(int(relation_to_id[path[f'{prefix}_mode_relation']]))
|
306 |
|
307 |
source_en = path[f'{prefix}_source_en']
|
|
|
317 |
|
318 |
instance_ids = eval(rel_data[f'{relation}_instances'])
|
319 |
instances = instance_df.loc[instance_ids]
|
320 |
+
|
321 |
neg_instance_ids = list(chain.from_iterable([eval(rel_data[k]) for k in rel_data.keys() if '_instances' in k and relation not in k]))
|
322 |
neg_instances = instance_df.loc[neg_instance_ids]
|
323 |
|
324 |
+
# extra filtering
|
325 |
+
|
326 |
+
instances = instances[instances.apply(lambda x: x.source_wikidata in [source_wikidata, target_wikidata] and x.target_wikidata in [source_wikidata, target_wikidata], axis=1)]
|
327 |
+
|
328 |
st.write(f"**Total Number of Evidence Instances:** {int(rel_data['n_evidence'])}")
|
329 |
st.write(f"**Number of Instances that support the relation:** {int(rel_data['n_support'])}")
|
330 |
st.write(f"**Average Evidence Score:** {rel_data['avg_score']}")
|
|
|
337 |
|
338 |
count_dict = dict(Counter(instances.relation.to_list() + neg_instances.relation.to_list()))
|
339 |
count_df = pd.DataFrame.from_dict(count_dict, orient='index')
|
340 |
+
count_df.columns = ['count']
|
341 |
st.write("Evidence relation distribution for above edge")
|
342 |
count_df
|
343 |
|