naveed-stockmark committed on
Commit
c8746df
·
verified ·
1 Parent(s): a916ec7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -4
app.py CHANGED
@@ -101,7 +101,7 @@ def get_rebel_infer_df(path="./rebel_inference_processed_ss.csv"):
101
 
102
  # Data source 3: luke inference
103
  # @st.cache_data(persist="disk")
104
- def get_luke_infer_df(path="./luke_fulltext_ss_infer_20240112.csv"):
105
  luke_infer_df = pd.read_csv(path)
106
 
107
  luke_infer_df = luke_infer_df.rename(columns={"source_mention": "source_en", "target_mention": "target_en", "pred": "relation"})
@@ -136,7 +136,7 @@ instance_df = build_instance_df()
136
 
137
  # Get KG df
138
  @st.cache_data(persist="disk")
139
- def get_kg_df(path="./kg_master_ss_sample_20240215.csv"):
140
  kg_df = pd.read_csv(path)
141
  kg_df['kg_id'] = kg_df.index
142
  kg_df = kg_df[kg_df.mode_relation.apply(lambda x: x in ['material_of', 'part_of', 'has_use'])]
@@ -297,8 +297,11 @@ if len(match_df) > 0:
297
 
298
  prefix = edge_prefixes[k]
299
 
300
- source_url = "https://www.wikidata.org/wiki/Q" + str(int(path[f'{prefix}_source_wikidata']))
301
- target_url = "https://www.wikidata.org/wiki/Q" + str(int(path[f'{prefix}_target_wikidata']))
 
 
 
302
  relation_url = "https://www.wikidata.org/wiki/Property:P" + str(int(relation_to_id[path[f'{prefix}_mode_relation']]))
303
 
304
  source_en = path[f'{prefix}_source_en']
@@ -314,9 +317,14 @@ if len(match_df) > 0:
314
 
315
  instance_ids = eval(rel_data[f'{relation}_instances'])
316
  instances = instance_df.loc[instance_ids]
 
317
  neg_instance_ids = list(chain.from_iterable([eval(rel_data[k]) for k in rel_data.keys() if '_instances' in k and relation not in k]))
318
  neg_instances = instance_df.loc[neg_instance_ids]
319
 
 
 
 
 
320
  st.write(f"**Total Number of Evidence Instances:** {int(rel_data['n_evidence'])}")
321
  st.write(f"**Number of Instances that support the relation:** {int(rel_data['n_support'])}")
322
  st.write(f"**Average Evidence Score:** {rel_data['avg_score']}")
@@ -329,6 +337,7 @@ if len(match_df) > 0:
329
 
330
  count_dict = dict(Counter(instances.relation.to_list() + neg_instances.relation.to_list()))
331
  count_df = pd.DataFrame.from_dict(count_dict, orient='index')
 
332
  st.write("Evidence relation distribution for above edge")
333
  count_df
334
 
 
101
 
102
  # Data source 3: luke inference
103
  # @st.cache_data(persist="disk")
104
+ def get_luke_infer_df(path="../kg_infer/en_wiki/luke_fulltext_ss_infer_20240112.csv"):
105
  luke_infer_df = pd.read_csv(path)
106
 
107
  luke_infer_df = luke_infer_df.rename(columns={"source_mention": "source_en", "target_mention": "target_en", "pred": "relation"})
 
136
 
137
  # Get KG df
138
  @st.cache_data(persist="disk")
139
+ def get_kg_df(path="../knowledge_platform/kg_data/kg_master_ss_sample_20240215.csv"):
140
  kg_df = pd.read_csv(path)
141
  kg_df['kg_id'] = kg_df.index
142
  kg_df = kg_df[kg_df.mode_relation.apply(lambda x: x in ['material_of', 'part_of', 'has_use'])]
 
297
 
298
  prefix = edge_prefixes[k]
299
 
300
+ source_wikidata = int(path[f'{prefix}_source_wikidata'])
301
+ target_wikidata = int(path[f'{prefix}_target_wikidata'])
302
+
303
+ source_url = "https://www.wikidata.org/wiki/Q" + str(source_wikidata)
304
+ target_url = "https://www.wikidata.org/wiki/Q" + str(target_wikidata)
305
  relation_url = "https://www.wikidata.org/wiki/Property:P" + str(int(relation_to_id[path[f'{prefix}_mode_relation']]))
306
 
307
  source_en = path[f'{prefix}_source_en']
 
317
 
318
  instance_ids = eval(rel_data[f'{relation}_instances'])
319
  instances = instance_df.loc[instance_ids]
320
+
321
  neg_instance_ids = list(chain.from_iterable([eval(rel_data[k]) for k in rel_data.keys() if '_instances' in k and relation not in k]))
322
  neg_instances = instance_df.loc[neg_instance_ids]
323
 
324
+ # extra filtering
325
+
326
+ instances = instances[instances.apply(lambda x: x.source_wikidata in [source_wikidata, target_wikidata] and x.target_wikidata in [source_wikidata, target_wikidata], axis=1)]
327
+
328
  st.write(f"**Total Number of Evidence Instances:** {int(rel_data['n_evidence'])}")
329
  st.write(f"**Number of Instances that support the relation:** {int(rel_data['n_support'])}")
330
  st.write(f"**Average Evidence Score:** {rel_data['avg_score']}")
 
337
 
338
  count_dict = dict(Counter(instances.relation.to_list() + neg_instances.relation.to_list()))
339
  count_df = pd.DataFrame.from_dict(count_dict, orient='index')
340
+ count_df.columns = ['count']
341
  st.write("Evidence relation distribution for above edge")
342
  count_df
343