ghomasHudson committed on
Commit
a50857e
·
1 Parent(s): e568ca3
Files changed (1) hide show
  1. app.py +15 -8
app.py CHANGED
@@ -17,6 +17,7 @@ article = st.text_area('Article to analyze:', value=open("example.txt").read())
17
 
18
  seen_entities = []
19
  seen_surnames = []
 
20
  if st.button('Submit'):
21
  good_ents = []
22
 
@@ -25,18 +26,24 @@ if st.button('Submit'):
25
  for ent in doc.ents:
26
  if ent._.kb_qid is None or ent.label_ not in ["ORG", "PERSON", "GPE"] or ent.text in seen_entities:
27
  continue
 
 
28
 
29
- if ent.label_ == "PERSON":
30
- if len(ent.text.split()) == 1:
31
- # Single name
32
- if ent.text in seen_surnames:
33
- continue
34
- else:
35
- # Multipart name
36
- seen_surnames.append(ent.text.split()[-1])
37
 
38
  seen_entities.append(ent.text)
39
  print((ent.text, ent.label_, ent._.kb_qid, ent._.url_wikidata, ent._.nerd_score))
 
 
 
 
 
40
  r = requests.get("https://www.wikidata.org/w/api.php?action=wbgetclaims&format=json&property=P18&entity=" + ent._.kb_qid)
41
  data = r.json()["claims"]
42
  if "P18" in data.keys():
 
17
 
18
  seen_entities = []
19
  seen_surnames = []
20
+ seen_qids = []
21
  if st.button('Submit'):
22
  good_ents = []
23
 
 
26
  for ent in doc.ents:
27
  if ent._.kb_qid is None or ent.label_ not in ["ORG", "PERSON", "GPE"] or ent.text in seen_entities:
28
  continue
29
+ if ent._.nerd_score < 0.5:
30
+ continue
31
 
32
+ if len(ent.text.split()) == 1:
33
+ # Single name
34
+ if ent.text in seen_surnames:
35
+ continue
36
+ elif ent.label_ == "PERSON":
37
+ # Multipart name
38
+ seen_surnames.append(ent.text.split()[-1])
 
39
 
40
  seen_entities.append(ent.text)
41
  print((ent.text, ent.label_, ent._.kb_qid, ent._.url_wikidata, ent._.nerd_score))
42
+
43
+ if ent._.kb_qid in seen_qids:
44
+ continue
45
+ seen_qids.append(ent._.kb_qid)
46
+
47
  r = requests.get("https://www.wikidata.org/w/api.php?action=wbgetclaims&format=json&property=P18&entity=" + ent._.kb_qid)
48
  data = r.json()["claims"]
49
  if "P18" in data.keys():