Spaces:
Build error
Build error
Commit
·
a50857e
1
Parent(s):
e568ca3
Filtering
Browse files
app.py
CHANGED
@@ -17,6 +17,7 @@ article = st.text_area('Article to analyze:', value=open("example.txt").read())
|
|
17 |
|
18 |
seen_entities = []
|
19 |
seen_surnames = []
|
|
|
20 |
if st.button('Submit'):
|
21 |
good_ents = []
|
22 |
|
@@ -25,18 +26,24 @@ if st.button('Submit'):
|
|
25 |
for ent in doc.ents:
|
26 |
if ent._.kb_qid is None or ent.label_ not in ["ORG", "PERSON", "GPE"] or ent.text in seen_entities:
|
27 |
continue
|
|
|
|
|
28 |
|
29 |
-
if ent.
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
seen_surnames.append(ent.text.split()[-1])
|
37 |
|
38 |
seen_entities.append(ent.text)
|
39 |
print((ent.text, ent.label_, ent._.kb_qid, ent._.url_wikidata, ent._.nerd_score))
|
|
|
|
|
|
|
|
|
|
|
40 |
r = requests.get("https://www.wikidata.org/w/api.php?action=wbgetclaims&format=json&property=P18&entity=" + ent._.kb_qid)
|
41 |
data = r.json()["claims"]
|
42 |
if "P18" in data.keys():
|
|
|
17 |
|
18 |
seen_entities = []
|
19 |
seen_surnames = []
|
20 |
+
seen_qids = []
|
21 |
if st.button('Submit'):
|
22 |
good_ents = []
|
23 |
|
|
|
26 |
for ent in doc.ents:
|
27 |
if ent._.kb_qid is None or ent.label_ not in ["ORG", "PERSON", "GPE"] or ent.text in seen_entities:
|
28 |
continue
|
29 |
+
if ent._.nerd_score < 0.5:
|
30 |
+
continue
|
31 |
|
32 |
+
if len(ent.text.split()) == 1:
|
33 |
+
# Single name
|
34 |
+
if ent.text in seen_surnames:
|
35 |
+
continue
|
36 |
+
elif ent.label_ == "PERSON":
|
37 |
+
# Multipart name
|
38 |
+
seen_surnames.append(ent.text.split()[-1])
|
|
|
39 |
|
40 |
seen_entities.append(ent.text)
|
41 |
print((ent.text, ent.label_, ent._.kb_qid, ent._.url_wikidata, ent._.nerd_score))
|
42 |
+
|
43 |
+
if ent._.kb_qid in seen_qids:
|
44 |
+
continue
|
45 |
+
seen_qids.append(ent._.kb_qid)
|
46 |
+
|
47 |
r = requests.get("https://www.wikidata.org/w/api.php?action=wbgetclaims&format=json&property=P18&entity=" + ent._.kb_qid)
|
48 |
data = r.json()["claims"]
|
49 |
if "P18" in data.keys():
|