Spaces:
Runtime error
Runtime error
arogeriogel
committed on
Commit
•
2e33ce2
1
Parent(s):
981d935
fix bug
Browse files
- app.py +16 -24
- requirements.txt +5 -5
app.py
CHANGED
@@ -15,7 +15,7 @@ from detoxify import Detoxify
|
|
15 |
|
16 |
st.title("Anonymise your text!")
|
17 |
st.markdown(
|
18 |
-
"This mini-app anonymises text using Flair and Presidio. You can find the code in the Files and
|
19 |
)
|
20 |
|
21 |
# Configure logger
|
@@ -25,7 +25,7 @@ logging.basicConfig(format="\n%(asctime)s\n%(message)s", level=logging.INFO, for
|
|
25 |
###### Define functions ######
|
26 |
##############################
|
27 |
|
28 |
-
@st.
|
29 |
def analyzer_engine():
|
30 |
"""Return AnalyzerEngine."""
|
31 |
analyzer = AnalyzerEngine()
|
@@ -40,19 +40,6 @@ def analyze(**kwargs):
|
|
40 |
if "entities" not in kwargs or "All" in kwargs["entities"]:
|
41 |
kwargs["entities"] = None
|
42 |
|
43 |
-
# if st.session_state.excluded_words:
|
44 |
-
|
45 |
-
# deny_list = [i.strip() for i in st.session_state.excluded_words.split(',')]
|
46 |
-
|
47 |
-
# logging.info(
|
48 |
-
# f"words excluded : {deny_list}\n"
|
49 |
-
# )
|
50 |
-
|
51 |
-
# excluded_words_recognizer = PatternRecognizer(supported_entity="MANUAL ADD",
|
52 |
-
# name="Excluded words recognizer",
|
53 |
-
# deny_list=deny_list)
|
54 |
-
# analyzer_engine().registry.add_recognizer(excluded_words_recognizer)
|
55 |
-
|
56 |
results = analyzer_engine().analyze(**kwargs)
|
57 |
st.session_state.analyze_results = results
|
58 |
|
@@ -119,17 +106,17 @@ def analyze_text():
|
|
119 |
)
|
120 |
|
121 |
if st.session_state.excluded_words:
|
122 |
-
|
123 |
|
124 |
if st.session_state.allowed_words:
|
125 |
-
|
126 |
|
127 |
logging.info(
|
128 |
f"analyse results: {st.session_state.analyze_results}\n"
|
129 |
)
|
130 |
|
131 |
|
132 |
-
def
|
133 |
deny_list = [i.strip() for i in st.session_state.excluded_words.split(',')]
|
134 |
def _deny_list_to_regex(deny_list):
|
135 |
"""
|
@@ -161,7 +148,13 @@ def include_manual_input():
|
|
161 |
score=1.0,
|
162 |
)
|
163 |
|
164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
|
166 |
results = EntityRecognizer.remove_duplicates(results)
|
167 |
|
@@ -171,7 +164,7 @@ def include_manual_input():
|
|
171 |
f"analyse results after adding excluded words: {st.session_state.analyze_results}\n"
|
172 |
)
|
173 |
|
174 |
-
def
|
175 |
analyze_results_fltered=[]
|
176 |
|
177 |
for token in st.session_state.analyze_results:
|
@@ -182,7 +175,7 @@ def exclude_manual_input():
|
|
182 |
)
|
183 |
st.session_state.analyze_results = analyze_results_fltered
|
184 |
|
185 |
-
@st.
|
186 |
def anonymizer_engine():
|
187 |
"""Return AnonymizerEngine."""
|
188 |
return AnonymizerEngine()
|
@@ -215,7 +208,6 @@ def anonymise_text():
|
|
215 |
def clear_results():
|
216 |
st.session_state.anon_results=""
|
217 |
st.session_state.analyze_results=""
|
218 |
-
# analyzer_engine().registry.remove_recognizer("Excluded words recognizer")
|
219 |
|
220 |
#######################################
|
221 |
#### Initialize "global" variables ####
|
@@ -305,10 +297,10 @@ with col1:
|
|
305 |
annotated_text(*annotated_tokens)
|
306 |
st.write(st.session_state.analyze_results)
|
307 |
if not st.session_state.analyze_results and analyze_now and not st.session_state.text_error:
|
308 |
-
st.write("No PII was found.")
|
309 |
|
310 |
with col2:
|
311 |
if st.session_state.anon_results:
|
312 |
st.write(st.session_state.anon_results.text)
|
313 |
if not st.session_state.analyze_results and anonymise_now and not st.session_state.text_error:
|
314 |
-
st.write("No PII was found.")
|
|
|
15 |
|
16 |
st.title("Anonymise your text!")
|
17 |
st.markdown(
|
18 |
+
"This mini-app anonymises text using Flair and Presidio. You can find the code in the Files and Versions tabs in the [HuggingFace page](https://huggingface.co/spaces/arogeriogel/anonymise_this)"
|
19 |
)
|
20 |
|
21 |
# Configure logger
|
|
|
25 |
###### Define functions ######
|
26 |
##############################
|
27 |
|
28 |
+
@st.cache_resource(show_spinner="Fetching model from cache...")
|
29 |
def analyzer_engine():
|
30 |
"""Return AnalyzerEngine."""
|
31 |
analyzer = AnalyzerEngine()
|
|
|
40 |
if "entities" not in kwargs or "All" in kwargs["entities"]:
|
41 |
kwargs["entities"] = None
|
42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
results = analyzer_engine().analyze(**kwargs)
|
44 |
st.session_state.analyze_results = results
|
45 |
|
|
|
106 |
)
|
107 |
|
108 |
if st.session_state.excluded_words:
|
109 |
+
exclude_manual_input()
|
110 |
|
111 |
if st.session_state.allowed_words:
|
112 |
+
allow_manual_input()
|
113 |
|
114 |
logging.info(
|
115 |
f"analyse results: {st.session_state.analyze_results}\n"
|
116 |
)
|
117 |
|
118 |
|
119 |
+
def exclude_manual_input():
|
120 |
deny_list = [i.strip() for i in st.session_state.excluded_words.split(',')]
|
121 |
def _deny_list_to_regex(deny_list):
|
122 |
"""
|
|
|
148 |
score=1.0,
|
149 |
)
|
150 |
|
151 |
+
# check if already in detected strings
|
152 |
+
found=False
|
153 |
+
for token in st.session_state.analyze_results:
|
154 |
+
if token.start==start and token.end==end:
|
155 |
+
found=True
|
156 |
+
if found==False:
|
157 |
+
results.append(pattern_result)
|
158 |
|
159 |
results = EntityRecognizer.remove_duplicates(results)
|
160 |
|
|
|
164 |
f"analyse results after adding excluded words: {st.session_state.analyze_results}\n"
|
165 |
)
|
166 |
|
167 |
+
def allow_manual_input():
|
168 |
analyze_results_fltered=[]
|
169 |
|
170 |
for token in st.session_state.analyze_results:
|
|
|
175 |
)
|
176 |
st.session_state.analyze_results = analyze_results_fltered
|
177 |
|
178 |
+
@st.cache_resource(show_spinner="Fetching model from cache...")
|
179 |
def anonymizer_engine():
|
180 |
"""Return AnonymizerEngine."""
|
181 |
return AnonymizerEngine()
|
|
|
208 |
def clear_results():
|
209 |
st.session_state.anon_results=""
|
210 |
st.session_state.analyze_results=""
|
|
|
211 |
|
212 |
#######################################
|
213 |
#### Initialize "global" variables ####
|
|
|
297 |
annotated_text(*annotated_tokens)
|
298 |
st.write(st.session_state.analyze_results)
|
299 |
if not st.session_state.analyze_results and analyze_now and not st.session_state.text_error:
|
300 |
+
st.write("### No PII was found. ###")
|
301 |
|
302 |
with col2:
|
303 |
if st.session_state.anon_results:
|
304 |
st.write(st.session_state.anon_results.text)
|
305 |
if not st.session_state.analyze_results and anonymise_now and not st.session_state.text_error:
|
306 |
+
st.write("### No PII was found. ###")
|
requirements.txt
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
detoxify==0.5.1
|
2 |
-
flair==0.
|
3 |
-
presidio-anonymizer
|
4 |
-
presidio-analyzer
|
5 |
-
st-annotated-text
|
6 |
-
spacy
|
7 |
https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.0.0/en_core_web_lg-3.0.0.tar.gz#egg=en_core_web_lg
|
|
|
1 |
detoxify==0.5.1
|
2 |
+
flair==0.12.2
|
3 |
+
presidio-anonymizer==2.2.33
|
4 |
+
presidio-analyzer==2.2.33
|
5 |
+
st-annotated-text==4.0.1
|
6 |
+
spacy==3.7.1
|
7 |
https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.0.0/en_core_web_lg-3.0.0.tar.gz#egg=en_core_web_lg
|