omri374 committed on
Commit
bbe07f8
1 Parent(s): 46c6fe7

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -149
app.py DELETED
@@ -1,149 +0,0 @@
1
- """Streamlit app for Presidio."""
2
-
3
- import json
4
- from json import JSONEncoder
5
-
6
- import pandas as pd
7
- import streamlit as st
8
- from presidio_analyzer import AnalyzerEngine, RecognizerRegistry
9
- from presidio_anonymizer import AnonymizerEngine
10
-
11
- from transformers_recognizer import TransformersRecognizer
12
-
13
-
14
- import spacy
15
# Streamlit re-executes this script from the top on every user interaction,
# so only fetch the spaCy model when it is not already installed rather than
# triggering a download on each run.
if not spacy.util.is_package("en_core_web_lg"):
    spacy.cli.download("en_core_web_lg")
16
-
17
-
18
- # Helper methods
19
@st.cache(allow_output_mutation=True)
def analyzer_engine():
    """Build the Presidio AnalyzerEngine used by the app.

    The engine is backed by a RecognizerRegistry that holds a
    TransformersRecognizer in addition to Presidio's predefined recognizers.
    The function is wrapped in ``st.cache`` so the result can be reused
    instead of being rebuilt on each call.
    """
    phi_recognizer = TransformersRecognizer()

    # The custom recognizer is registered first, then the predefined ones.
    recognizer_registry = RecognizerRegistry()
    recognizer_registry.add_recognizer(phi_recognizer)
    recognizer_registry.load_predefined_recognizers()

    return AnalyzerEngine(registry=recognizer_registry)
31
-
32
-
33
@st.cache(allow_output_mutation=True)
def anonymizer_engine():
    """Create the Presidio AnonymizerEngine (wrapped in ``st.cache``)."""
    anonymizer = AnonymizerEngine()
    return anonymizer
37
-
38
-
39
def get_supported_entities():
    """Ask the analyzer engine which entity types it supports."""
    analyzer = analyzer_engine()
    return analyzer.get_supported_entities()
42
-
43
-
44
def analyze(**kwargs):
    """Run the analyzer engine with the given keyword arguments.

    All keyword arguments are forwarded unchanged to the engine's
    ``analyze`` method, except ``entities``: when it is missing, ``None``,
    or contains ``"All"``, it is passed as ``None`` (no entity filter).

    Returns whatever the engine's ``analyze`` call returns.
    """
    entities = kwargs.get("entities")
    # Check for None before the membership test: `"All" in None` would raise
    # a TypeError when a caller passes entities=None explicitly.
    if entities is None or "All" in entities:
        kwargs["entities"] = None
    return analyzer_engine().analyze(**kwargs)
49
-
50
-
51
def anonymize(text, analyze_results):
    """Anonymize the identified spans in ``text`` with the Presidio Anonymizer.

    ``analyze_results`` are the analyzer findings for ``text``. Only the
    ``text`` attribute of the anonymizer's result is returned.
    """
    return anonymizer_engine().anonymize(text, analyze_results).text
56
-
57
-
58
st.set_page_config(page_title="Presidio demo (English)", layout="wide")

# Side bar: short description, entity picker, threshold and explanation toggle.
st.sidebar.markdown(
    """
Anonymize PII entities with [presidio](https://aka.ms/presidio), spaCy and a [PHI detection Roberta model](https://huggingface.co/obi/deid_roberta_i2b2).
"""
)

# The supported entities feed both the selectable options and the default
# selection (everything selected).
supported_entities = get_supported_entities()
st_entities = st.sidebar.multiselect(
    label="Which entities to look for?",
    options=supported_entities,
    default=list(supported_entities),
)

# NOTE: the variable keeps its original spelling because the analysis call
# further down the script refers to it by this name.
st_threhsold = st.sidebar.slider(
    label="Acceptance threshold",
    min_value=0.0,
    max_value=1.0,
    value=0.35,
)

st_return_decision_process = st.sidebar.checkbox("Add analysis explanations in json")

st.sidebar.info(
    "Presidio is an open source framework for PII detection and anonymization. "
    "For more info visit [aka.ms/presidio](https://aka.ms/presidio)"
)
83
-
84
-
85
# Main panel: show a notice while the analyzer is being created, then clear it.
analyzer_load_state = st.info("Starting Presidio analyzer...")
engine = analyzer_engine()
analyzer_load_state.empty()


# Two side-by-side columns: the raw input on the left, the result on the right.
input_col, output_col = st.columns(2)

# Before: editable input text, pre-filled with an example.
input_col.subheader("Input string:")
st_text = input_col.text_area(
    label="Enter text",
    value=(
        "Type in some text, "
        "like a phone number (212-141-4544) "
        "or a name (Lebron James)."
    ),
    height=400,
)

# After: analyze the input with the sidebar settings, then anonymize it.
output_col.subheader("Output:")

st_analyze_results = analyze(
    text=st_text,
    entities=st_entities,
    language="en",
    score_threshold=st_threhsold,
    return_decision_process=st_return_decision_process,
)
st_anonymize_results = anonymize(st_text, st_analyze_results)
output_col.text_area(label="", value=st_anonymize_results, height=400)
116
-
117
-
118
# table result: one row per finding, or a placeholder when there are none.
st.subheader("Findings")
if not st_analyze_results:
    st.text("No findings")
else:
    # Maps the fields kept from each finding to the column headers shown.
    column_labels = {
        "entity_type": "Entity type",
        "start": "Start",
        "end": "End",
        "score": "Confidence",
    }
    records = [result.to_dict() for result in st_analyze_results]
    df = pd.DataFrame.from_records(records)
    df = df[list(column_labels)].rename(columns=column_labels)

    st.dataframe(df, width=1000)
135
-
136
-
137
- # json result
138
class ToDictListEncoder(JSONEncoder):
    """JSON encoder for objects that expose a ``to_dict()`` method."""

    def default(self, o):
        """Encode ``o`` using its ``to_dict()`` method.

        Called by the JSON machinery for objects it cannot serialize itself.

        Returns:
            ``[]`` for falsy objects, otherwise the result of ``o.to_dict()``.

        Raises:
            TypeError: if ``o`` is truthy and has no callable ``to_dict``
                (raised by ``JSONEncoder.default``).
        """
        if not o:
            return []
        to_dict = getattr(o, "to_dict", None)
        if callable(to_dict):
            return to_dict()
        # Defer to the base class so unsupported objects fail with the
        # standard TypeError rather than an AttributeError.
        return super().default(o)
146
-
147
-
148
# Show the raw findings as JSON only when the sidebar checkbox is ticked.
if st_return_decision_process:
    findings_json = json.dumps(st_analyze_results, cls=ToDictListEncoder)
    st.json(findings_json)