Spaces:
Sleeping
Sleeping
Chris Finlayson
committed on
Commit
•
9f2c2b4
1
Parent(s):
319ac5e
Updates to logging and filename
Browse files
app.py
CHANGED
@@ -103,6 +103,7 @@ class PiiRegex(object):
|
|
103 |
for match in regexes.keys():
|
104 |
# If we've got a result, add it to matches.
|
105 |
if getattr(self, match):
|
|
|
106 |
matches.append(match)
|
107 |
|
108 |
return True if matches else False
|
@@ -127,14 +128,8 @@ class Redactor:
|
|
127 |
def redaction(self):
|
128 |
|
129 |
""" main redactor code """
|
130 |
-
# opening the pdf
|
131 |
doc = fitz.open(self.file)
|
132 |
-
# iterating through pages
|
133 |
for page in doc:
|
134 |
-
# _wrapContents is needed for fixing
|
135 |
-
# alignment issues with rect boxes in some
|
136 |
-
# cases where there is alignment issue
|
137 |
-
# page._wrapContents()
|
138 |
sensitive = self.get_sensitive_data(page.get_text("text")
|
139 |
.split('\n'))
|
140 |
for data in sensitive:
|
@@ -142,14 +137,14 @@ class Redactor:
|
|
142 |
# drawing outline over sensitive datas
|
143 |
if data:
|
144 |
for area in areas:
|
145 |
-
# annot = page.add_redact_annot(area)
|
146 |
annot = page.add_redact_annot(area.quad, text='REDACTED', fontname=None, fontsize=11, fill=(1, 1, 1), text_color=(0, 0, 0), cross_out=True)
|
147 |
annot.update()
|
148 |
# applying the redaction
|
149 |
page.apply_redactions()
|
150 |
# saving it to a new pdf
|
151 |
-
|
152 |
-
|
|
|
153 |
|
154 |
|
155 |
|
@@ -159,11 +154,11 @@ def redact_pdf(file):
|
|
159 |
redactor = Redactor(file)
|
160 |
redactor.redaction()
|
161 |
# Return the redacted pdf file
|
162 |
-
return '
|
163 |
|
164 |
|
165 |
inputs = [
|
166 |
-
gr.File(label="Upload PDF")
|
167 |
]
|
168 |
|
169 |
outputs = [
|
|
|
103 |
for match in regexes.keys():
|
104 |
# If we've got a result, add it to matches.
|
105 |
if getattr(self, match):
|
106 |
+
print (f"PII located in document: {match}")
|
107 |
matches.append(match)
|
108 |
|
109 |
return True if matches else False
|
|
|
128 |
def redaction(self):
|
129 |
|
130 |
""" main redactor code """
|
|
|
131 |
doc = fitz.open(self.file)
|
|
|
132 |
for page in doc:
|
|
|
|
|
|
|
|
|
133 |
sensitive = self.get_sensitive_data(page.get_text("text")
|
134 |
.split('\n'))
|
135 |
for data in sensitive:
|
|
|
137 |
# drawing outline over sensitive datas
|
138 |
if data:
|
139 |
for area in areas:
|
|
|
140 |
annot = page.add_redact_annot(area.quad, text='REDACTED', fontname=None, fontsize=11, fill=(1, 1, 1), text_color=(0, 0, 0), cross_out=True)
|
141 |
annot.update()
|
142 |
# applying the redaction
|
143 |
page.apply_redactions()
|
144 |
# saving it to a new pdf
|
145 |
+
redacted_file = os.path.splitext(self.file.name)[0] + '_redacted.pdf'
|
146 |
+
doc.save(redacted_file)
|
147 |
+
print(f"Successfully redacted. The redacted file is saved as {redacted_file}")
|
148 |
|
149 |
|
150 |
|
|
|
154 |
redactor = Redactor(file)
|
155 |
redactor.redaction()
|
156 |
# Return the redacted pdf file
|
157 |
+
return os.path.splitext(file.name)[0] + '_redacted.pdf'
|
158 |
|
159 |
|
160 |
inputs = [
|
161 |
+
gr.inputs.File(type="file", label="Upload PDF")
|
162 |
]
|
163 |
|
164 |
outputs = [
|