Chris Finlayson committed on
Commit
9f2c2b4
1 Parent(s): 319ac5e

Updates to logging and filename

Browse files
Files changed (1) hide show
  1. app.py +6 -11
app.py CHANGED
@@ -103,6 +103,7 @@ class PiiRegex(object):
103
  for match in regexes.keys():
104
  # If we've got a result, add it to matches.
105
  if getattr(self, match):
 
106
  matches.append(match)
107
 
108
  return True if matches else False
@@ -127,14 +128,8 @@ class Redactor:
127
  def redaction(self):
128
 
129
  """ main redactor code """
130
- # opening the pdf
131
  doc = fitz.open(self.file)
132
- # iterating through pages
133
  for page in doc:
134
- # _wrapContents is needed for fixing
135
- # alignment issues with rect boxes in some
136
- # cases where there is alignment issue
137
- # page._wrapContents()
138
  sensitive = self.get_sensitive_data(page.get_text("text")
139
  .split('\n'))
140
  for data in sensitive:
@@ -142,14 +137,14 @@ class Redactor:
142
  # drawing outline over sensitive datas
143
  if data:
144
  for area in areas:
145
- # annot = page.add_redact_annot(area)
146
  annot = page.add_redact_annot(area.quad, text='REDACTED', fontname=None, fontsize=11, fill=(1, 1, 1), text_color=(0, 0, 0), cross_out=True)
147
  annot.update()
148
  # applying the redaction
149
  page.apply_redactions()
150
  # saving it to a new pdf
151
- doc.save('redacted.pdf')
152
- print("Successfully redacted")
 
153
 
154
 
155
 
@@ -159,11 +154,11 @@ def redact_pdf(file):
159
  redactor = Redactor(file)
160
  redactor.redaction()
161
  # Return the redacted pdf file
162
- return 'redacted.pdf'
163
 
164
 
165
  inputs = [
166
- gr.File(label="Upload PDF")
167
  ]
168
 
169
  outputs = [
 
103
  for match in regexes.keys():
104
  # If we've got a result, add it to matches.
105
  if getattr(self, match):
106
+ print (f"PII located in document: {match}")
107
  matches.append(match)
108
 
109
  return True if matches else False
 
128
  def redaction(self):
129
 
130
  """ main redactor code """
 
131
  doc = fitz.open(self.file)
 
132
  for page in doc:
 
 
 
 
133
  sensitive = self.get_sensitive_data(page.get_text("text")
134
  .split('\n'))
135
  for data in sensitive:
 
137
  # drawing outline over sensitive datas
138
  if data:
139
  for area in areas:
 
140
  annot = page.add_redact_annot(area.quad, text='REDACTED', fontname=None, fontsize=11, fill=(1, 1, 1), text_color=(0, 0, 0), cross_out=True)
141
  annot.update()
142
  # applying the redaction
143
  page.apply_redactions()
144
  # saving it to a new pdf
145
+ redacted_file = os.path.splitext(self.file.name)[0] + '_redacted.pdf'
146
+ doc.save(redacted_file)
147
+ print(f"Successfully redacted. The redacted file is saved as {redacted_file}")
148
 
149
 
150
 
 
154
  redactor = Redactor(file)
155
  redactor.redaction()
156
  # Return the redacted pdf file
157
+ return os.path.splitext(file.name)[0] + '_redacted.pdf'
158
 
159
 
160
  inputs = [
161
+ gr.inputs.File(type="file", label="Upload PDF")
162
  ]
163
 
164
  outputs = [