seanpedrickcase committed on
Commit
056204b
·
1 Parent(s): 8235bbb

Changed default options for AWS.

Browse files
Files changed (2) hide show
  1. app.py +2 -3
  2. tools/file_redaction.py +0 -2
app.py CHANGED
@@ -49,7 +49,7 @@ aws_pii_detector = "AWS Comprehend"
49
 
50
  if RUN_AWS_FUNCTIONS == "1":
51
  default_ocr_val = textract_option
52
- default_pii_detector = aws_pii_detector
53
  else:
54
  default_ocr_val = text_ocr_option
55
  default_pii_detector = local_pii_detector
@@ -83,7 +83,6 @@ with app:
83
  output_file_list_state = gr.State([])
84
  text_output_file_list_state = gr.State([])
85
  log_files_output_list_state = gr.State([])
86
-
87
 
88
  # Logging state
89
  log_file_name = 'log.csv'
@@ -131,7 +130,7 @@ with app:
131
  with gr.Tab("PDFs/images"):
132
  with gr.Accordion("Redact document", open = True):
133
  in_doc_files = gr.File(label="Choose a document or image file (PDF, JPG, PNG)", file_count= "single", file_types=['.pdf', '.jpg', '.png', '.json'])
134
- in_redaction_method = gr.Radio(label="Choose document redaction method. AWS Textract has a cost per page so please only use when needed.", value = text_ocr_option, choices=[text_ocr_option, tesseract_ocr_option, textract_option])
135
  pii_identification_method_drop = gr.Radio(label = "Choose PII detection method", value = default_pii_detector, choices=[local_pii_detector, aws_pii_detector])
136
 
137
  gr.Markdown("""If you only want to redact certain pages, or certain entities (e.g. just email addresses), please go to the redaction settings tab.""")
 
49
 
50
  if RUN_AWS_FUNCTIONS == "1":
51
  default_ocr_val = textract_option
52
+ default_pii_detector = local_pii_detector
53
  else:
54
  default_ocr_val = text_ocr_option
55
  default_pii_detector = local_pii_detector
 
83
  output_file_list_state = gr.State([])
84
  text_output_file_list_state = gr.State([])
85
  log_files_output_list_state = gr.State([])
 
86
 
87
  # Logging state
88
  log_file_name = 'log.csv'
 
130
  with gr.Tab("PDFs/images"):
131
  with gr.Accordion("Redact document", open = True):
132
  in_doc_files = gr.File(label="Choose a document or image file (PDF, JPG, PNG)", file_count= "single", file_types=['.pdf', '.jpg', '.png', '.json'])
133
+ in_redaction_method = gr.Radio(label="Choose document redaction method. AWS Textract has a cost per page so please only use when needed.", value = default_ocr_val, choices=[text_ocr_option, tesseract_ocr_option, textract_option])
134
  pii_identification_method_drop = gr.Radio(label = "Choose PII detection method", value = default_pii_detector, choices=[local_pii_detector, aws_pii_detector])
135
 
136
  gr.Markdown("""If you only want to redact certain pages, or certain entities (e.g. just email addresses), please go to the redaction settings tab.""")
tools/file_redaction.py CHANGED
@@ -946,8 +946,6 @@ def redact_image_pdf(file_path:str,
946
  # Step 2: Analyze text and identify PII
947
  if chosen_redact_entities:
948
 
949
- pii_identification_method= "AWS Comprehend" #"Local"
950
-
951
  redaction_bboxes, comprehend_query_number_new = image_analyser.analyze_text(
952
  line_level_ocr_results,
953
  line_level_ocr_results_with_children,
 
946
  # Step 2: Analyze text and identify PII
947
  if chosen_redact_entities:
948
 
 
 
949
  redaction_bboxes, comprehend_query_number_new = image_analyser.analyze_text(
950
  line_level_ocr_results,
951
  line_level_ocr_results_with_children,