Spaces:
Sleeping
Sleeping
Commit
·
056204b
1
Parent(s):
8235bbb
Changed default options for AWS.
Browse files
- app.py +2 -3
- tools/file_redaction.py +0 -2
app.py
CHANGED
@@ -49,7 +49,7 @@ aws_pii_detector = "AWS Comprehend"
|
|
49 |
|
50 |
if RUN_AWS_FUNCTIONS == "1":
|
51 |
default_ocr_val = textract_option
|
52 |
-
default_pii_detector = aws_pii_detector
|
53 |
else:
|
54 |
default_ocr_val = text_ocr_option
|
55 |
default_pii_detector = local_pii_detector
|
@@ -83,7 +83,6 @@ with app:
|
|
83 |
output_file_list_state = gr.State([])
|
84 |
text_output_file_list_state = gr.State([])
|
85 |
log_files_output_list_state = gr.State([])
|
86 |
-
|
87 |
|
88 |
# Logging state
|
89 |
log_file_name = 'log.csv'
|
@@ -131,7 +130,7 @@ with app:
|
|
131 |
with gr.Tab("PDFs/images"):
|
132 |
with gr.Accordion("Redact document", open = True):
|
133 |
in_doc_files = gr.File(label="Choose a document or image file (PDF, JPG, PNG)", file_count= "single", file_types=['.pdf', '.jpg', '.png', '.json'])
|
134 |
-
in_redaction_method = gr.Radio(label="Choose document redaction method. AWS Textract has a cost per page so please only use when needed.", value =
|
135 |
pii_identification_method_drop = gr.Radio(label = "Choose PII detection method", value = default_pii_detector, choices=[local_pii_detector, aws_pii_detector])
|
136 |
|
137 |
gr.Markdown("""If you only want to redact certain pages, or certain entities (e.g. just email addresses), please go to the redaction settings tab.""")
|
|
|
49 |
|
50 |
if RUN_AWS_FUNCTIONS == "1":
|
51 |
default_ocr_val = textract_option
|
52 |
+
default_pii_detector = local_pii_detector
|
53 |
else:
|
54 |
default_ocr_val = text_ocr_option
|
55 |
default_pii_detector = local_pii_detector
|
|
|
83 |
output_file_list_state = gr.State([])
|
84 |
text_output_file_list_state = gr.State([])
|
85 |
log_files_output_list_state = gr.State([])
|
|
|
86 |
|
87 |
# Logging state
|
88 |
log_file_name = 'log.csv'
|
|
|
130 |
with gr.Tab("PDFs/images"):
|
131 |
with gr.Accordion("Redact document", open = True):
|
132 |
in_doc_files = gr.File(label="Choose a document or image file (PDF, JPG, PNG)", file_count= "single", file_types=['.pdf', '.jpg', '.png', '.json'])
|
133 |
+
in_redaction_method = gr.Radio(label="Choose document redaction method. AWS Textract has a cost per page so please only use when needed.", value = default_ocr_val, choices=[text_ocr_option, tesseract_ocr_option, textract_option])
|
134 |
pii_identification_method_drop = gr.Radio(label = "Choose PII detection method", value = default_pii_detector, choices=[local_pii_detector, aws_pii_detector])
|
135 |
|
136 |
gr.Markdown("""If you only want to redact certain pages, or certain entities (e.g. just email addresses), please go to the redaction settings tab.""")
|
tools/file_redaction.py
CHANGED
@@ -946,8 +946,6 @@ def redact_image_pdf(file_path:str,
|
|
946 |
# Step 2: Analyze text and identify PII
|
947 |
if chosen_redact_entities:
|
948 |
|
949 |
-
pii_identification_method= "AWS Comprehend" #"Local"
|
950 |
-
|
951 |
redaction_bboxes, comprehend_query_number_new = image_analyser.analyze_text(
|
952 |
line_level_ocr_results,
|
953 |
line_level_ocr_results_with_children,
|
|
|
946 |
# Step 2: Analyze text and identify PII
|
947 |
if chosen_redact_entities:
|
948 |
|
|
|
|
|
949 |
redaction_bboxes, comprehend_query_number_new = image_analyser.analyze_text(
|
950 |
line_level_ocr_results,
|
951 |
line_level_ocr_results_with_children,
|