seanpedrickcase committed
Commit e5dfae7 (1 parent: e2aae24)

Added an option for running the redact function through the CLI (i.e. without going through the Gradio UI or API). Added test functions for running this through AWS Lambda.
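A minimal sketch of how the mode switch added in this commit is intended to be flipped. It only illustrates selecting between the Gradio UI and direct mode via the RUN_DIRECT_MODE variable introduced in the diff below; the CLI arguments themselves are defined in tools/cli_redact.py further down, so treat this as illustrative rather than a supported invocation.

import os
import subprocess

# Default behaviour: launch the Gradio UI, as before
subprocess.run(["python", "app.py"], check=True)

# Direct mode: skip Gradio and run the redaction pipeline from tools/cli_redact.py
env = dict(os.environ, RUN_DIRECT_MODE="1")
subprocess.run(["python", "app.py"], env=env, check=True)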

Dockerfile CHANGED
@@ -14,6 +14,9 @@ RUN pip install --no-cache-dir --target=/install -r requirements.txt
 
 RUN rm requirements.txt
 
+# Add lambda_entrypoint.py to the container
+COPY lambda_entrypoint.py .
+
 # Stage 2: Final runtime image
 FROM public.ecr.aws/docker/library/python:3.11.9-slim-bookworm
 
@@ -62,4 +65,7 @@ WORKDIR $HOME/app
 # Copy the current directory contents into the container at $HOME/app setting the owner to the user
 COPY --chown=user . $HOME/app
 
-CMD ["python", "app.py"]
+# Keep the default entrypoint flexible
+ENTRYPOINT ["python", "-u", "entrypoint_router.py"]
+
+#CMD ["python", "app.py"]
app.py CHANGED
@@ -364,7 +364,7 @@ with app:
 
     # If running on AWS, load in the default allow list file from S3
    if RUN_AWS_FUNCTIONS == "1":
-        print("default_allow_list_output_folder_location:", default_allow_list_output_folder_location)
+        print("default_allow_list_output_folder_location:", default_allow_list_loc)
        if not os.path.exists(default_allow_list_loc):
            app.load(download_file_from_s3, inputs=[s3_default_bucket, s3_default_allow_list_file, default_allow_list_output_folder_location]).\
            then(load_in_default_allow_list, inputs = [default_allow_list_output_folder_location], outputs=[in_allow_list])
@@ -399,11 +399,26 @@ with app:
 COGNITO_AUTH = get_or_create_env_var('COGNITO_AUTH', '0')
 print(f'The value of COGNITO_AUTH is {COGNITO_AUTH}')
 
+RUN_DIRECT_MODE = get_or_create_env_var('RUN_DIRECT_MODE', '0')
+print(f'The value of RUN_DIRECT_MODE is {RUN_DIRECT_MODE}')
+
 if __name__ == "__main__":
-    if os.environ['COGNITO_AUTH'] == "1":
-        app.queue(max_size=5).launch(show_error=True, auth=authenticate_user, max_file_size='100mb')
+
+    if RUN_DIRECT_MODE == "0":
+        max_queue_size = 5
+        max_file_size = '100mb'
+
+        if os.environ['COGNITO_AUTH'] == "1":
+            app.queue(max_size=max_queue_size).launch(show_error=True, auth=authenticate_user, max_file_size=max_file_size)
+        else:
+            app.queue(max_size=max_queue_size).launch(show_error=True, inbrowser=True, max_file_size=max_file_size)
+
     else:
-        app.queue(max_size=5).launch(show_error=True, inbrowser=True, max_file_size='100mb')
+        from tools.cli_redact import main
+
+        main(first_loop_state, latest_file_completed=0, output_summary="", output_file_list=None,
+             log_files_list=None, estimated_time=0, textract_metadata="", comprehend_query_num=0,
+             current_loop_page=0, page_break=False, pdf_doc_state=[], all_image_annotations=[], all_line_level_ocr_results=pd.DataFrame(), all_decision_process_table=pd.DataFrame(), chosen_comprehend_entities=chosen_comprehend_entities, chosen_redact_entities=chosen_redact_entities, handwrite_signature_checkbox=["Redact all identified handwriting", "Redact all identified signatures"])
 
 
 # AWS options - placeholder for possibility of storing data on s3 and retrieving it in app
entrypoint_router.py ADDED
@@ -0,0 +1,23 @@
+import os
+import subprocess
+
+if __name__ == "__main__":
+    run_direct_mode = os.getenv("RUN_DIRECT_MODE", "0")
+
+    if run_direct_mode == "1":
+        # Lambda execution or CLI invocation (Direct Mode)
+        from lambda_entrypoint import lambda_handler
+
+        # Simulate the Lambda event and context for local testing
+        event = os.getenv("LAMBDA_TEST_EVENT", '{}')
+        context = None  # Add mock context if needed
+        response = lambda_handler(eval(event), context)
+        print(response)
+    else:
+        # Gradio App execution
+        from app import app  # Replace with actual import if needed
+
+        if os.getenv("COGNITO_AUTH", "0") == "1":
+            app.queue(max_size=app.max_queue_size).launch(show_error=True, auth=app.authenticate_user, max_file_size=app.max_file_size)
+        else:
+            app.queue(max_size=app.max_queue_size).launch(show_error=True, inbrowser=True, max_file_size=app.max_file_size)
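A minimal sketch of driving the router's direct mode locally, assuming the RUN_DIRECT_MODE and LAMBDA_TEST_EVENT variables read above. The bucket and object key are placeholders, and real AWS credentials and objects would be needed for the downstream S3 calls in lambda_entrypoint.py to succeed.

import os
import subprocess

# Fake S3 event in the shape lambda_entrypoint.lambda_handler expects (placeholder names)
test_event = ('{"Records": [{"s3": {"bucket": {"name": "example-bucket"}, '
              '"object": {"key": "uploads/example.pdf"}}}]}')

env = dict(os.environ, RUN_DIRECT_MODE="1", LAMBDA_TEST_EVENT=test_event)
subprocess.run(["python", "-u", "entrypoint_router.py"], env=env, check=True)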
lambda_entrypoint.py ADDED
@@ -0,0 +1,66 @@
+import boto3
+import os
+import subprocess
+from urllib.parse import unquote_plus
+
+s3_client = boto3.client("s3")
+
+def download_file_from_s3(bucket_name, key, download_path):
+    """Download a file from S3 to the local filesystem."""
+    s3_client.download_file(bucket_name, key, download_path)
+    print(f"Downloaded {key} to {download_path}")
+
+def upload_file_to_s3(file_path, bucket_name, key):
+    """Upload a file to S3."""
+    s3_client.upload_file(file_path, bucket_name, key)
+    print(f"Uploaded {file_path} to {key}")
+
+def lambda_handler(event, context):
+    """Main Lambda function handler."""
+    # Parse the S3 event
+    for record in event["Records"]:
+        bucket_name = record["s3"]["bucket"]["name"]
+        input_key = unquote_plus(record["s3"]["object"]["key"])
+        print(f"Processing file {input_key} from bucket {bucket_name}")
+
+        # Prepare paths
+        input_file_path = f"/tmp/{os.path.basename(input_key)}"
+        allow_list_path = f"/tmp/allow_list.csv"  # Adjust this as needed
+        output_dir = "/tmp/output"
+        os.makedirs(output_dir, exist_ok=True)
+
+        # Download input file
+        download_file_from_s3(bucket_name, input_key, input_file_path)
+
+        # (Optional) Download allow_list if needed
+        allow_list_key = "path/to/allow_list.csv"  # Adjust path as required
+        download_file_from_s3(bucket_name, allow_list_key, allow_list_path)
+
+        # Construct and run the command
+        command = [
+            "python",
+            "app.py",
+            "--input_file", input_file_path,
+            "--ocr_method", "Complex image analysis - docs with handwriting/signatures (AWS Textract)",
+            "--pii_detector", "AWS Comprehend",
+            "--page_min", "0",
+            "--page_max", "0",
+            "--allow_list", allow_list_path,
+            "--output_dir", output_dir,
+        ]
+
+        try:
+            result = subprocess.run(command, capture_output=True, text=True, check=True)
+            print("Processing succeeded:", result.stdout)
+        except subprocess.CalledProcessError as e:
+            print("Error during processing:", e.stderr)
+            raise e
+
+        # Upload output files back to S3
+        for root, _, files in os.walk(output_dir):
+            for file_name in files:
+                local_file_path = os.path.join(root, file_name)
+                output_key = f"{os.path.dirname(input_key)}/output/{file_name}"
+                upload_file_to_s3(local_file_path, bucket_name, output_key)
+
+    return {"statusCode": 200, "body": "Processing complete."}
tools/aws_functions.py CHANGED
@@ -10,7 +10,7 @@ PandasDataFrame = Type[pd.DataFrame]
 # Get AWS credentials
 bucket_name=""
 
-RUN_AWS_FUNCTIONS = get_or_create_env_var("RUN_AWS_FUNCTIONS", "0")
+RUN_AWS_FUNCTIONS = get_or_create_env_var("RUN_AWS_FUNCTIONS", "1")
 print(f'The value of RUN_AWS_FUNCTIONS is {RUN_AWS_FUNCTIONS}')
 
 AWS_REGION = get_or_create_env_var('AWS_REGION', 'eu-west-2')
tools/cli_redact.py ADDED
@@ -0,0 +1,83 @@
+import argparse
+import os
+from tools.helper_functions import ensure_output_folder_exists, get_or_create_env_var, tesseract_ocr_option, text_ocr_option, textract_option, local_pii_detector, aws_pii_detector
+from tools.file_conversion import get_input_file_names, prepare_image_or_pdf
+from tools.file_redaction import choose_and_run_redactor
+import pandas as pd
+from datetime import datetime
+
+chosen_comprehend_entities = ['BANK_ACCOUNT_NUMBER','BANK_ROUTING','CREDIT_DEBIT_NUMBER', 'CREDIT_DEBIT_CVV', 'CREDIT_DEBIT_EXPIRY','PIN','EMAIL','ADDRESS',
+    'NAME','PHONE', 'PASSPORT_NUMBER','DRIVER_ID', 'USERNAME','PASSWORD',
+    'IP_ADDRESS','MAC_ADDRESS','LICENSE_PLATE',
+    'VEHICLE_IDENTIFICATION_NUMBER','UK_NATIONAL_INSURANCE_NUMBER',
+    'INTERNATIONAL_BANK_ACCOUNT_NUMBER','SWIFT_CODE',
+    'UK_NATIONAL_HEALTH_SERVICE_NUMBER']
+chosen_redact_entities = ["TITLES", "PERSON", "PHONE_NUMBER", "EMAIL_ADDRESS",
+    "STREETNAME", "UKPOSTCODE"]
+
+def main(first_loop_state=True, latest_file_completed=0, output_summary="", output_file_list=None,
+         log_files_list=None, estimated_time=0, textract_metadata="", comprehend_query_num=0,
+         current_loop_page=0, page_break=False, pdf_doc_state=[], all_image_annotations=[], all_line_level_ocr_results=pd.DataFrame(), all_decision_process_table=pd.DataFrame(), chosen_comprehend_entities=chosen_comprehend_entities, chosen_redact_entities=chosen_redact_entities, handwrite_signature_checkbox=["Redact all identified handwriting", "Redact all identified signatures"]):
+
+    if output_file_list is None:
+        output_file_list = []
+    if log_files_list is None:
+        log_files_list = []
+
+    parser = argparse.ArgumentParser(description='Redact PII from documents via command line')
+
+    # Required arguments
+    parser.add_argument('--input_file', help='Path to input file (PDF, JPG, or PNG)')
+
+    # Optional arguments with defaults matching the GUI app
+    parser.add_argument('--ocr_method', choices=[text_ocr_option, tesseract_ocr_option, textract_option],
+                        default='Quick image analysis', help='OCR method to use')
+    parser.add_argument('--pii_detector', choices=[local_pii_detector, aws_pii_detector],
+                        default='Local', help='PII detection method')
+    parser.add_argument('--page_min', type=int, default=0, help='First page to redact')
+    parser.add_argument('--page_max', type=int, default=0, help='Last page to redact')
+    parser.add_argument('--allow_list', help='Path to allow list CSV file')
+    parser.add_argument('--output_dir', default='output', help='Output directory')
+
+    args = parser.parse_args()
+
+    # Ensure output directory exists
+    ensure_output_folder_exists()
+
+    # Create file object similar to what Gradio provides
+    file_obj = {"name": args.input_file}
+
+    # Load allow list if provided
+    allow_list_df = pd.DataFrame()
+    if args.allow_list:
+        allow_list_df = pd.read_csv(args.allow_list)
+
+    # Get file names
+    file_name_no_ext, file_name_with_ext, full_file_name = get_input_file_names(file_obj)
+
+    # Initialize empty states for PDF processing
+
+    # Prepare PDF/image
+    output_summary, prepared_pdf, images_pdf, max_pages, annotate_max_pages_bottom, pdf_doc_state, all_image_annotations = prepare_image_or_pdf(
+        file_obj, args.ocr_method, allow_list_df, latest_file_completed,
+        output_summary, first_loop_state, args.page_max, current_loop_page, all_image_annotations
+    )
+
+    output_summary, output_files, output_file_list, latest_file_completed, log_files, \
+    log_files_list, estimated_time, textract_metadata, pdf_doc_state, all_image_annotations, \
+    current_loop_page, page_break, all_line_level_ocr_results, all_decision_process_table, \
+    comprehend_query_num = choose_and_run_redactor(
+        file_obj, prepared_pdf, images_pdf, "en", chosen_redact_entities,
+        chosen_comprehend_entities, args.ocr_method, allow_list_df,
+        latest_file_completed, output_summary, output_file_list, log_files_list,
+        first_loop_state, args.page_min, args.page_max, estimated_time,
+        handwrite_signature_checkbox, textract_metadata, all_image_annotations,
+        all_line_level_ocr_results, all_decision_process_table, pdf_doc_state,
+        current_loop_page, page_break, args.pii_detector, comprehend_query_num
+    )
+
+    print(f"\nRedaction complete. Output summary:\n{output_summary}")
+    print(f"\nOutput files saved to: {args.output_dir}")
+
+if __name__ == "__main__":
+    main()
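A minimal sketch of calling this entry point programmatically by faking argv, assuming the repository root is on the import path. The PDF path is a placeholder; the OCR and PII options fall back to the argparse defaults declared above.

import sys
from tools.cli_redact import main

# Placeholder document; other options use the defaults defined in the parser above
sys.argv = ["cli_redact", "--input_file", "example.pdf", "--output_dir", "output"]
main()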
tools/file_conversion.py CHANGED
@@ -9,6 +9,7 @@ import gradio as gr
 import time
 import json
 import pymupdf
+from tqdm import tqdm
 from gradio import Progress
 from typing import List, Optional
 
@@ -47,6 +48,8 @@ def is_pdf(filename):
 
 def convert_pdf_to_images(pdf_path:str, page_min:int = 0, image_dpi:float = image_dpi, progress=Progress(track_tqdm=True)):
 
+    print("pdf_path in convert_pdf_to_images:", pdf_path)
+
     # Get the number of pages in the PDF
     page_count = pdfinfo_from_path(pdf_path)['Pages']
     print("Number of pages in PDF: ", str(page_count))
@@ -55,7 +58,9 @@ def convert_pdf_to_images(pdf_path:str, page_min:int = 0, image_dpi:float = imag
 
     # Open the PDF file
     #for page_num in progress.tqdm(range(0,page_count), total=page_count, unit="pages", desc="Converting pages"): range(page_min,page_count): #
-    for page_num in progress.tqdm(range(page_min,page_count), total=page_count, unit="pages", desc="Preparing pages"):
+    for page_num in tqdm(range(page_min,page_count), total=page_count, unit="pages", desc="Preparing pages"):
+
+        print("page_num in convert_pdf_to_images:", page_num)
 
         print("Converting page: ", str(page_num + 1))
 
@@ -98,7 +103,7 @@ def convert_pdf_to_images(pdf_path:str, page_min:int = 0, image_dpi:float = imag
     return images
 
 # Function to take in a file path, decide if it is an image or pdf, then process appropriately.
-def process_file(file_path):
+def process_file(file_path:str):
     # Get the file extension
     file_extension = os.path.splitext(file_path)[1].lower()
 
@@ -130,7 +135,9 @@ def get_input_file_names(file_input):
     file_name_with_extension = ""
     full_file_name = ""
 
-    #print("file_input:", file_input)
+    print("file_input in input file names:", file_input)
+    if isinstance(file_input, dict):
+        file_input = os.path.abspath(file_input["name"])
 
     if isinstance(file_input, str):
         file_input_list = [file_input]
@@ -225,6 +232,9 @@ def prepare_image_or_pdf(
     if not file_paths:
         file_paths = []
 
+    if isinstance(file_paths, dict):
+        file_paths = os.path.abspath(file_paths["name"])
+
     if isinstance(file_paths, str):
         file_path_number = 1
     else:
@@ -277,8 +287,9 @@
 
         file_extension = os.path.splitext(file_path)[1].lower()
 
-        # Check if the file is an image type
-        if file_extension in ['.jpg', '.jpeg', '.png']:
+
+        # Check if the file is an image type and the user selected text ocr option
+        if file_extension in ['.jpg', '.jpeg', '.png'] and in_redact_method == text_ocr_option:
             in_redact_method = tesseract_ocr_option
 
 
@@ -333,6 +344,9 @@
            json.dump(json_contents, json_file, indent=4) # indent=4 makes the JSON file pretty-printed
            continue
 
+
+        print("in_redact_method:", in_redact_method)
+
        # Convert pdf/image file to correct format for redaction
        if in_redact_method == tesseract_ocr_option or in_redact_method == textract_option:
            if is_pdf_or_image(file_path) == False:
@@ -340,6 +354,9 @@
                print(out_message)
                return out_message, converted_file_paths, image_file_paths, number_of_pages, number_of_pages, pymupdf_doc, all_annotations_object
 
+            print("In correct preparation area.")
+
+            print("file_path at process_file:", file_path)
            converted_file_path = process_file(file_path)
            image_file_path = converted_file_path
 
tools/file_redaction.py CHANGED
@@ -180,8 +180,12 @@ def choose_and_run_redactor(file_paths:List[str],
        return combined_out_message, out_file_paths, out_file_paths, gr.Number(value=latest_file_completed, label="Number of documents redacted", interactive=False, visible=False), log_files_output_paths, log_files_output_paths, estimated_time_taken_state, all_request_metadata_str, pymupdf_doc, annotations_all_pages, gr.Number(value=current_loop_page,precision=0, interactive=False, label = "Last redacted page in document", visible=False), gr.Checkbox(value = False, label="Page break reached", visible=False), all_line_level_ocr_results_df, all_decision_process_table, comprehend_query_number
 
    # Create allow list
+    # If string, assume file path
+    if isinstance(in_allow_list, str):
+        in_allow_list = pd.read_csv(in_allow_list)
+
    if not in_allow_list.empty:
-        in_allow_list_flat = in_allow_list[0].tolist()
+        in_allow_list_flat = in_allow_list.iloc[:,0].tolist()
        print("In allow list:", in_allow_list_flat)
    else:
        in_allow_list_flat = []
@@ -215,12 +219,18 @@ def choose_and_run_redactor(file_paths:List[str],
    progress(0.5, desc="Redacting file")
 
    if isinstance(file_paths, str):
-        file_paths_list = [file_paths]
+        file_paths_list = [os.path.abspath(file_paths)]
+        file_paths_loop = file_paths_list
+    elif isinstance(file_paths, dict):
+        file_paths = file_paths["name"]
+        file_paths_list = [os.path.abspath(file_paths)]
        file_paths_loop = file_paths_list
    else:
        file_paths_list = file_paths
        file_paths_loop = [file_paths_list[int(latest_file_completed)]]
 
+    print("file_paths_list in choose_redactor function:", file_paths_list)
+
 
    for file in file_paths_loop:
        if isinstance(file, str):
tools/redaction_review.py CHANGED
@@ -72,7 +72,7 @@ def update_annotator(image_annotator_object:AnnotatedImageData, page_num:int, zo
 
        return out_image_annotator, number_reported, number_reported
 
-    print("page_num at start of update_annotator function:", page_num)
+    #print("page_num at start of update_annotator function:", page_num)
 
    if page_num is None:
        page_num = 0