seanpedrickcase
committed on
Commit
•
9337aae
1
Parent(s):
35e6d45
Added lambda_entrypoint.py to main folder
Browse files - lambda_entrypoint.py +39 -20
lambda_entrypoint.py
CHANGED
@@ -4,6 +4,7 @@ import subprocess
|
|
4 |
from urllib.parse import unquote_plus
|
5 |
|
6 |
s3_client = boto3.client("s3")
|
|
|
7 |
|
8 |
def download_file_from_s3(bucket_name, key, download_path):
|
9 |
"""Download a file from S3 to the local filesystem."""
|
@@ -16,39 +17,57 @@ def upload_file_to_s3(file_path, bucket_name, key):
|
|
16 |
print(f"Uploaded {file_path} to {key}")
|
17 |
|
18 |
def lambda_handler(event, context):
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
24 |
print(f"Processing file {input_key} from bucket {bucket_name}")
|
25 |
|
26 |
-
#
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
# Download input file
|
|
|
33 |
download_file_from_s3(bucket_name, input_key, input_file_path)
|
34 |
|
35 |
-
#
|
36 |
-
allow_list_key = "path/to/allow_list.csv" # Adjust path as required
|
37 |
-
download_file_from_s3(bucket_name, allow_list_key, allow_list_path)
|
38 |
-
|
39 |
-
# Construct and run the command
|
40 |
command = [
|
41 |
"python",
|
42 |
"app.py",
|
43 |
"--input_file", input_file_path,
|
44 |
-
"--ocr_method",
|
45 |
-
"--pii_detector",
|
46 |
-
"--page_min",
|
47 |
-
"--page_max",
|
48 |
-
"--allow_list", allow_list_path,
|
49 |
"--output_dir", output_dir,
|
50 |
]
|
51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
try:
|
53 |
result = subprocess.run(command, capture_output=True, text=True, check=True)
|
54 |
print("Processing succeeded:", result.stdout)
|
|
|
4 |
from urllib.parse import unquote_plus
|
5 |
|
6 |
s3_client = boto3.client("s3")
|
7 |
+
TMP_DIR = "/tmp" # Use absolute path
|
8 |
|
9 |
def download_file_from_s3(bucket_name, key, download_path):
|
10 |
"""Download a file from S3 to the local filesystem."""
|
|
|
17 |
print(f"Uploaded {file_path} to {key}")
|
18 |
|
19 |
def lambda_handler(event, context):
|
20 |
+
# Create necessary directories
|
21 |
+
os.makedirs(os.path.join(TMP_DIR, "input"), exist_ok=True)
|
22 |
+
os.makedirs(os.path.join(TMP_DIR, "output"), exist_ok=True)
|
23 |
+
|
24 |
+
# Extract S3 bucket and object key from the Records
|
25 |
+
for record in event.get("Records", [{}]):
|
26 |
+
bucket_name = record.get("s3", {}).get("bucket", {}).get("name")
|
27 |
+
input_key = record.get("s3", {}).get("object", {}).get("key")
|
28 |
print(f"Processing file {input_key} from bucket {bucket_name}")
|
29 |
|
30 |
+
# Extract additional arguments
|
31 |
+
arguments = event.get("arguments", {})
|
32 |
+
|
33 |
+
if not input_key:
|
34 |
+
input_key = arguments.get("input_file", "")
|
35 |
+
|
36 |
+
ocr_method = arguments.get("ocr_method", "Complex image analysis - docs with handwriting/signatures (AWS Textract)")
|
37 |
+
pii_detector = arguments.get("pii_detector", "AWS Comprehend")
|
38 |
+
page_min = str(arguments.get("page_min", 0))
|
39 |
+
page_max = str(arguments.get("page_max", 0))
|
40 |
+
allow_list = arguments.get("allow_list", None)
|
41 |
+
output_dir = arguments.get("output_dir", os.path.join(TMP_DIR, "output"))
|
42 |
+
|
43 |
+
print(f"OCR Method: {ocr_method}")
|
44 |
+
print(f"PII Detector: {pii_detector}")
|
45 |
+
print(f"Page Range: {page_min} - {page_max}")
|
46 |
+
print(f"Allow List: {allow_list}")
|
47 |
+
print(f"Output Directory: {output_dir}")
|
48 |
|
49 |
# Download input file
|
50 |
+
input_file_path = os.path.join(TMP_DIR, "input", os.path.basename(input_key))
|
51 |
download_file_from_s3(bucket_name, input_key, input_file_path)
|
52 |
|
53 |
+
# Construct command
|
|
|
|
|
|
|
|
|
54 |
command = [
|
55 |
"python",
|
56 |
"app.py",
|
57 |
"--input_file", input_file_path,
|
58 |
+
"--ocr_method", ocr_method,
|
59 |
+
"--pii_detector", pii_detector,
|
60 |
+
"--page_min", page_min,
|
61 |
+
"--page_max", page_max,
|
|
|
62 |
"--output_dir", output_dir,
|
63 |
]
|
64 |
|
65 |
+
# Add allow_list only if provided
|
66 |
+
if allow_list:
|
67 |
+
allow_list_path = os.path.join(TMP_DIR, "allow_list.csv")
|
68 |
+
download_file_from_s3(bucket_name, allow_list, allow_list_path)
|
69 |
+
command.extend(["--allow_list", allow_list_path])
|
70 |
+
|
71 |
try:
|
72 |
result = subprocess.run(command, capture_output=True, text=True, check=True)
|
73 |
print("Processing succeeded:", result.stdout)
|