Fix original image downsizing / Add image size check before upload
Browse files
image_processing_pipeline.py
CHANGED
@@ -113,7 +113,7 @@ def main():
|
|
113 |
downsize_processed_images(input_directory, output_directory, target_width)
|
114 |
|
115 |
original_output_directory = os.path.join(args.output_dir, "web-original-images")
|
116 |
-
downsize_processed_images(
|
117 |
|
118 |
if args.dataset_name:
|
119 |
upload_to_dataset(original_output_directory, args.output_dir, args.dataset_name, dry_run=not args.push_dataset)
|
|
|
113 |
downsize_processed_images(input_directory, output_directory, target_width)
|
114 |
|
115 |
original_output_directory = os.path.join(args.output_dir, "web-original-images")
|
116 |
+
downsize_processed_images(input_resized_dir, original_output_directory, target_width)
|
117 |
|
118 |
if args.dataset_name:
|
119 |
upload_to_dataset(original_output_directory, args.output_dir, args.dataset_name, dry_run=not args.push_dataset)
|
utils/resize_processed_images.py
CHANGED
@@ -31,14 +31,13 @@ def resize_image(input_path, output_path, target_width):
|
|
31 |
with Image.open(input_path) as img:
|
32 |
# Correct orientation
|
33 |
img = correct_orientation(img)
|
34 |
-
|
35 |
# Calculate the new height to maintain the aspect ratio
|
36 |
width_percent = target_width / img.width
|
37 |
target_height = int(img.height * width_percent)
|
38 |
|
39 |
# Resize the image
|
40 |
img = img.resize((target_width, target_height), Image.LANCZOS)
|
41 |
-
|
42 |
# Save the resized image in the same format as the input
|
43 |
img.save(output_path, format=img.format)
|
44 |
|
|
|
31 |
with Image.open(input_path) as img:
|
32 |
# Correct orientation
|
33 |
img = correct_orientation(img)
|
34 |
+
|
35 |
# Calculate the new height to maintain the aspect ratio
|
36 |
width_percent = target_width / img.width
|
37 |
target_height = int(img.height * width_percent)
|
38 |
|
39 |
# Resize the image
|
40 |
img = img.resize((target_width, target_height), Image.LANCZOS)
|
|
|
41 |
# Save the resized image in the same format as the input
|
42 |
img.save(output_path, format=img.format)
|
43 |
|
utils/upload_to_dataset.py
CHANGED
@@ -4,6 +4,8 @@ import os
|
|
4 |
from collections import defaultdict
|
5 |
import pandas as pd
|
6 |
import argparse
|
|
|
|
|
7 |
|
8 |
def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
|
9 |
# Define the dataset features with dedicated columns for each model
|
@@ -53,8 +55,21 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, d
|
|
53 |
"original_filename": []
|
54 |
}
|
55 |
|
|
|
|
|
56 |
for filename, entry in data.items():
|
57 |
if "original_image" in entry:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
dataset_dict["original_image"].append(entry["original_image"])
|
59 |
dataset_dict["clipdrop_image"].append(entry["clipdrop_image"])
|
60 |
dataset_dict["bria_image"].append(entry["bria_image"])
|
@@ -62,6 +77,11 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, d
|
|
62 |
dataset_dict["removebg_image"].append(entry["removebg_image"])
|
63 |
dataset_dict["original_filename"].append(filename)
|
64 |
|
|
|
|
|
|
|
|
|
|
|
65 |
# Save the data dictionary to a CSV file for inspection
|
66 |
df = pd.DataFrame.from_dict(dataset_dict)
|
67 |
df.to_csv("image_data.csv", index=False)
|
|
|
4 |
from collections import defaultdict
|
5 |
import pandas as pd
|
6 |
import argparse
|
7 |
+
from PIL import Image as PILImage
|
8 |
+
import sys
|
9 |
|
10 |
def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
|
11 |
# Define the dataset features with dedicated columns for each model
|
|
|
55 |
"original_filename": []
|
56 |
}
|
57 |
|
58 |
+
errors = []
|
59 |
+
|
60 |
for filename, entry in data.items():
|
61 |
if "original_image" in entry:
|
62 |
+
# Check if all images have the same size
|
63 |
+
try:
|
64 |
+
original_size = PILImage.open(entry["original_image"]).size
|
65 |
+
for source in ["clipdrop_image", "bria_image", "photoroom_image", "removebg_image"]:
|
66 |
+
if entry[source] is not None:
|
67 |
+
processed_size = PILImage.open(entry[source]).size
|
68 |
+
if processed_size != original_size:
|
69 |
+
errors.append(f"Size mismatch for {filename}: {source} image size {processed_size} does not match original size {original_size}.")
|
70 |
+
except Exception as e:
|
71 |
+
errors.append(f"Error processing {filename}: {e}")
|
72 |
+
|
73 |
dataset_dict["original_image"].append(entry["original_image"])
|
74 |
dataset_dict["clipdrop_image"].append(entry["clipdrop_image"])
|
75 |
dataset_dict["bria_image"].append(entry["bria_image"])
|
|
|
77 |
dataset_dict["removebg_image"].append(entry["removebg_image"])
|
78 |
dataset_dict["original_filename"].append(filename)
|
79 |
|
80 |
+
if errors:
|
81 |
+
for error in errors:
|
82 |
+
print(error)
|
83 |
+
sys.exit(1)
|
84 |
+
|
85 |
# Save the data dictionary to a CSV file for inspection
|
86 |
df = pd.DataFrame.from_dict(dataset_dict)
|
87 |
df.to_csv("image_data.csv", index=False)
|