Consolidate image preparation pipeline
Browse files- image_processing_pipeline.py +118 -0
- utils/add_green_background.py +27 -17
- utils/bria_rmbg20.py +1 -1
- utils/photoroom.py +2 -2
- utils/remove_backgrounds.py +0 -66
- utils/removebg.py +1 -1
- utils/resize_images.py +21 -3
- utils/resize_processed_images.py +41 -14
- utils/upload-to-dataset.py +0 -84
image_processing_pipeline.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import argparse
|
3 |
+
import shutil
|
4 |
+
import sys
|
5 |
+
from dotenv import load_dotenv, find_dotenv
|
6 |
+
|
7 |
+
# Importing modules from the utils package
|
8 |
+
from utils.resize_images import main as resize_images_main
|
9 |
+
from utils.removebg import iterate_over_directory as removebg_iterate
|
10 |
+
from utils.photoroom import iterate_over_directory as photoroom_iterate
|
11 |
+
from utils.bria_rmbg20 import iterate_over_directory as bria_iterate
|
12 |
+
from utils.add_green_background import process_directory as add_green_background_process
|
13 |
+
from utils.upload_to_dataset import upload_to_dataset
|
14 |
+
from utils.resize_processed_images import process_images
|
15 |
+
|
16 |
+
def check_env_variables():
    """Verify that a .env file is present and every required API key is set.

    Exits the process with an error message when the .env file cannot be
    found or when any of the expected keys has no value.
    """
    if not find_dotenv():
        sys.exit("Error: .env file not found.")

    load_dotenv()

    missing_keys = []
    for key in ('REMOVEBG_API_KEY', 'PHOTOROOM_API_KEY', 'BRIA_API_TOKEN'):
        if not os.getenv(key):
            missing_keys.append(key)

    if missing_keys:
        sys.exit(f"Error: Missing environment variables: {', '.join(missing_keys)}")
28 |
+
|
29 |
+
def copy_images(source_dir, dest_dir):
    """Flatten images from source_dir into a single dest_dir.

    Walks source_dir recursively and copies every .png/.jpg/.jpeg file into
    dest_dir, prefixing each filename with its parent folder name so files
    from different category folders cannot collide.

    Args:
        source_dir: Directory tree to scan for images.
        dest_dir: Flat destination directory (created if missing).
    """
    os.makedirs(dest_dir, exist_ok=True)
    valid_extensions = ('.png', '.jpg', '.jpeg')

    # Walk through the source directory
    for root, _, files in os.walk(source_dir):
        for filename in files:
            if filename.lower().endswith(valid_extensions):
                source_file = os.path.join(root, filename)

                # Prefix the filename with its category folder so the
                # flattened destination stays collision-free.
                folder_name = os.path.basename(root)
                new_filename = f"{folder_name}_{filename}"
                dest_file = os.path.join(dest_dir, new_filename)

                # Copy only once; skip files already present in the destination.
                if os.path.isfile(source_file) and not os.path.exists(dest_file):
                    shutil.copy2(source_file, dest_file)
                    print(f"Copied: {new_filename}")
                else:
                    print(f"Skipped: {filename} (already exists or not a file)")
51 |
+
|
52 |
+
def main():
    """Run the image preparation pipeline end to end.

    Stages: merge the per-category input folders, resize the originals,
    remove backgrounds with three external APIs, composite a green
    background, resize the processed results, and optionally upload
    everything to the Hugging Face Hub as a dataset.
    """
    check_env_variables()

    parser = argparse.ArgumentParser(description="Image Processing Pipeline")
    parser.add_argument("--input-dir", type=str, default="original-images", help="Input directory for images")
    parser.add_argument("--work-dir", type=str, default="workdir", help="Working directory for intermediate images")
    parser.add_argument("--output-dir", type=str, default="final-images", help="Output directory for final images")
    parser.add_argument("--dataset-name", type=str, help="Name of the dataset to upload to Hugging Face Hub")
    parser.add_argument("--push-dataset", action="store_true", help="Push the dataset to the Hugging Face Hub")

    args = parser.parse_args()

    # Define intermediate directories within the work directory
    input_resized_dir = os.path.join(args.work_dir, "resized")
    bg_removed_dir = os.path.join(args.work_dir, "background-removed")
    green_bg_dir = os.path.join(args.work_dir, "green-background")

    # Ensure all directories exist
    for directory in [input_resized_dir, bg_removed_dir, green_bg_dir]:
        os.makedirs(directory, exist_ok=True)

    # Step 1: Merge the per-category input folders into one flat directory,
    # prefixing each filename with its category name.
    print("Moving images to final output directory...")
    original_images_dir = os.path.join(args.work_dir, "merged-categories")
    copy_images(args.input_dir, original_images_dir)

    # Step 2: Resize the merged originals into the working "resized" folder.
    print("Resizing images...")
    resize_images_main(input_directory=original_images_dir, output_directory=input_resized_dir)

    # Step 3: Remove backgrounds with each of the three external services.
    print("Removing backgrounds...")
    bg_removal_dirs = {
        "removebg": os.path.join(bg_removed_dir, "removebg"),
        "photoroom": os.path.join(bg_removed_dir, "photoroom"),
        "bria": os.path.join(bg_removed_dir, "bria")
    }

    for dir_path in bg_removal_dirs.values():
        os.makedirs(dir_path, exist_ok=True)

    removebg_iterate(input_resized_dir, bg_removal_dirs["removebg"])
    photoroom_iterate(input_resized_dir, bg_removal_dirs["photoroom"])
    bria_iterate(input_resized_dir, bg_removal_dirs["bria"])

    # Step 4: Composite every cut-out onto a solid green background.
    print("Adding green background...")
    add_green_background_process(bg_removed_dir, green_bg_dir)

    # Step 5: Resize the processed images to the final web width.
    print("Resizing processed images...")
    target_width = 800
    # NOTE(review): "clipdrop" is listed here, but no clipdrop output is
    # produced in Step 3 — its input directory will simply be empty.
    # Confirm whether clipdrop support is intended for a later change.
    subdirectories = ["bria", "photoroom", "clipdrop", "removebg"]
    os.makedirs(args.output_dir, exist_ok=True)
    for subdir in subdirectories:
        input_directory = os.path.join(green_bg_dir, subdir)
        output_directory = os.path.join(args.output_dir, subdir)
        process_images(input_directory, output_directory, target_width)

    # The web originals are resized from the merged (pre-background-removal)
    # images so the dataset keeps a reference copy of each photo.
    original_output_directory = os.path.join(args.output_dir, "web-original-images")
    process_images(original_images_dir, original_output_directory, target_width)

    # Step 6: Optionally upload the final images as a Hugging Face dataset.
    # dry_run defaults to True unless --push-dataset is passed.
    if args.dataset_name:
        upload_to_dataset(original_output_directory, args.output_dir, args.dataset_name, dry_run=not args.push_dataset)
    else:
        print("Please provide a dataset name using --dataset-name")

if __name__ == "__main__":
    main()
|
utils/add_green_background.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import os
|
2 |
from PIL import Image
|
|
|
3 |
|
4 |
def add_green_background_to_image(image_path, output_path, background_color=(0, 255, 0)):
|
5 |
"""Add a green background to an image and save it as PNG."""
|
@@ -9,28 +10,37 @@ def add_green_background_to_image(image_path, output_path, background_color=(0,
|
|
9 |
combined = Image.alpha_composite(background, img)
|
10 |
combined.save(output_path, "PNG")
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
def process_directory(input_dir, output_dir, background_color=(0, 255, 0)):
|
13 |
"""Recursively process a directory to add a green background to all images and convert them to PNG."""
|
14 |
if not os.path.exists(output_dir):
|
15 |
os.makedirs(output_dir)
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
#
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
34 |
|
35 |
# Example usage
|
36 |
input_directory = "../../background-removal-arena-v0/train/data/resized"
|
|
|
1 |
import os
|
2 |
from PIL import Image
|
3 |
+
from concurrent.futures import ThreadPoolExecutor
|
4 |
|
5 |
def add_green_background_to_image(image_path, output_path, background_color=(0, 255, 0)):
|
6 |
"""Add a green background to an image and save it as PNG."""
|
|
|
10 |
combined = Image.alpha_composite(background, img)
|
11 |
combined.save(output_path, "PNG")
|
12 |
|
13 |
+
def process_image_file(input_path, output_path, background_color):
    """Composite one image onto a green background, skipping existing outputs."""
    if os.path.exists(output_path):
        print(f"Skipped: {output_path} already exists")
        return
    add_green_background_to_image(input_path, output_path, background_color)
    print(f"Processed: {input_path} -> {output_path}")
|
20 |
+
|
21 |
def process_directory(input_dir, output_dir, background_color=(0, 255, 0)):
    """Recursively process a directory to add a green background to all images and convert them to PNG."""
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    futures = []
    with ThreadPoolExecutor() as executor:
        for root, _, filenames in os.walk(input_dir):
            for name in filenames:
                if not name.lower().endswith(('.png', '.jpg', '.jpeg')):
                    continue
                src = os.path.join(root, name)
                rel = os.path.relpath(src, input_dir)
                dst = os.path.join(output_dir, os.path.splitext(rel)[0] + '.png')

                # Mirror the source sub-folder layout under the output directory.
                os.makedirs(os.path.dirname(dst), exist_ok=True)

                # Fan each file out to the thread pool.
                futures.append(executor.submit(process_image_file, src, dst, background_color))

        # Block until every worker finishes and surface any exception it raised.
        for fut in futures:
            fut.result()
|
44 |
|
45 |
# Example usage
|
46 |
input_directory = "../../background-removal-arena-v0/train/data/resized"
|
utils/bria_rmbg20.py
CHANGED
@@ -51,7 +51,7 @@ def iterate_over_directory(directory_path, result_directory):
|
|
51 |
file_path = os.path.join(root, file)
|
52 |
|
53 |
result_file_name = os.path.splitext(os.path.basename(file_path))[0] + '.png'
|
54 |
-
result_file_directory = os.path.join(result_directory
|
55 |
|
56 |
if not os.path.exists(result_file_directory):
|
57 |
os.makedirs(result_file_directory)
|
|
|
51 |
file_path = os.path.join(root, file)
|
52 |
|
53 |
result_file_name = os.path.splitext(os.path.basename(file_path))[0] + '.png'
|
54 |
+
result_file_directory = os.path.join(result_directory)
|
55 |
|
56 |
if not os.path.exists(result_file_directory):
|
57 |
os.makedirs(result_file_directory)
|
utils/photoroom.py
CHANGED
@@ -41,8 +41,8 @@ def iterate_over_directory(directory_path, result_directory):
|
|
41 |
file_path = os.path.join(root, file)
|
42 |
|
43 |
result_file_name = os.path.splitext(os.path.basename(file_path))[0] + '.png'
|
44 |
-
result_file_directory = os.path.join(result_directory
|
45 |
-
|
46 |
if not os.path.exists(result_file_directory):
|
47 |
os.makedirs(result_file_directory)
|
48 |
|
|
|
41 |
file_path = os.path.join(root, file)
|
42 |
|
43 |
result_file_name = os.path.splitext(os.path.basename(file_path))[0] + '.png'
|
44 |
+
result_file_directory = os.path.join(result_directory)
|
45 |
+
|
46 |
if not os.path.exists(result_file_directory):
|
47 |
os.makedirs(result_file_directory)
|
48 |
|
utils/remove_backgrounds.py
DELETED
@@ -1,66 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
from photoroom import process_image as photoroom_process
|
3 |
-
from removebg import process_image as removebg_process
|
4 |
-
#from clipdrop import process_image as clipdrop_process
|
5 |
-
from bria_rmbg20 import process_image as bria_process
|
6 |
-
|
7 |
-
def create_directory(path):
|
8 |
-
if not os.path.exists(path):
|
9 |
-
os.makedirs(path)
|
10 |
-
|
11 |
-
def process_images(input_directory, output_directory, process_function, limit=None):
|
12 |
-
count = 0
|
13 |
-
for root, _, files in os.walk(input_directory):
|
14 |
-
for file in files:
|
15 |
-
if file.lower().endswith(('.png', '.jpg', '.jpeg', '.webp', '.heic')):
|
16 |
-
file_path = os.path.join(root, file)
|
17 |
-
result_file_name = os.path.splitext(os.path.basename(file_path))[0] + '.png'
|
18 |
-
result_file_directory = os.path.join(output_directory)
|
19 |
-
|
20 |
-
if not os.path.exists(result_file_directory):
|
21 |
-
os.makedirs(result_file_directory)
|
22 |
-
|
23 |
-
result_path = os.path.join(result_file_directory, result_file_name)
|
24 |
-
|
25 |
-
if not os.path.exists(result_path): # Check if the image has already been processed
|
26 |
-
print(file_path, result_path)
|
27 |
-
process_function(file_path, result_path)
|
28 |
-
count += 1
|
29 |
-
if limit and count >= limit:
|
30 |
-
return
|
31 |
-
|
32 |
-
def main(dry_run=False):
|
33 |
-
input_directory = "../data/resized-original-images"
|
34 |
-
output_base_directory = "../data/processed"
|
35 |
-
|
36 |
-
# Define output directories for each API
|
37 |
-
output_directories = {
|
38 |
-
"photoroom": os.path.join(output_base_directory, "photoroom"),
|
39 |
-
"removebg": os.path.join(output_base_directory, "removebg"),
|
40 |
-
#"clipdrop": os.path.join(output_base_directory, "clipdrop"),
|
41 |
-
"bria": os.path.join(output_base_directory, "bria")
|
42 |
-
}
|
43 |
-
|
44 |
-
# Create output directories if they don't exist
|
45 |
-
for directory in output_directories.values():
|
46 |
-
create_directory(directory)
|
47 |
-
|
48 |
-
if dry_run:
|
49 |
-
print("Starting dry run...")
|
50 |
-
k = 5
|
51 |
-
process_images(input_directory, output_directories["photoroom"], photoroom_process, limit=k)
|
52 |
-
process_images(input_directory, output_directories["removebg"], removebg_process, limit=k)
|
53 |
-
#process_images(input_directory, output_directories["clipdrop"], clipdrop_process, limit=k)
|
54 |
-
process_images(input_directory, output_directories["bria"], bria_process, limit=k)
|
55 |
-
print("Dry run completed.")
|
56 |
-
else:
|
57 |
-
print("Starting full processing...")
|
58 |
-
process_images(input_directory, output_directories["photoroom"], photoroom_process)
|
59 |
-
process_images(input_directory, output_directories["removebg"], removebg_process)
|
60 |
-
#process_images(input_directory, output_directories["clipdrop"], clipdrop_process)
|
61 |
-
process_images(input_directory, output_directories["bria"], bria_process)
|
62 |
-
print("Full processing completed.")
|
63 |
-
|
64 |
-
if __name__ == "__main__":
|
65 |
-
# Set dry_run to True for a dry run, or False for full processing
|
66 |
-
main(dry_run=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils/removebg.py
CHANGED
@@ -41,7 +41,7 @@ def iterate_over_directory(directory_path, result_directory):
|
|
41 |
file_path = os.path.join(root, file)
|
42 |
|
43 |
result_file_name = os.path.splitext(os.path.basename(file_path))[0] + '.png'
|
44 |
-
result_file_directory = os.path.join(result_directory
|
45 |
|
46 |
if not os.path.exists(result_file_directory):
|
47 |
os.makedirs(result_file_directory)
|
|
|
41 |
file_path = os.path.join(root, file)
|
42 |
|
43 |
result_file_name = os.path.splitext(os.path.basename(file_path))[0] + '.png'
|
44 |
+
result_file_directory = os.path.join(result_directory)
|
45 |
|
46 |
if not os.path.exists(result_file_directory):
|
47 |
os.makedirs(result_file_directory)
|
utils/resize_images.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import os
|
2 |
-
from PIL import Image
|
3 |
import concurrent.futures
|
4 |
|
5 |
# Define the directories
|
@@ -11,6 +11,24 @@ os.makedirs(output_directory, exist_ok=True)
|
|
11 |
|
12 |
def resize_image(input_path, output_path):
|
13 |
with Image.open(input_path) as img:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
# Calculate the current megapixels
|
15 |
current_megapixels = (img.width * img.height) / 1_000_000
|
16 |
max_megapixels = 10
|
@@ -27,7 +45,7 @@ def resize_image(input_path, output_path):
|
|
27 |
# If the image is smaller than 10 megapixels, save it as is
|
28 |
img.save(output_path)
|
29 |
|
30 |
-
def main():
|
31 |
# Iterate over the input directory
|
32 |
with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
|
33 |
for filename in os.listdir(input_directory):
|
@@ -44,4 +62,4 @@ def main():
|
|
44 |
print("All images have been resized and saved to the output directory.")
|
45 |
|
46 |
if __name__ == "__main__":
|
47 |
-
main()
|
|
|
1 |
import os
|
2 |
+
from PIL import Image, ExifTags
|
3 |
import concurrent.futures
|
4 |
|
5 |
# Define the directories
|
|
|
11 |
|
12 |
def resize_image(input_path, output_path):
|
13 |
with Image.open(input_path) as img:
|
14 |
+
# Correct image orientation using EXIF data
|
15 |
+
try:
|
16 |
+
for orientation in ExifTags.TAGS.keys():
|
17 |
+
if ExifTags.TAGS[orientation] == 'Orientation':
|
18 |
+
break
|
19 |
+
exif = img._getexif()
|
20 |
+
if exif is not None:
|
21 |
+
orientation = exif.get(orientation, None)
|
22 |
+
if orientation == 3:
|
23 |
+
img = img.rotate(180, expand=True)
|
24 |
+
elif orientation == 6:
|
25 |
+
img = img.rotate(270, expand=True)
|
26 |
+
elif orientation == 8:
|
27 |
+
img = img.rotate(90, expand=True)
|
28 |
+
except (AttributeError, KeyError, IndexError):
|
29 |
+
# Cases: image don't have getexif
|
30 |
+
pass
|
31 |
+
|
32 |
# Calculate the current megapixels
|
33 |
current_megapixels = (img.width * img.height) / 1_000_000
|
34 |
max_megapixels = 10
|
|
|
45 |
# If the image is smaller than 10 megapixels, save it as is
|
46 |
img.save(output_path)
|
47 |
|
48 |
+
def main(input_directory, output_directory):
|
49 |
# Iterate over the input directory
|
50 |
with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
|
51 |
for filename in os.listdir(input_directory):
|
|
|
62 |
print("All images have been resized and saved to the output directory.")
|
63 |
|
64 |
if __name__ == "__main__":
|
65 |
+
main(input_directory, output_directory)
|
utils/resize_processed_images.py
CHANGED
@@ -1,13 +1,37 @@
|
|
1 |
-
from PIL import Image
|
2 |
import os
|
|
|
3 |
|
4 |
def create_directory(path):
|
5 |
"""Create a directory if it doesn't exist."""
|
6 |
os.makedirs(path, exist_ok=True)
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
def resize_image(input_path, output_path, target_width):
|
9 |
"""Resize an image to the target width while maintaining aspect ratio."""
|
10 |
with Image.open(input_path) as img:
|
|
|
|
|
|
|
11 |
# Calculate the new height to maintain the aspect ratio
|
12 |
width_percent = target_width / img.width
|
13 |
target_height = int(img.height * width_percent)
|
@@ -18,23 +42,26 @@ def resize_image(input_path, output_path, target_width):
|
|
18 |
# Save the resized image in the same format as the input
|
19 |
img.save(output_path, format=img.format)
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
def process_images(input_directory, output_directory, target_width):
|
22 |
"""Process and resize images from the input directory to the output directory."""
|
23 |
create_directory(output_directory)
|
24 |
|
25 |
-
|
26 |
-
for
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
if not os.path.exists(result_path):
|
34 |
-
print(f"Resizing {file_path} to {result_path}")
|
35 |
-
resize_image(file_path, result_path, target_width)
|
36 |
-
else:
|
37 |
-
print(f"Skipped {file_path}, already resized.")
|
38 |
|
39 |
def main():
|
40 |
"""Main function to resize images in specified subdirectories."""
|
|
|
1 |
+
from PIL import Image, ExifTags
|
2 |
import os
|
3 |
+
from concurrent.futures import ThreadPoolExecutor
|
4 |
|
5 |
def create_directory(path):
|
6 |
"""Create a directory if it doesn't exist."""
|
7 |
os.makedirs(path, exist_ok=True)
|
8 |
|
9 |
+
def correct_orientation(img):
    """Correct image orientation using EXIF data.

    Reads the EXIF Orientation tag (when present) and rotates the image so
    it displays upright. Images without EXIF support pass through unchanged.
    """
    try:
        # Resolve the numeric tag id for the 'Orientation' EXIF field.
        orientation_tag = None
        for tag_id, tag_name in ExifTags.TAGS.items():
            if tag_name == 'Orientation':
                orientation_tag = tag_id
                break

        exif_data = img._getexif()
        if exif_data is not None:
            # Map EXIF orientation codes to the counter-rotation they need.
            rotation = {3: 180, 6: 270, 8: 90}.get(exif_data.get(orientation_tag, None))
            if rotation is not None:
                img = img.rotate(rotation, expand=True)
    except (AttributeError, KeyError, IndexError):
        # Cases: image doesn't have getexif
        pass
    return img
|
28 |
+
|
29 |
def resize_image(input_path, output_path, target_width):
|
30 |
"""Resize an image to the target width while maintaining aspect ratio."""
|
31 |
with Image.open(input_path) as img:
|
32 |
+
# Correct orientation
|
33 |
+
img = correct_orientation(img)
|
34 |
+
|
35 |
# Calculate the new height to maintain the aspect ratio
|
36 |
width_percent = target_width / img.width
|
37 |
target_height = int(img.height * width_percent)
|
|
|
42 |
# Save the resized image in the same format as the input
|
43 |
img.save(output_path, format=img.format)
|
44 |
|
45 |
+
def process_image_file(file_path, result_path, target_width):
    """Resize one image file, skipping it when the output already exists."""
    if os.path.exists(result_path):
        print(f"Skipped {file_path}, already resized.")
        return
    print(f"Resizing {file_path} to {result_path}")
    resize_image(file_path, result_path, target_width)
|
52 |
+
|
53 |
def process_images(input_directory, output_directory, target_width):
    """Process and resize images from the input directory to the output directory.

    Walks input_directory recursively and resizes every .png/.jpg/.jpeg file
    to target_width in parallel worker threads, writing each result into
    output_directory under its original filename.

    NOTE(review): output paths are flattened — two files with the same name
    in different subdirectories would map to the same result path; confirm
    filenames are unique across the input tree.
    """
    create_directory(output_directory)

    with ThreadPoolExecutor() as executor:
        for root, _, files in os.walk(input_directory):
            for file in files:
                if file.lower().endswith(('.png', '.jpg', '.jpeg')):
                    file_path = os.path.join(root, file)
                    # The original splitext-and-rejoin was a no-op: the output
                    # simply keeps the source filename unchanged.
                    result_path = os.path.join(output_directory, file)
                    executor.submit(process_image_file, file_path, result_path, target_width)
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
def main():
|
67 |
"""Main function to resize images in specified subdirectories."""
|
utils/upload-to-dataset.py
DELETED
@@ -1,84 +0,0 @@
|
|
1 |
-
from datasets import Dataset, Features, Value, Image
|
2 |
-
from huggingface_hub import HfApi
|
3 |
-
import os
|
4 |
-
from collections import defaultdict
|
5 |
-
import pandas as pd
|
6 |
-
import argparse
|
7 |
-
|
8 |
-
def upload_to_dataset(image_dir, dataset_name):
|
9 |
-
# Define the dataset features with dedicated columns for each model
|
10 |
-
features = Features({
|
11 |
-
"original_image": Image(), # Original image feature
|
12 |
-
"clipdrop_image": Image(), # Clipdrop segmented image
|
13 |
-
"bria_image": Image(), # Bria segmented image
|
14 |
-
"photoroom_image": Image(), # Photoroom segmented image
|
15 |
-
"removebg_image": Image(), # RemoveBG segmented image
|
16 |
-
"original_filename": Value("string") # Original filename
|
17 |
-
})
|
18 |
-
|
19 |
-
# Load image paths and metadata
|
20 |
-
data = defaultdict(lambda: {
|
21 |
-
"clipdrop_image": None,
|
22 |
-
"bria_image": None,
|
23 |
-
"photoroom_image": None,
|
24 |
-
"removebg_image": None
|
25 |
-
})
|
26 |
-
|
27 |
-
# Walk into the web-original-images folder
|
28 |
-
web_original_images_dir = os.path.join(image_dir, "web-original-images")
|
29 |
-
for root, _, files in os.walk(web_original_images_dir):
|
30 |
-
for f in files:
|
31 |
-
if f.endswith(('.png', '.jpg', '.jpeg')):
|
32 |
-
original_image_path = os.path.join(root, f)
|
33 |
-
data[f]["original_image"] = original_image_path
|
34 |
-
data[f]["original_filename"] = f
|
35 |
-
|
36 |
-
# Check for corresponding images in other directories
|
37 |
-
for source in ["clipdrop", "bria", "photoroom", "removebg"]:
|
38 |
-
# Check for processed images ending in .png or .jpg
|
39 |
-
for ext in ['.png', '.jpg']:
|
40 |
-
processed_image_filename = os.path.splitext(f)[0] + ext
|
41 |
-
source_image_path = os.path.join(image_dir, source, processed_image_filename)
|
42 |
-
|
43 |
-
if os.path.exists(source_image_path):
|
44 |
-
data[f][f"{source}_image"] = source_image_path
|
45 |
-
break # Stop checking other extensions if a file is found
|
46 |
-
|
47 |
-
# Convert the data to a dictionary of lists
|
48 |
-
dataset_dict = {
|
49 |
-
"original_image": [],
|
50 |
-
"clipdrop_image": [],
|
51 |
-
"bria_image": [],
|
52 |
-
"photoroom_image": [],
|
53 |
-
"removebg_image": [],
|
54 |
-
"original_filename": []
|
55 |
-
}
|
56 |
-
|
57 |
-
for filename, entry in data.items():
|
58 |
-
if "original_image" in entry:
|
59 |
-
dataset_dict["original_image"].append(entry["original_image"])
|
60 |
-
dataset_dict["clipdrop_image"].append(entry["clipdrop_image"])
|
61 |
-
dataset_dict["bria_image"].append(entry["bria_image"])
|
62 |
-
dataset_dict["photoroom_image"].append(entry["photoroom_image"])
|
63 |
-
dataset_dict["removebg_image"].append(entry["removebg_image"])
|
64 |
-
dataset_dict["original_filename"].append(filename)
|
65 |
-
|
66 |
-
# Save the data dictionary to a CSV file for inspection
|
67 |
-
df = pd.DataFrame.from_dict(dataset_dict)
|
68 |
-
df.to_csv("image_data.csv", index=False)
|
69 |
-
|
70 |
-
# Create a Dataset
|
71 |
-
dataset = Dataset.from_dict(dataset_dict, features=features)
|
72 |
-
|
73 |
-
# Push the dataset to Hugging Face Hub
|
74 |
-
api = HfApi()
|
75 |
-
dataset.push_to_hub(dataset_name, token=api.token)
|
76 |
-
|
77 |
-
if __name__ == "__main__":
|
78 |
-
parser = argparse.ArgumentParser(description="Upload images to a Hugging Face dataset.")
|
79 |
-
parser.add_argument("image_dir", type=str, help="Directory containing the images.")
|
80 |
-
parser.add_argument("dataset_name", type=str, help="Name of the dataset to upload to Hugging Face Hub.")
|
81 |
-
|
82 |
-
args = parser.parse_args()
|
83 |
-
|
84 |
-
upload_to_dataset(args.image_dir, args.dataset_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|