|
import os |
|
import argparse |
|
import shutil |
|
import sys |
|
from dotenv import load_dotenv, find_dotenv |
|
|
|
|
|
from utils.resize_images import main as resize_images_main |
|
from utils.removebg import iterate_over_directory as removebg_iterate |
|
from utils.photoroom import iterate_over_directory as photoroom_iterate |
|
from utils.bria_rmbg20 import iterate_over_directory as bria_iterate |
|
from utils.add_green_background import process_directory as add_green_background_process |
|
from utils.upload_to_dataset import upload_to_dataset |
|
from utils.resize_processed_images import process_images |
|
|
|
def check_env_variables(): |
|
"""Check if the necessary environment variables are loaded.""" |
|
if not find_dotenv(): |
|
sys.exit("Error: .env file not found.") |
|
|
|
load_dotenv() |
|
|
|
required_keys = ['REMOVEBG_API_KEY', 'PHOTOROOM_API_KEY', 'BRIA_API_TOKEN'] |
|
missing_keys = [key for key in required_keys if not os.getenv(key)] |
|
|
|
if missing_keys: |
|
sys.exit(f"Error: Missing environment variables: {', '.join(missing_keys)}") |
|
|
|
def copy_images(source_dir, dest_dir): |
|
os.makedirs(dest_dir, exist_ok=True) |
|
valid_extensions = ('.png', '.jpg', '.jpeg') |
|
|
|
|
|
for root, _, files in os.walk(source_dir): |
|
for filename in files: |
|
if filename.lower().endswith(valid_extensions): |
|
source_file = os.path.join(root, filename) |
|
|
|
|
|
folder_name = os.path.basename(root) |
|
|
|
new_filename = f"{folder_name}_{filename}" |
|
dest_file = os.path.join(dest_dir, new_filename) |
|
|
|
|
|
if os.path.isfile(source_file) and not os.path.exists(dest_file): |
|
shutil.copy2(source_file, dest_file) |
|
print(f"Copied: {new_filename}") |
|
else: |
|
print(f"Skipped: {filename} (already exists or not a file)") |
|
|
|
def main(): |
|
check_env_variables() |
|
|
|
parser = argparse.ArgumentParser(description="Image Processing Pipeline") |
|
parser.add_argument("--input-dir", type=str, default="original-images", help="Input directory for images") |
|
parser.add_argument("--work-dir", type=str, default="workdir", help="Working directory for intermediate images") |
|
parser.add_argument("--output-dir", type=str, default="final-images", help="Output directory for final images") |
|
parser.add_argument("--dataset-name", type=str, help="Name of the dataset to upload to Hugging Face Hub") |
|
parser.add_argument("--push-dataset", action="store_true", help="Push the dataset to the Hugging Face Hub") |
|
|
|
args = parser.parse_args() |
|
|
|
|
|
input_resized_dir = os.path.join(args.work_dir, "resized") |
|
bg_removed_dir = os.path.join(args.work_dir, "background-removed") |
|
green_bg_dir = os.path.join(args.work_dir, "green-background") |
|
|
|
|
|
for directory in [input_resized_dir, bg_removed_dir, green_bg_dir]: |
|
os.makedirs(directory, exist_ok=True) |
|
|
|
|
|
print("Moving images to final output directory...") |
|
original_images_dir = os.path.join(args.work_dir, "merged-categories") |
|
copy_images(args.input_dir, original_images_dir) |
|
|
|
|
|
print("Resizing images...") |
|
resize_images_main(input_directory=original_images_dir, output_directory=input_resized_dir) |
|
|
|
|
|
print("Removing backgrounds...") |
|
bg_removal_dirs = { |
|
"removebg": os.path.join(bg_removed_dir, "removebg"), |
|
"photoroom": os.path.join(bg_removed_dir, "photoroom"), |
|
"bria": os.path.join(bg_removed_dir, "bria") |
|
} |
|
|
|
for dir_path in bg_removal_dirs.values(): |
|
os.makedirs(dir_path, exist_ok=True) |
|
|
|
removebg_iterate(input_resized_dir, bg_removal_dirs["removebg"]) |
|
photoroom_iterate(input_resized_dir, bg_removal_dirs["photoroom"]) |
|
bria_iterate(input_resized_dir, bg_removal_dirs["bria"]) |
|
|
|
print("Adding green background...") |
|
add_green_background_process(bg_removed_dir, green_bg_dir) |
|
|
|
print("Resizing processed images...") |
|
target_width = 800 |
|
subdirectories = ["bria", "photoroom", "clipdrop", "removebg"] |
|
os.makedirs(args.output_dir, exist_ok=True) |
|
for subdir in subdirectories: |
|
input_directory = os.path.join(green_bg_dir, subdir) |
|
output_directory = os.path.join(args.output_dir, subdir) |
|
process_images(input_directory, output_directory, target_width) |
|
|
|
original_output_directory = os.path.join(args.output_dir, "web-original-images") |
|
process_images(original_images_dir, original_output_directory, target_width) |
|
|
|
if args.dataset_name: |
|
upload_to_dataset(original_output_directory, args.output_dir, args.dataset_name, dry_run=not args.push_dataset) |
|
else: |
|
print("Please provide a dataset name using --dataset-name") |
|
|
|
if __name__ == "__main__": |
|
main() |