tdurbor committed on
Commit
3bbedf7
1 Parent(s): c134030

Include Clipdrop processing

Browse files
Files changed (2) hide show
  1. image_processing_pipeline.py +15 -9
  2. utils/clipdrop.py +2 -2
image_processing_pipeline.py CHANGED
@@ -3,15 +3,17 @@ import argparse
3
  import shutil
4
  import sys
5
  from dotenv import load_dotenv, find_dotenv
 
6
 
7
  # Importing modules from the utils package
8
  from utils.resize_images import main as resize_images_main
9
  from utils.removebg import iterate_over_directory as removebg_iterate
10
  from utils.photoroom import iterate_over_directory as photoroom_iterate
11
  from utils.bria_rmbg20 import iterate_over_directory as bria_iterate
 
12
  from utils.add_green_background import process_directory as add_green_background_process
13
  from utils.upload_to_dataset import upload_to_dataset
14
- from utils.resize_processed_images import process_images
15
 
16
  def check_env_variables():
17
  """Check if the necessary environment variables are loaded."""
@@ -20,9 +22,9 @@ def check_env_variables():
20
 
21
  load_dotenv()
22
 
23
- required_keys = ['REMOVEBG_API_KEY', 'PHOTOROOM_API_KEY', 'BRIA_API_TOKEN']
24
  missing_keys = [key for key in required_keys if not os.getenv(key)]
25
-
26
  if missing_keys:
27
  sys.exit(f"Error: Missing environment variables: {', '.join(missing_keys)}")
28
 
@@ -84,15 +86,19 @@ def main():
84
  bg_removal_dirs = {
85
  "removebg": os.path.join(bg_removed_dir, "removebg"),
86
  "photoroom": os.path.join(bg_removed_dir, "photoroom"),
87
- "bria": os.path.join(bg_removed_dir, "bria")
 
88
  }
89
 
90
  for dir_path in bg_removal_dirs.values():
91
  os.makedirs(dir_path, exist_ok=True)
92
 
93
- removebg_iterate(input_resized_dir, bg_removal_dirs["removebg"])
94
- photoroom_iterate(input_resized_dir, bg_removal_dirs["photoroom"])
95
- bria_iterate(input_resized_dir, bg_removal_dirs["bria"])
 
 
 
96
 
97
  print("Adding green background...")
98
  add_green_background_process(bg_removed_dir, green_bg_dir)
@@ -104,10 +110,10 @@ def main():
104
  for subdir in subdirectories:
105
  input_directory = os.path.join(green_bg_dir, subdir)
106
  output_directory = os.path.join(args.output_dir, subdir)
107
- process_images(input_directory, output_directory, target_width)
108
 
109
  original_output_directory = os.path.join(args.output_dir, "web-original-images")
110
- process_images(original_images_dir, original_output_directory, target_width)
111
 
112
  if args.dataset_name:
113
  upload_to_dataset(original_output_directory, args.output_dir, args.dataset_name, dry_run=not args.push_dataset)
 
3
  import shutil
4
  import sys
5
  from dotenv import load_dotenv, find_dotenv
6
+ from concurrent.futures import ThreadPoolExecutor
7
 
8
  # Importing modules from the utils package
9
  from utils.resize_images import main as resize_images_main
10
  from utils.removebg import iterate_over_directory as removebg_iterate
11
  from utils.photoroom import iterate_over_directory as photoroom_iterate
12
  from utils.bria_rmbg20 import iterate_over_directory as bria_iterate
13
+ from utils.clipdrop import iterate_over_directory as clipdrop_iterate
14
  from utils.add_green_background import process_directory as add_green_background_process
15
  from utils.upload_to_dataset import upload_to_dataset
16
+ from utils.resize_processed_images import process_images as downsize_processed_images
17
 
18
  def check_env_variables():
19
  """Check if the necessary environment variables are loaded."""
 
22
 
23
  load_dotenv()
24
 
25
+ required_keys = ['REMOVEBG_API_KEY', 'PHOTOROOM_API_KEY', 'BRIA_API_TOKEN', 'CLIPDROP_API_KEY']
26
  missing_keys = [key for key in required_keys if not os.getenv(key)]
27
+
28
  if missing_keys:
29
  sys.exit(f"Error: Missing environment variables: {', '.join(missing_keys)}")
30
 
 
86
  bg_removal_dirs = {
87
  "removebg": os.path.join(bg_removed_dir, "removebg"),
88
  "photoroom": os.path.join(bg_removed_dir, "photoroom"),
89
+ "bria": os.path.join(bg_removed_dir, "bria"),
90
+ "clipdrop": os.path.join(bg_removed_dir, "clipdrop")
91
  }
92
 
93
  for dir_path in bg_removal_dirs.values():
94
  os.makedirs(dir_path, exist_ok=True)
95
 
96
+ # Use ThreadPoolExecutor to parallelize API calls
97
+ with ThreadPoolExecutor(max_workers=4) as executor:
98
+ executor.submit(removebg_iterate, input_resized_dir, bg_removal_dirs["removebg"])
99
+ executor.submit(photoroom_iterate, input_resized_dir, bg_removal_dirs["photoroom"])
100
+ executor.submit(bria_iterate, input_resized_dir, bg_removal_dirs["bria"])
101
+ executor.submit(clipdrop_iterate, input_resized_dir, bg_removal_dirs["clipdrop"])
102
 
103
  print("Adding green background...")
104
  add_green_background_process(bg_removed_dir, green_bg_dir)
 
110
  for subdir in subdirectories:
111
  input_directory = os.path.join(green_bg_dir, subdir)
112
  output_directory = os.path.join(args.output_dir, subdir)
113
+ downsize_processed_images(input_directory, output_directory, target_width)
114
 
115
  original_output_directory = os.path.join(args.output_dir, "web-original-images")
116
+ downsize_processed_images(original_images_dir, original_output_directory, target_width)
117
 
118
  if args.dataset_name:
119
  upload_to_dataset(original_output_directory, args.output_dir, args.dataset_name, dry_run=not args.push_dataset)
utils/clipdrop.py CHANGED
@@ -37,9 +37,9 @@ def iterate_over_directory(directory_path, result_directory):
37
  for file in files:
38
  if file.lower().endswith(('.png', '.jpg', '.jpeg', '.webp', '.heic')):
39
  file_path = os.path.join(root, file)
40
-
41
  result_file_name = os.path.splitext(os.path.basename(file_path))[0] + '.png'
42
- result_file_directory = os.path.join(result_directory, os.path.basename(root))
43
 
44
  if not os.path.exists(result_file_directory):
45
  os.makedirs(result_file_directory)
 
37
  for file in files:
38
  if file.lower().endswith(('.png', '.jpg', '.jpeg', '.webp', '.heic')):
39
  file_path = os.path.join(root, file)
40
+
41
  result_file_name = os.path.splitext(os.path.basename(file_path))[0] + '.png'
42
+ result_file_directory = os.path.join(result_directory)
43
 
44
  if not os.path.exists(result_file_directory):
45
  os.makedirs(result_file_directory)