tdurbor committed on
Commit
4dccf1d
1 Parent(s): c01e765

initial commit

Browse files
app.py ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ from typing import Tuple
4
+ from dotenv import load_dotenv
5
+ import gradio as gr
6
+ import numpy as np
7
+ from PIL import Image
8
+ import random
9
+ from db import compute_elo_scores, get_all_votes
10
+
11
+ # Configure logging
12
+ logging.basicConfig(level=logging.INFO)
13
+
14
+ # Load environment variables from .env file
15
+ load_dotenv()
16
+
17
+ # Access the API key
18
+ PHOTOROOM_API_KEY = os.getenv('PHOTOROOM_API_KEY')
19
+ CLIPDROP_API_KEY = os.getenv('CLIPDROP_API_KEY')
20
+
21
+
22
def fetch_elo_scores():
    """Compute the Elo scores and log the outcome.

    Returns:
        Whatever ``compute_elo_scores()`` returns, or ``None`` if that call
        raises.
    """
    try:
        scores = compute_elo_scores()
    except Exception as exc:
        # Best effort: the caller treats None as "scores unavailable".
        logging.error("Error computing Elo scores: %s", str(exc))
        return None
    logging.info("Elo scores successfully computed.")
    return scores
31
+
32
def update_rankings_table():
    """Build the leaderboard rows from the current Elo scores.

    Returns:
        A list of ``[model_name, score]`` rows. When scores are available the
        rows are sorted by score, highest first, and a model missing from the
        scores gets 1000. When scores are unavailable (``None`` or empty),
        every model is listed with a score of -1.
    """
    # "Clipdrop" is intentionally left out; it is disabled in the arena.
    model_names = ("Photoroom", "RemoveBG", "BRIA RMBG 2.0")

    elo_scores = fetch_elo_scores()
    if not elo_scores:
        return [[name, -1] for name in model_names]

    rows = [[name, int(elo_scores.get(name, 1000))] for name in model_names]
    return sorted(rows, key=lambda row: row[1], reverse=True)
51
+
52
def select_new_image():
    """Pick a random input image and two model outputs for it.

    Up to ``max_attempts`` images are tried. An image whose input file or
    whose selected model outputs cannot be opened is excluded from the
    following attempts.

    Returns:
        A tuple ``(input_path, input_image, model_a_path, model_a_image,
        model_b_path, model_b_image, model_a_name, model_b_name)``, or
        ``None`` when no usable image could be found.
    """
    image_paths = load_images_from_directory("data/resized-original-images")
    failed_paths = set()
    max_attempts = 10

    for _ in range(max_attempts):
        available_images = [path for path in image_paths if path not in failed_paths]

        if not available_images:
            logging.error("No available images to select from.")
            return None

        random_image_path = random.choice(available_images)
        image_filename = os.path.splitext(os.path.basename(random_image_path))[0] + ".png"
        segmented_image_paths = {
            "Photoroom": os.path.join("data/processed/photoroom", image_filename),
            # "Clipdrop" is disabled in the arena.
            "RemoveBG": os.path.join("data/processed/removebg", image_filename),
            "BRIA RMBG 2.0": os.path.join("data/processed/bria", image_filename),
        }
        model_a_name, model_b_name = random.sample(list(segmented_image_paths.keys()), 2)
        model_a_output_path = segmented_image_paths[model_a_name]
        model_b_output_path = segmented_image_paths[model_b_name]

        opened_images = []
        try:
            # The input image is opened inside the try as well, so an
            # unreadable input triggers a resample instead of an exception.
            for path in (random_image_path, model_a_output_path, model_b_output_path):
                opened_images.append(Image.open(path))
        except OSError as e:
            # FileNotFoundError and PIL's unreadable-image error are OSErrors.
            for image in opened_images:
                image.close()
            logging.error("Could not open image: %s. Resampling another image.", e)
            failed_paths.add(random_image_path)
            continue

        input_image, model_a_output_image, model_b_output_image = opened_images
        return (random_image_path, input_image, model_a_output_path, model_a_output_image,
                model_b_output_path, model_b_output_image, model_a_name, model_b_name)

    logging.error("Failed to select a new image after %d attempts.", max_attempts)
    return None
91
+
92
def get_notice_markdown():
    """Return the arena's introductory markdown, including the current vote count.

    The count is the number of rows returned by ``get_all_votes()``.
    """
    total_votes = len(get_all_votes())
    notice = f"""
# ⚔️ Background Removal Arena: Compare & Test the Best Background Removal Models

## 📜 How It Works
- **Blind Test**: You will see two images with their background removed from two anonymous background removal models (Clipdrop, RemoveBG, Photoroom, BRIA RMBG 2.0).
- **Vote for the Best**: Choose the best result, if none stand out choose "Tie".

## 📊 Stats
- **Total #votes**: {total_votes}

## 👇 Test now!
"""
    return notice
107
def compute_mask_difference(segmented_a, segmented_b):
    """Compute the per-pixel absolute difference between two cut-out images.

    Each image is reduced to a 2-D array holding the sum of the R, G and B
    channels, with fully transparent pixels (alpha == 0) forced to zero. The
    result is the absolute difference of those two arrays.

    Args:
        segmented_a: RGBA image, as a PIL image or an array of shape
            (height, width, 4). Objects exposing ``convert`` (PIL images)
            are converted to RGBA first.
        segmented_b: Second image, same shape as ``segmented_a``.

    Returns:
        A 2-D signed integer array; non-zero where the two cut-outs differ.
    """
    def opaque_rgb_sum(image):
        if hasattr(image, "convert"):
            # Guarantee an alpha channel for RGB / palette PIL images.
            image = image.convert("RGBA")
        pixels = np.asarray(image)
        # Accumulate in a signed dtype: summing uint8 yields an unsigned
        # array, and the subtraction below would wrap around instead of
        # going negative.
        rgb_sum = np.sum(pixels[..., :3], axis=-1, dtype=np.int64)
        return np.where(pixels[..., 3] == 0, 0, rgb_sum)

    return np.abs(opaque_rgb_sum(segmented_a) - opaque_rgb_sum(segmented_b))
118
+
119
def gradio_interface():
    """Create and return the Gradio interface.

    The app has three tabs: the arena (two model outputs side by side with
    vote buttons), the Elo leaderboard, and a table of all recorded votes.

    The currently displayed image pair is kept in ``gr.State`` components
    that are wired as event inputs and outputs. Handlers receive the state
    values as plain arguments and return the new values, rather than reading
    and mutating the components' ``.value`` attribute, which is shared by
    every visitor.

    Returns:
        The ``gr.Blocks`` application (not yet launched).

    Raises:
        RuntimeError: If no image can be selected while building the UI.
    """

    def build_annotation(input_image, segmented_a, segmented_b):
        """Return the AnnotatedImage value marking where the two outputs differ."""
        difference = compute_mask_difference(segmented_a, segmented_b)
        # Always pass a boolean mask with the same label, both for the first
        # render and after each vote.
        return (input_image, [(difference > 0, "Difference Mask")])

    def vote_for_model(choice, fpaths, model_a_name, model_b_name):
        """Record a vote and return the values for the next comparison.

        Args:
            choice: "model_a", "model_b" or "tie".
            fpaths: Tuple of path strings (input, output A, output B) for the
                pair that was voted on.
            model_a_name: Name of the model shown as A.
            model_b_name: Name of the model shown as B.

        Returns:
            Values for the vote buttons' outputs, in order: input path,
            annotated input image, path A, image A, path B, image B,
            model A name, model B name, notice markdown.
        """
        logging.info("Voting for model: %s", choice)

        vote_data = {
            "image_id": fpaths[0],
            "model_a": model_a_name,
            "model_b": model_b_name,
            "winner": choice,
            "fpath_a": fpaths[1],
            "fpath_b": fpaths[2],
        }

        try:
            logging.debug("Adding vote data to the database: %s", vote_data)
            from db import add_vote
            result = add_vote(vote_data)
            logging.info("Vote successfully recorded in the database with ID: %s", result["id"])
        except Exception as e:
            # Best effort: a failed write must not block the next round.
            logging.error("Error recording vote: %s", str(e))

        selection = select_new_image()
        if selection is None:
            raise gr.Error("Could not load a new image to compare.")
        (new_fpath_input, new_input_image, new_fpath_a, new_segmented_a,
         new_fpath_b, new_segmented_b, new_a_name, new_b_name) = selection

        return (new_fpath_input,
                build_annotation(new_input_image, new_segmented_a, new_segmented_b),
                new_fpath_a, new_segmented_a,
                new_fpath_b, new_segmented_b,
                new_a_name, new_b_name,
                get_notice_markdown())  # refreshed vote count

    def make_vote_handler(choice):
        """Return a click handler that votes ``choice`` on the displayed pair."""
        def handler(input_path, path_a, path_b, name_a, name_b):
            return vote_for_model(choice, (input_path, path_a, path_b), name_a, name_b)
        return handler

    def update_vote_data():
        """Return every recorded vote as a list of table rows."""
        votes = get_all_votes()
        return [[vote.id, vote.image_id, vote.model_a, vote.model_b, vote.winner, vote.timestamp]
                for vote in votes]

    with gr.Blocks() as demo:
        gr.Markdown("# Background Removal Arena")

        with gr.Tabs():
            with gr.Tab("⚔️ Arena (battle)", id=0):
                notice_markdown = gr.Markdown(get_notice_markdown(), elem_id="notice_markdown")

                selection = select_new_image()
                if selection is None:
                    raise RuntimeError("No image could be loaded to initialise the arena.")
                (initial_fpath_input, input_image, initial_fpath_a, segmented_a,
                 initial_fpath_b, segmented_b, a_name, b_name) = selection

                model_a_name = gr.State(a_name)
                model_b_name = gr.State(b_name)
                fpath_input = gr.State(initial_fpath_input)
                fpath_a = gr.State(initial_fpath_a)
                fpath_b = gr.State(initial_fpath_b)

                with gr.Row():
                    image_a_display = gr.Image(
                        value=segmented_a,
                        type="pil",
                        label="Model A",
                        width=500,
                        height=500
                    )
                    input_image_display = gr.AnnotatedImage(
                        value=build_annotation(input_image, segmented_a, segmented_b),
                        label="Input Image",
                        width=500,
                        height=500
                    )
                    image_b_display = gr.Image(
                        value=segmented_b,
                        type="pil",
                        label="Model B",
                        width=500,
                        height=500
                    )
                with gr.Row():
                    vote_a_btn = gr.Button("👈 A is better")
                    vote_tie = gr.Button("🤝 Tie")
                    vote_b_btn = gr.Button("👉 B is better")

                vote_inputs = [fpath_input, fpath_a, fpath_b, model_a_name, model_b_name]
                vote_outputs = [
                    fpath_input, input_image_display, fpath_a, image_a_display,
                    fpath_b, image_b_display, model_a_name, model_b_name, notice_markdown
                ]
                for button, choice in ((vote_a_btn, "model_a"),
                                       (vote_b_btn, "model_b"),
                                       (vote_tie, "tie")):
                    button.click(
                        fn=make_vote_handler(choice),
                        inputs=vote_inputs,
                        outputs=vote_outputs
                    )

            with gr.Tab("🏆 Leaderboard", id=1) as leaderboard_tab:
                rankings_table = gr.Dataframe(
                    headers=["Model", "Ranking"],
                    value=update_rankings_table(),
                    label="Current Model Rankings",
                    column_widths=[180, 60],
                    row_count=4
                )

                # Recompute the rankings every time the tab is opened.
                leaderboard_tab.select(
                    fn=update_rankings_table,
                    outputs=rankings_table
                )

            with gr.Tab("📊 Vote Data", id=2) as vote_data_tab:
                vote_table = gr.Dataframe(
                    headers=["ID", "Image ID", "Model A", "Model B", "Winner", "Timestamp"],
                    value=update_vote_data(),
                    label="Vote Data",
                    column_widths=[20, 150, 100, 100, 100, 150],
                    row_count=0
                )

                # Reload the vote list every time the tab is opened.
                vote_data_tab.select(
                    fn=update_vote_data,
                    outputs=vote_table
                )

    return demo
256
+
257
def load_images_from_directory(directory):
    """Return the paths of the image files directly inside ``directory``.

    Files are matched by extension (.png, .jpg, .jpeg), case-insensitively so
    that names such as ``photo.JPG`` are included. Paths are returned sorted
    by file name so the result does not depend on the listing order.

    Args:
        directory: Directory to list (not searched recursively).

    Returns:
        A list of ``os.path.join(directory, name)`` paths.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    return [
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if name.lower().endswith(image_extensions)
    ]
261
+
262
+ if __name__ == "__main__":
263
+ demo = gradio_interface()
264
+ demo.launch()
db.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import create_engine, Column, Integer, String, DateTime
2
+ from sqlalchemy.ext.declarative import declarative_base
3
+ from sqlalchemy.orm import sessionmaker, Session
4
+ from datetime import datetime
5
+ import pandas as pd
6
+ import uuid
7
+ from rating_systems import compute_elo
8
+
9
+
10
+ DATABASE_URL = "sqlite:///./data/votes.db" # Example with SQLite, replace with PostgreSQL for production
11
+ engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
12
+ SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
13
+ Base = declarative_base()
14
+
15
+ # Database model
16
class Vote(Base):
    """ORM model for one recorded arena vote (table ``votes``)."""

    __tablename__ = "votes"

    # Integer primary key.
    id = Column(Integer, primary_key=True, index=True)
    # The app stores the path of the input image here.
    image_id = Column(String, index=True)
    # Names of the two models that were compared.
    model_a = Column(String)
    model_b = Column(String)
    # The app stores "model_a", "model_b" or "tie".
    winner = Column(String)
    # Not set by the app's vote handler in this commit.
    user_id = Column(String, index=True)
    # Paths of the two model output images that were shown.
    fpath_a = Column(String)
    fpath_b = Column(String)
    # Filled with the current UTC time when no value is supplied.
    timestamp = Column(DateTime, default=datetime.utcnow)
27
+
28
+ Base.metadata.create_all(bind=engine)
29
+
30
+ # Dependency for database session
31
def get_db():
    """Yield a new database session and close it once the generator finishes."""
    session = SessionLocal()
    try:
        yield session
    finally:
        # Runs whether the consumer finished normally or raised.
        session.close()
37
+
38
def add_vote(vote_data):
    """Insert one vote and return its identifying fields.

    Args:
        vote_data: Mapping of ``Vote`` column names to values; passed to the
            ``Vote`` constructor as keyword arguments.

    Returns:
        A dict with the stored row's ``id``, ``user_id`` and ``timestamp``.
    """
    with SessionLocal() as session:
        record = Vote(**vote_data)
        session.add(record)
        session.commit()
        # Reload the row so database-generated values (id, timestamp) are set.
        session.refresh(record)
        summary = {
            "id": record.id,
            "user_id": record.user_id,
            "timestamp": record.timestamp,
        }
    return summary
45
+
46
+ # Function to get all votes
47
def get_all_votes():
    """Return every ``Vote`` row, loaded through a short-lived session."""
    with SessionLocal() as session:
        return session.query(Vote).all()
51
+
52
+ # Function to compute Elo scores
53
def compute_elo_scores():
    """Compute Elo ratings from all stored votes.

    Returns:
        The result of ``compute_elo`` on a DataFrame with the columns
        ``model_a``, ``model_b`` and ``winner``, one row per vote.
    """
    columns = ("model_a", "model_b", "winner")
    with SessionLocal() as session:
        votes = session.query(Vote).all()
        # Read the attributes while the session is still open.
        data = {column: [getattr(vote, column) for vote in votes] for column in columns}
    return compute_elo(pd.DataFrame(data))
launch.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pip3 install -r requirements.txt
2
+ python3 app.py
rating_systems.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This code is copied from the following source:
2
+ # https://github.com/lm-sys/FastChat/blob/main/fastchat/serve/monitor/rating_systems.py
3
+
4
+ import math
5
+ import pandas as pd
6
+ import numpy as np
7
+ from sqlalchemy.orm import Session
8
+ import pandas as pd
9
+
10
def get_matchups_models(df):
    """Encode the competitors of every match as integer model ids.

    Args:
        df: DataFrame with ``model_a`` and ``model_b`` columns.

    Returns:
        ``(matchups, models)``: ``matchups`` is an (N, 2) array of model ids
        (column 0 for model_a, column 1 for model_b) and ``models`` is the
        list of model names, where a model's id is its index in the list.
    """
    # Factorize both columns together so they share one id space.
    stacked = pd.concat([df["model_a"], df["model_b"]])
    codes, uniques = pd.factorize(stacked)

    half = len(df)
    ids_a, ids_b = codes[:half], codes[half:]
    return np.column_stack([ids_a, ids_b]), uniques.to_list()
15
+
16
+
17
def preprocess_for_elo(df):
    """Convert a votes DataFrame into the numpy arrays used by Elo.

    Returns:
        ``(matchups, outcomes, models)`` where
        matchups: (N, 2) array of model ids for the competitors in a match
        outcomes: float (N,) array of 1.0, 0.5 or 0.0 for a win, tie or loss
            of model_a (any ``winner`` other than "model_a"/"model_b" is 0.5)
        models: list of model names indexed by model id
    """
    matchups, models = get_matchups_models(df)
    winner = df["winner"]
    outcomes = np.where(
        winner == "model_a",
        1.0,
        np.where(winner == "model_b", 0.0, 0.5),
    )
    return matchups, outcomes, models
28
+
29
+
30
def compute_elo(df, k=4.0, base=10.0, init_rating=1000.0, scale=400.0):
    """Compute sequential Elo ratings from a votes DataFrame.

    Matches are processed in row order. Each one moves the two competitors'
    ratings by ``k * (outcome - expected)`` in opposite directions.

    Args:
        df: DataFrame with ``model_a``, ``model_b`` and ``winner`` columns.
        k: Update step size.
        base: Base of the logistic expectation curve.
        init_rating: Rating every model starts with.
        scale: Rating difference scale of the expectation curve.

    Returns:
        Dict mapping each model name to its final rating.
    """
    matchups, outcomes, models = preprocess_for_elo(df)
    ratings = np.full(shape=(len(models),), fill_value=init_rating)
    alpha = math.log(base) / scale

    for pair, outcome in zip(matchups, outcomes):
        idx_a, idx_b = pair
        rating_gap = ratings[idx_b] - ratings[idx_a]
        # Expected score of model_a under the logistic model.
        expected_a = 1.0 / (1.0 + math.exp(alpha * rating_gap))
        delta = k * (outcome - expected_a)
        ratings[idx_a] += delta
        ratings[idx_b] -= delta

    return dict(zip(models, ratings))
42
+
43
+
44
def compute_elo_from_votes(db: Session):
    """Compute Elo ratings from all votes reachable through ``db``.

    Args:
        db: An open SQLAlchemy session.

    Returns:
        The dict returned by ``compute_elo`` (model name -> rating).
    """
    # ``Vote`` was previously referenced without being imported, which raised
    # NameError on every call. It is imported here rather than at module
    # level because db.py imports this module, so a top-level import would
    # be circular.
    from db import Vote

    # Retrieve all votes from the database
    votes = db.query(Vote).all()

    # Convert votes to a DataFrame
    data = {
        "model_a": [vote.model_a for vote in votes],
        "model_b": [vote.model_b for vote in votes],
        "winner": [vote.winner for vote in votes],
    }
    df = pd.DataFrame(data)

    # Compute Elo scores using the existing function
    return compute_elo(df)
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ certifi==2024.8.30
2
+ fastapi==0.115.4
3
+ gradio==5.4.0
4
+ numpy==2.0.2
5
+ pandas==2.2.3
6
+ pillow==11.0.0
7
+ python-dotenv==1.0.1
8
+ requests==2.32.3
9
+ SQLAlchemy==2.0.36
10
+ uvicorn==0.30.1
utils/__init__.py ADDED
File without changes
utils/bria_rmbg20.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import concurrent.futures
4
+ from dotenv import load_dotenv
5
+
6
+ # Load environment variables from .env file
7
+ load_dotenv()
8
+ API_TOKEN = os.getenv('BRIA_API_TOKEN')
9
+
10
def process_image(input_image_path, output_image_path):
    """Remove the background of one image with the Bria API and save the result.

    The image is uploaded, the ``result_url`` from the JSON response is
    downloaded, and its bytes are written to ``output_image_path``.

    Args:
        input_image_path: Path of the image to upload.
        output_image_path: Path the processed image is written to.

    Returns:
        ``None`` on success, otherwise a string describing the failure
        (which is also printed).
    """
    import mimetypes

    url = "https://engine.prod.bria-api.com/v1/background/remove"
    headers = {
        'api_token': API_TOKEN
    }
    # Derive the content type from the file name instead of labelling every
    # upload as JPEG; fall back to the previous value when it is unknown.
    content_type = mimetypes.guess_type(input_image_path)[0] or 'image/jpeg'

    try:
        # The context manager closes the upload handle, which was previously
        # left open for every processed image.
        with open(input_image_path, 'rb') as image_file:
            files = [
                ('file', (os.path.basename(input_image_path), image_file, content_type))
            ]
            # Make the POST request to the Bria API
            response = requests.post(url, headers=headers, files=files)
        response.raise_for_status()

        # Extract the result URL from the response
        processed_image_url = response.json().get('result_url')
        if not processed_image_url:
            # Report the problem instead of silently producing no output.
            message = f"No result_url in response ({input_image_path})"
            print(f"Error: {message}")
            return message

        # Download the processed image
        image_response = requests.get(processed_image_url)
        image_response.raise_for_status()  # Ensure the request was successful

        with open(output_image_path, 'wb') as f:
            f.write(image_response.content)
        print(f"Image processed and saved to {output_image_path}")

    except requests.RequestException as e:
        print(f"Error: {str(e)} ({input_image_path})")
        return str(e)
45
+
46
def iterate_over_directory(directory_path, result_directory):
    """Submit every unprocessed image under ``directory_path`` for processing.

    For each image, the result path is
    ``<result_directory>/<name of the image's folder>/<stem>.png``. Images
    whose result file already exists are skipped. Work runs on a pool of 16
    threads, and the function returns once all submitted jobs have finished.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.webp', '.heic')

    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
        for root, _, files in os.walk(directory_path):
            for file_name in files:
                if not file_name.lower().endswith(image_suffixes):
                    continue

                source_path = os.path.join(root, file_name)
                target_directory = os.path.join(result_directory, os.path.basename(root))
                if not os.path.exists(target_directory):
                    os.makedirs(target_directory)

                target_name = os.path.splitext(os.path.basename(source_path))[0] + '.png'
                target_path = os.path.join(target_directory, target_name)
                if os.path.exists(target_path):
                    continue  # don't re-process images

                executor.submit(process_image, source_path, target_path)
63
+
64
+ if __name__ == "__main__":
65
+ INPUT_DIRECTORY = "../original-images/"
66
+ OUTPUT_DIRECTORY = "../result-bria-rmbg20/"
67
+
68
+ if not os.path.exists(OUTPUT_DIRECTORY):
69
+ os.makedirs(OUTPUT_DIRECTORY)
70
+
71
+ iterate_over_directory(directory_path=INPUT_DIRECTORY, result_directory=OUTPUT_DIRECTORY)
utils/check_images.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from PIL import Image
3
+ import pandas as pd
4
+
5
+ # Define the directories
6
+ original_directories = [
7
+ "../data/resized-original-images"
8
+ ]
9
+ processed_directories = {
10
+ "Photoroom": "../data/processed/photoroom",
11
+ "Clipdrop": "../data/processed/clipdrop",
12
+ "RemoveBG": "../data/processed/removebg",
13
+ "BRIA RMBG 2.0": "../data/processed/bria"
14
+ }
15
+
16
def compute_megapixels(width, height):
    """Return the size of a ``width`` x ``height`` image in megapixels."""
    pixel_count = width * height
    return pixel_count / 1_000_000
18
+
19
def check_image_sizes_comparison():
    """Write a CSV comparing original image sizes with each model's output.

    For every .png/.jpg/.jpeg file in ``original_directories``, the row holds
    the original's dimensions and megapixels, plus the same two values for
    the matching ``<stem>.png`` in each of ``processed_directories``
    ("Not found" when that file is missing). The rows are written to
    ``image_sizes_comparison.csv`` in the current directory.
    """
    def describe(path):
        # Returns ("WxH", megapixels) for the image at ``path``.
        with Image.open(path) as img:
            width, height = img.size[0], img.size[1]
        return f"{width}x{height}", compute_megapixels(width, height)

    rows = []
    for original_directory in original_directories:
        for filename in os.listdir(original_directory):
            if not filename.endswith(('.png', '.jpg', '.jpeg')):
                continue

            original_size, original_megapixels = describe(
                os.path.join(original_directory, filename)
            )
            row = {
                "original_file_name": filename,
                "original_size": original_size,
                "original_megapixels": original_megapixels,
            }

            png_filename = os.path.splitext(filename)[0] + ".png"
            for model, directory in processed_directories.items():
                image_path = os.path.join(directory, png_filename)
                if os.path.exists(image_path):
                    image_size, image_megapixels = describe(image_path)
                else:
                    image_size = "Not found"
                    image_megapixels = "Not found"
                row[model] = image_size
                row[f"{model}_megapixels"] = image_megapixels

            rows.append(row)

    pd.DataFrame(rows).to_csv("image_sizes_comparison.csv", index=False)
53
+
54
+ if __name__ == "__main__":
55
+ check_image_sizes_comparison()
utils/clipdrop.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+
4
+ # Load environment variables from .env file
5
+ from dotenv import load_dotenv
6
+ load_dotenv()
7
+ API_KEY = os.getenv('CLIPDROP_API_KEY')
8
+
9
def process_image(input_image_path, output_image_path):
    """Remove the background of one image with the Clipdrop API.

    The response body is written to ``output_image_path``.

    Returns:
        ``None`` on success; on a request failure the error is printed and
        its text returned.
    """
    endpoint = "https://clipdrop-api.co/remove-background/v1"
    form_fields = {
        "transparency_handling": "discard_alpha_layer"
    }
    request_headers = {
        "Accept": "image/png",
        "x-api-key": API_KEY
    }

    try:
        with open(input_image_path, 'rb') as image_file:
            response = requests.post(
                endpoint,
                data=form_fields,
                files={"image_file": image_file},
                headers=request_headers,
            )
            response.raise_for_status()

            with open(output_image_path, 'wb') as output_file:
                output_file.write(response.content)
            print(f"Image downloaded and saved to {output_image_path}")
    except requests.RequestException as exc:
        print(f"Error: {str(exc)} ({input_image_path})")
        return str(exc)
34
+
35
def iterate_over_directory(directory_path, result_directory):
    """Process every not-yet-processed image under ``directory_path``, one at a time.

    Results go to ``<result_directory>/<name of the image's folder>/<stem>.png``.
    Images whose result file already exists are skipped.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.webp', '.heic')

    for root, _, files in os.walk(directory_path):
        for file_name in files:
            if not file_name.lower().endswith(image_suffixes):
                continue

            source_path = os.path.join(root, file_name)
            target_directory = os.path.join(result_directory, os.path.basename(root))
            if not os.path.exists(target_directory):
                os.makedirs(target_directory)

            target_name = os.path.splitext(os.path.basename(source_path))[0] + '.png'
            target_path = os.path.join(target_directory, target_name)
            if os.path.exists(target_path):
                continue  # don't re-process images

            process_image(source_path, target_path)
51
+
52
+ if __name__ == "__main__":
53
+ INPUT_DIRECTORY = "../original-images/"
54
+ OUTPUT_DIRECTORY = "../result-clipdrop/"
55
+
56
+ if not os.path.exists(OUTPUT_DIRECTORY):
57
+ os.makedirs(OUTPUT_DIRECTORY)
58
+
59
+ iterate_over_directory(directory_path=INPUT_DIRECTORY, result_directory=OUTPUT_DIRECTORY)
utils/move_images.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+
4
def copy_images(source_dir, dest_dir):
    """Copy every file under ``source_dir`` into the flat directory ``dest_dir``.

    Each copy is named ``<containing folder name>_<original file name>``. A
    file is skipped when the destination name already exists. No check on
    the file type is made.
    """
    # Ensure the destination directory exists
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)

    for root, _, files in os.walk(source_dir):
        folder_name = os.path.basename(root)
        for filename in files:
            print(filename)
            source_file = os.path.join(root, filename)
            # Prefix with the folder name to keep names from different folders apart.
            new_filename = f"{folder_name}_{filename}"
            dest_file = os.path.join(dest_dir, new_filename)

            if not os.path.isfile(source_file) or os.path.exists(dest_file):
                print(f"Skipped: {filename} (already exists or not a file)")
                continue

            shutil.copy2(source_file, dest_file)
            print(f"Copied: {new_filename}")
27
+
28
def main():
    """Copy the categorised images into the flat original-images directory."""
    copy_images('../categorised-images', '../original-images')
35
+
36
+ if __name__ == "__main__":
37
+ main()
utils/photoroom.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import concurrent.futures
4
+
5
+ # Load environment variables from .env file
6
+ from dotenv import load_dotenv
7
+ load_dotenv()
8
+
9
+
10
+ API_KEY = os.getenv('PHOTOROOM_API_KEY')
11
+
12
def process_image(input_image_path, output_image_path):
    """Remove the background of one image with the Photoroom segment API.

    The response body is written to ``output_image_path``.

    Returns:
        ``None`` on success; on a request failure the error is printed and
        its text returned.
    """
    endpoint = "https://sdk.photoroom.com/v1/segment"
    request_headers = {
        "Accept": "image/png, application/json",
        "pr-background-removal-model-version": "2024-09-26",
        "x-api-key": API_KEY
    }

    try:
        with open(input_image_path, 'rb') as image_file:
            response = requests.post(
                endpoint,
                files={"image_file": image_file},
                headers=request_headers,
            )
            response.raise_for_status()

            with open(output_image_path, 'wb') as output_file:
                output_file.write(response.content)
            print(f"Image downloaded and saved to {output_image_path}")
    except requests.RequestException as exc:
        print(f"Error: {str(exc)} ({input_image_path})")
        return str(exc)
35
+
36
def iterate_over_directory(directory_path, result_directory):
    """Submit every unprocessed image under ``directory_path`` to a thread pool.

    Results go to ``<result_directory>/<name of the image's folder>/<stem>.png``.
    Images whose result file already exists are skipped. The pool has 16
    workers, and the function returns after all submitted jobs finish.
    """
    extensions = ('.png', '.jpg', '.jpeg', '.webp', '.heic')

    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as pool:
        for root, _, files in os.walk(directory_path):
            destination_directory = os.path.join(result_directory, os.path.basename(root))
            for entry in files:
                if not entry.lower().endswith(extensions):
                    continue

                input_path = os.path.join(root, entry)
                if not os.path.exists(destination_directory):
                    os.makedirs(destination_directory)

                output_name = os.path.splitext(os.path.basename(input_path))[0] + '.png'
                output_path = os.path.join(destination_directory, output_name)
                if not os.path.exists(output_path):  # don't re-process images
                    pool.submit(process_image, input_path, output_path)
53
+
54
+ if __name__ == "__main__":
55
+ INPUT_DIRECTORY = "../original-images/"
56
+ OUTPUT_DIRECTORY = "../result-photoroom/"
57
+
58
+ if not os.path.exists(OUTPUT_DIRECTORY):
59
+ os.makedirs(OUTPUT_DIRECTORY)
60
+
61
+ iterate_over_directory(directory_path=INPUT_DIRECTORY, result_directory=OUTPUT_DIRECTORY)
utils/remove_backgrounds.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from photoroom import process_image as photoroom_process
3
+ from removebg import process_image as removebg_process
4
+ #from clipdrop import process_image as clipdrop_process
5
+ from bria_rmbg20 import process_image as bria_process
6
+
7
def create_directory(path):
    """Create ``path`` (including parents) unless it already exists."""
    if os.path.exists(path):
        return
    os.makedirs(path)
10
+
11
def process_images(input_directory, output_directory, process_function, limit=None):
    """Run ``process_function`` on every unprocessed image under ``input_directory``.

    Args:
        input_directory: Directory tree to search for images.
        output_directory: Flat directory that receives ``<stem>.png`` results.
        process_function: Callable taking ``(input_path, output_path)``.
        limit: Optional maximum number of images to process in this call;
            images whose result already exists do not count.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.webp', '.heic')
    processed = 0

    for root, _, files in os.walk(input_directory):
        for file_name in files:
            if not file_name.lower().endswith(image_suffixes):
                continue

            source_path = os.path.join(root, file_name)
            target_directory = os.path.join(output_directory)
            if not os.path.exists(target_directory):
                os.makedirs(target_directory)

            target_name = os.path.splitext(os.path.basename(source_path))[0] + '.png'
            target_path = os.path.join(target_directory, target_name)
            if os.path.exists(target_path):
                continue  # already processed

            print(source_path, target_path)
            process_function(source_path, target_path)
            processed += 1
            if limit and processed >= limit:
                return
31
+
32
def main(dry_run=False):
    """Run every enabled background-removal API over the resized originals.

    Args:
        dry_run: When true, each API processes at most 5 new images;
            otherwise all unprocessed images are handled.
    """
    input_directory = "../data/resized-original-images"
    output_base_directory = "../data/processed"

    # (output sub-directory, processing function) per API, in run order.
    # Clipdrop is currently disabled.
    pipelines = (
        ("photoroom", photoroom_process),
        ("removebg", removebg_process),
        ("bria", bria_process),
    )
    output_directories = {
        name: os.path.join(output_base_directory, name) for name, _ in pipelines
    }

    # Create output directories if they don't exist
    for directory in output_directories.values():
        create_directory(directory)

    if dry_run:
        print("Starting dry run...")
        limit = 5
    else:
        print("Starting full processing...")
        limit = None

    for name, process_function in pipelines:
        process_images(input_directory, output_directories[name], process_function, limit=limit)

    if dry_run:
        print("Dry run completed.")
    else:
        print("Full processing completed.")
63
+
64
+ if __name__ == "__main__":
65
+ # Set dry_run to True for a dry run, or False for full processing
66
+ main(dry_run=False)
utils/removebg.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import time
4
+
5
+ # Load environment variables from .env file
6
+ from dotenv import load_dotenv
7
+ load_dotenv()
8
+ API_KEY = os.getenv('REMOVEBG_API_KEY')
9
+
10
def process_image(input_image_path, output_image_path, max_retries=5, retry_delay=60):
    """Remove the background of one image with the remove.bg API.

    The response body is written to ``output_image_path``. A 429 (rate
    limit) response is retried after ``retry_delay`` seconds, at most
    ``max_retries`` times. Retries run in a loop rather than by recursion,
    so a persistent rate limit can no longer retry forever.

    Args:
        input_image_path: Path of the image to upload.
        output_image_path: Path the processed image is written to.
        max_retries: Maximum number of retries after a 429 response.
        retry_delay: Seconds to wait before each retry.

    Returns:
        ``None`` on success; on failure the error is printed and its text
        returned.
    """
    url = "https://api.remove.bg/v1.0/removebg"
    payload = { "size": "auto" }
    headers = {
        "Accept": "image/png, application/json",
        "x-api-key": API_KEY
    }

    retries = 0
    while True:
        try:
            # Reopen the file on every attempt so the upload starts at byte 0.
            with open(input_image_path, 'rb') as image_file:
                files = { "image_file": image_file }
                response = requests.post(url, data=payload, files=files, headers=headers)
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            if response.status_code == 429 and retries < max_retries:
                retries += 1
                print(f"Rate limit exceeded. Retrying {input_image_path} after delay...")
                time.sleep(retry_delay)
                continue
            print(f"Error: {str(e)} ({input_image_path})")
            return str(e)
        except requests.RequestException as e:
            # Connection errors and timeouts are reported like HTTP errors,
            # as the other API scripts do, instead of aborting the batch.
            print(f"Error: {str(e)} ({input_image_path})")
            return str(e)

        with open(output_image_path, 'wb') as f:
            f.write(response.content)
        print(f"Image downloaded and saved to {output_image_path}")
        return None
36
+
37
def iterate_over_directory(directory_path, result_directory):
    """Process every not-yet-processed image under ``directory_path`` sequentially.

    Results go to ``<result_directory>/<name of the image's folder>/<stem>.png``.
    Images whose result file already exists are skipped.
    """
    extensions = ('.png', '.jpg', '.jpeg', '.webp', '.heic')

    for root, _, files in os.walk(directory_path):
        destination_directory = os.path.join(result_directory, os.path.basename(root))
        for entry in files:
            if not entry.lower().endswith(extensions):
                continue

            input_path = os.path.join(root, entry)
            if not os.path.exists(destination_directory):
                os.makedirs(destination_directory)

            output_name = os.path.splitext(os.path.basename(input_path))[0] + '.png'
            output_path = os.path.join(destination_directory, output_name)
            if not os.path.exists(output_path):  # don't re-process images
                process_image(input_path, output_path)
53
+
54
+ if __name__ == "__main__":
55
+ INPUT_DIRECTORY = "../original-images/"
56
+ OUTPUT_DIRECTORY = "../result-remove-bg/"
57
+
58
+ if not os.path.exists(OUTPUT_DIRECTORY):
59
+ os.makedirs(OUTPUT_DIRECTORY)
60
+
61
+ iterate_over_directory(directory_path=INPUT_DIRECTORY, result_directory=OUTPUT_DIRECTORY)
utils/resize_images.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from PIL import Image
3
+ import concurrent.futures
4
+
5
+ # Define the directories
6
+ input_directory = "../original-images"
7
+ output_directory = "../resized-original-images-test"
8
+
9
+ # Ensure output directory exists
10
+ os.makedirs(output_directory, exist_ok=True)
11
+
12
def resize_image(input_path, output_path):
    """Save ``input_path`` to ``output_path``, shrunk to at most 10 megapixels.

    Images at or below the limit are saved unchanged. Larger ones are scaled
    down with the same factor on both axes, using Lanczos resampling.
    """
    max_megapixels = 10

    with Image.open(input_path) as img:
        current_megapixels = (img.width * img.height) / 1_000_000

        if current_megapixels <= max_megapixels:
            # Already small enough: save it as is.
            img.save(output_path)
            return

        # The pixel count scales with the square of the linear factor.
        scaling_factor = (max_megapixels / current_megapixels) ** 0.5
        new_size = (int(img.width * scaling_factor), int(img.height * scaling_factor))
        img.resize(new_size, Image.LANCZOS).save(output_path)
29
+
30
def main():
    """Resize every image in ``input_directory`` into ``output_directory``.

    Only .png/.jpg/.jpeg files are considered, and files that already exist
    in the output directory are skipped. Resizing runs on a pool of 16
    threads.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
        for filename in os.listdir(input_directory):
            if not filename.endswith(('.png', '.jpg', '.jpeg')):
                continue

            source_path = os.path.join(input_directory, filename)
            target_path = os.path.join(output_directory, filename)
            if os.path.exists(target_path):
                print(f"Skipped {filename}, already exists in {output_directory}")
                continue

            executor.submit(resize_image, source_path, target_path)
            print(f"Submitted {filename} for resizing.")

    # Leaving the ``with`` block waits for all submitted jobs to finish.
    print("All images have been resized and saved to the output directory.")
45
+
46
+ if __name__ == "__main__":
47
+ main()