Spaces:

bgsys
/

background-removal-arena

Running

App Files Files Community

tdurbor committed on Dec 2, 2024

Commit

4dccf1d

1 Parent(s): c01e765

initial commit

Browse files

Files changed (14) hide show

app.py +264 -0
db.py +63 -0
launch.sh +2 -0
rating_systems.py +59 -0
requirements.txt +10 -0
utils/__init__.py +0 -0
utils/bria_rmbg20.py +71 -0
utils/check_images.py +55 -0
utils/clipdrop.py +59 -0
utils/move_images.py +37 -0
utils/photoroom.py +61 -0
utils/remove_backgrounds.py +66 -0
utils/removebg.py +61 -0
utils/resize_images.py +47 -0

app.py ADDED Viewed

	@@ -0,0 +1,264 @@

+import os
+import logging
+from typing import Tuple
+from dotenv import load_dotenv
+import gradio as gr
+import numpy as np
+from PIL import Image
+import random
+from db import compute_elo_scores, get_all_votes
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+# Load environment variables from .env file
+load_dotenv()
+# Access the API key
+PHOTOROOM_API_KEY = os.getenv('PHOTOROOM_API_KEY')
+CLIPDROP_API_KEY = os.getenv('CLIPDROP_API_KEY')
def fetch_elo_scores():
    """Compute the current Elo ratings and log the outcome.

    Returns:
        Whatever ``compute_elo_scores()`` returns, or ``None`` when that call
        raises (the error is logged instead of being propagated).
    """
    try:
        scores = compute_elo_scores()
    except Exception as exc:
        # Best effort: callers fall back to placeholder rankings on None.
        logging.error("Error computing Elo scores: %s", str(exc))
        return None
    logging.info("Elo scores successfully computed.")
    return scores
def update_rankings_table():
    """Build the leaderboard rows from the current Elo scores.

    Returns:
        A list of ``[model_name, rating]`` rows. When scores are available the
        ratings are truncated to ``int`` (models missing from the scores get
        1000) and rows are sorted from highest to lowest rating. When no
        scores are available every model is listed with ``-1``.
    """
    # "Clipdrop" is currently left out of the leaderboard.
    model_names = ("Photoroom", "RemoveBG", "BRIA RMBG 2.0")
    elo_scores = fetch_elo_scores()
    if not elo_scores:
        return [[name, -1] for name in model_names]
    rows = [[name, int(elo_scores.get(name, 1000))] for name in model_names]
    return sorted(rows, key=lambda row: row[1], reverse=True)
def select_new_image():
    """Pick a random input image and the outputs of two random models.

    Up to ``max_attempts`` images are tried. An image whose files cannot be
    opened is remembered and never re-sampled during this call.

    Returns:
        An 8-tuple ``(input_path, input_image, path_a, image_a, path_b,
        image_b, model_a_name, model_b_name)``, or ``None`` when no usable
        image could be found.
    """
    image_paths = load_images_from_directory("data/resized-original-images")
    failed_paths = set()  # every image that failed so far, not just the last one
    max_attempts = 10
    for _ in range(max_attempts):
        available_images = [path for path in image_paths if path not in failed_paths]
        if not available_images:
            logging.error("No available images to select from.")
            return None
        random_image_path = random.choice(available_images)
        image_filename = os.path.splitext(os.path.basename(random_image_path))[0] + ".png"
        # "Clipdrop" is currently left out of the arena.
        segmented_image_paths = {
            "Photoroom": os.path.join("data/processed/photoroom", image_filename),
            "RemoveBG": os.path.join("data/processed/removebg", image_filename),
            "BRIA RMBG 2.0": os.path.join("data/processed/bria", image_filename),
        }
        model_a_name, model_b_name = random.sample(list(segmented_image_paths), 2)
        model_a_output_path = segmented_image_paths[model_a_name]
        model_b_output_path = segmented_image_paths[model_b_name]

        opened_images = []
        try:
            for path in (random_image_path, model_a_output_path, model_b_output_path):
                opened_images.append(Image.open(path))
        except FileNotFoundError as e:
            # Release whatever was already opened before trying another image.
            for image in opened_images:
                image.close()
            logging.error("File not found: %s. Resampling another image.", e)
            failed_paths.add(random_image_path)
            continue

        input_image, model_a_output_image, model_b_output_image = opened_images
        return (random_image_path, input_image, model_a_output_path, model_a_output_image,
                model_b_output_path, model_b_output_image, model_a_name, model_b_name)
    logging.error("Failed to select a new image after %d attempts.", max_attempts)
    return None
def get_notice_markdown():
    """Return the arena's introductory markdown, including the live vote count.

    The count is the number of rows returned by ``get_all_votes()``.
    """
    vote_count = len(get_all_votes())
    return f"""
    # ⚔️  Background Removal Arena: Compare & Test the Best Background Removal Models
    ## 📜 How It Works
    - **Blind Test**: You will see two images with their background removed from two anonymous background removal models (Clipdrop, RemoveBG, Photoroom, BRIA RMBG 2.0).
    - **Vote for the Best**: Choose the best result, if none stand out choose "Tie".
    ## 📊 Stats
    - **Total #votes**: {vote_count}
    ## 👇 Test now!
    """
def compute_mask_difference(segmented_a, segmented_b):
    """Compute the per-pixel absolute difference between two cut-outs.

    Each input is reduced to one value per pixel: the sum of its R, G and B
    channels, or 0 where the pixel is fully transparent (alpha == 0).

    Args:
        segmented_a: A PIL image, or an array-like of shape (H, W, 4).
        segmented_b: Same kind of input, with the same height and width.

    Returns:
        A signed integer array of shape (H, W); 0 where both inputs agree.
    """
    def _foreground_intensity(segmented):
        # PIL images may be RGB / P / LA; normalise so channel 3 is always alpha.
        if hasattr(segmented, "convert"):
            segmented = segmented.convert("RGBA")
        pixels = np.asarray(segmented)
        # Accumulate in signed 64-bit so the subtraction below cannot wrap
        # around, independent of NumPy's implicit promotion rules.
        rgb_sum = pixels[..., :3].sum(axis=-1, dtype=np.int64)
        return np.where(pixels[..., 3] == 0, 0, rgb_sum)

    return np.abs(_foreground_intensity(segmented_a) - _foreground_intensity(segmented_b))
def gradio_interface():
    """Create and return the Gradio interface.

    The app has three tabs: the arena (two anonymous model outputs and three
    vote buttons), the Elo leaderboard, and a table of raw votes.

    The current image pair is kept in ``gr.State`` components that are passed
    to the vote handlers as *inputs* and written back as *outputs*. Reading or
    assigning ``State.value`` inside a handler would touch the component's
    shared initial value, so sessions would overwrite each other's pair.

    Raises:
        RuntimeError: if no initial image pair can be selected.
    """

    def annotated_input(input_image, segmented_a, segmented_b):
        """Pair the input image with a boolean mask of pixels where the outputs differ."""
        differs = compute_mask_difference(segmented_a, segmented_b) > 0
        return (input_image, [(differs, "Difference Mask")])

    def vote_for_model(choice, image_id, path_a, path_b, name_a, name_b):
        """Submit a vote for a model and return updated images and model names."""
        logging.info("Voting for model: %s", choice)
        vote_data = {
            "image_id": image_id,
            "model_a": name_a,
            "model_b": name_b,
            "winner": choice,
            "fpath_a": path_a,
            "fpath_b": path_b,
        }
        try:
            logging.debug("Adding vote data to the database: %s", vote_data)
            from db import add_vote
            result = add_vote(vote_data)
            logging.info("Vote successfully recorded in the database with ID: %s", result["id"])
        except Exception as e:
            # Best effort: a failed write must not block the next round.
            logging.error("Error recording vote: %s", str(e))

        selection = select_new_image()
        if selection is None:
            # Shows an error in the UI and leaves the current pair in place.
            raise gr.Error("No image pair is available right now. Please try again.")
        (new_fpath_input, new_input_image, new_fpath_a, new_segmented_a,
         new_fpath_b, new_segmented_b, new_a_name, new_b_name) = selection

        # Order must match ``vote_outputs`` below.
        return (new_fpath_input,
                annotated_input(new_input_image, new_segmented_a, new_segmented_b),
                new_fpath_a, new_segmented_a,
                new_fpath_b, new_segmented_b,
                new_a_name, new_b_name,
                get_notice_markdown())

    def make_vote_handler(choice):
        """Return a click handler that records ``choice`` for the session's current pair."""
        def handler(image_id, path_a, path_b, name_a, name_b):
            return vote_for_model(choice, image_id, path_a, path_b, name_a, name_b)
        return handler

    def update_vote_data():
        """Return all votes as rows for the vote table."""
        votes = get_all_votes()
        return [[vote.id, vote.image_id, vote.model_a, vote.model_b, vote.winner, vote.timestamp] for vote in votes]

    with gr.Blocks() as demo:
        gr.Markdown("# Background Removal Arena")
        with gr.Tabs():
            with gr.Tab("⚔️ Arena (battle)", id=0):
                notice_markdown = gr.Markdown(get_notice_markdown(), elem_id="notice_markdown")

                selection = select_new_image()
                if selection is None:
                    raise RuntimeError("Could not select an initial image pair; check the data directories.")
                (initial_fpath_input, input_image, initial_fpath_a, segmented_a,
                 initial_fpath_b, segmented_b, a_name, b_name) = selection

                model_a_name = gr.State(a_name)
                model_b_name = gr.State(b_name)
                fpath_input = gr.State(initial_fpath_input)
                fpath_a = gr.State(initial_fpath_a)
                fpath_b = gr.State(initial_fpath_b)

                with gr.Row():
                    image_a_display = gr.Image(
                        value=segmented_a,
                        type="pil",
                        label="Model A",
                        width=500,
                        height=500
                    )
                    input_image_display = gr.AnnotatedImage(
                        value=annotated_input(input_image, segmented_a, segmented_b),
                        label="Input Image",
                        width=500,
                        height=500
                    )
                    image_b_display = gr.Image(
                        value=segmented_b,
                        type="pil",
                        label="Model B",
                        width=500,
                        height=500
                    )
                with gr.Row():
                    vote_a_btn = gr.Button("👈  A is better")
                    vote_tie = gr.Button("🤝  Tie")
                    vote_b_btn = gr.Button("👉  B is better")

                vote_inputs = [fpath_input, fpath_a, fpath_b, model_a_name, model_b_name]
                vote_outputs = [
                    fpath_input, input_image_display, fpath_a, image_a_display,
                    fpath_b, image_b_display, model_a_name, model_b_name, notice_markdown
                ]
                for button, choice in ((vote_a_btn, "model_a"), (vote_b_btn, "model_b"), (vote_tie, "tie")):
                    button.click(fn=make_vote_handler(choice), inputs=vote_inputs, outputs=vote_outputs)

            with gr.Tab("🏆 Leaderboard", id=1) as leaderboard_tab:
                rankings_table = gr.Dataframe(
                    headers=["Model", "Ranking"],
                    value=update_rankings_table(),
                    label="Current Model Rankings",
                    column_widths=[180, 60],
                    row_count=4
                )
                # Recompute the rankings every time the tab is opened.
                leaderboard_tab.select(
                    fn=update_rankings_table,
                    outputs=rankings_table
                )
            with gr.Tab("📊 Vote Data", id=2) as vote_data_tab:
                vote_table = gr.Dataframe(
                    headers=["ID", "Image ID", "Model A", "Model B", "Winner", "Timestamp"],
                    value=update_vote_data(),
                    label="Vote Data",
                    column_widths=[20, 150, 100, 100, 100, 150],
                    row_count=0
                )
                # Reload the votes every time the tab is opened.
                vote_data_tab.select(
                    fn=update_vote_data,
                    outputs=vote_table
                )
    return demo
def load_images_from_directory(directory):
    """Return the paths of the image files found directly inside ``directory``.

    A file counts as an image when its name ends with ``.png``, ``.jpg`` or
    ``.jpeg`` (case-sensitive). Sub-directories are not searched, and the
    order follows ``os.listdir``.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')
    paths = []
    for entry in os.listdir(directory):
        if entry.endswith(image_suffixes):
            paths.append(os.path.join(directory, entry))
    return paths
+if __name__ == "__main__":
+    demo = gradio_interface()
+    demo.launch()

db.py ADDED Viewed

	@@ -0,0 +1,63 @@

+from sqlalchemy import create_engine, Column, Integer, String, DateTime
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker, Session
+from datetime import datetime
+import pandas as pd
+import uuid
+from rating_systems import compute_elo
+DATABASE_URL = "sqlite:///./data/votes.db"  # Example with SQLite, replace with PostgreSQL for production
+engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
+SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+Base = declarative_base()
+# Database model
class Vote(Base):
    """One recorded comparison between two background-removal outputs."""

    __tablename__ = "votes"

    id = Column(Integer, primary_key=True, index=True)
    # Path of the original input image that was shown.
    image_id = Column(String, index=True)
    model_a = Column(String)
    model_b = Column(String)
    # "model_a", "model_b" or "tie".
    winner = Column(String)
    # NOTE(review): not set by the vote handler in app.py, so it stays NULL there.
    user_id = Column(String, index=True)
    # Paths of the two processed images that were compared.
    fpath_a = Column(String)
    fpath_b = Column(String)
    # Filled in at insert time when not provided (naive UTC).
    timestamp = Column(DateTime, default=datetime.utcnow)

    def __repr__(self):
        return (
            f"{type(self).__name__}(id={self.id!r}, image_id={self.image_id!r}, "
            f"model_a={self.model_a!r}, model_b={self.model_b!r}, winner={self.winner!r})"
        )
+Base.metadata.create_all(bind=engine)
+# Dependency for database session
def get_db():
    """Yield a database session and close it once the consumer is done.

    Written as a generator so it can be used as a dependency that hands out
    one session per use.
    """
    with SessionLocal() as session:
        yield session
def add_vote(vote_data):
    """Insert one vote and return its generated fields.

    Args:
        vote_data: Mapping of ``Vote`` column names to values.

    Returns:
        A dict with the new row's ``id``, ``user_id`` and ``timestamp``.
    """
    with SessionLocal() as session:
        record = Vote(**vote_data)
        session.add(record)
        session.commit()
        # Reload so database-generated values (id, timestamp) are populated.
        session.refresh(record)
        return {
            "id": record.id,
            "user_id": record.user_id,
            "timestamp": record.timestamp,
        }
+# Function to get all votes
def get_all_votes():
    """Return every row of the ``votes`` table as a list of ``Vote`` objects."""
    session = SessionLocal()
    try:
        return session.query(Vote).all()
    finally:
        session.close()
+# Function to compute Elo scores
def compute_elo_scores():
    """Compute Elo ratings from all stored votes.

    Returns:
        The result of ``compute_elo`` applied to a DataFrame with the columns
        ``model_a``, ``model_b`` and ``winner`` (one row per vote).
    """
    columns = ("model_a", "model_b", "winner")
    with SessionLocal() as session:
        all_votes = session.query(Vote).all()
        frame = pd.DataFrame(
            {column: [getattr(vote, column) for vote in all_votes] for column in columns}
        )
        return compute_elo(frame)

launch.sh ADDED Viewed

	@@ -0,0 +1,2 @@


#!/usr/bin/env bash
# Install the dependencies, then start the app.
# Strict mode: do not launch the app if the installation step fails.
set -euo pipefail

pip3 install -r requirements.txt
python3 app.py

rating_systems.py ADDED Viewed

	@@ -0,0 +1,59 @@

+# This code is copied from the following source:
+# https://github.com/lm-sys/FastChat/blob/main/fastchat/serve/monitor/rating_systems.py
+import math
+import pandas as pd
+import numpy as np
+from sqlalchemy.orm import Session
+import pandas as pd
def get_matchups_models(df):
    """Encode the competitors of every match as integer ids.

    Args:
        df: DataFrame with ``model_a`` and ``model_b`` columns.

    Returns:
        ``(matchups, models)`` where ``matchups`` is an (N, 2) array of model
        ids and ``models`` is the list of model names indexed by those ids.
    """
    match_count = len(df)
    # Factorize both columns together so an id means the same model in each.
    stacked = pd.concat([df["model_a"], df["model_b"]])
    codes, uniques = pd.factorize(stacked)
    ids_a = codes[:match_count]
    ids_b = codes[match_count:]
    return np.column_stack([ids_a, ids_b]), uniques.to_list()
def preprocess_for_elo(df):
    """
    Convert a votes DataFrame into the arrays the Elo computation needs.
      matchups: (N, 2) model ids for the competitors in a match
      outcomes: float64 (N,) with 1.0, 0.5, or 0.0 for win, tie, or loss of model_a
      models:   list of model names indexed by id
    """
    matchups, models = get_matchups_models(df)
    winner = df["winner"]
    # Anything other than "model_a" / "model_b" counts as a tie.
    outcomes = np.where(
        winner == "model_a",
        1.0,
        np.where(winner == "model_b", 0.0, 0.5),
    )
    return matchups, outcomes, models
def compute_elo(df, k=4.0, base=10.0, init_rating=1000.0, scale=400.0):
    """Run sequential Elo updates over the matches in ``df``.

    Args:
        df: DataFrame with ``model_a``, ``model_b`` and ``winner`` columns;
            matches are processed in row order.
        k: Step size of each rating update.
        base, scale: Parameters of the logistic win-probability curve.
        init_rating: Rating every model starts with.

    Returns:
        Dict mapping model name to its final rating.
    """
    matchups, outcomes, models = preprocess_for_elo(df)
    alpha = math.log(base) / scale
    ratings = np.full(shape=(len(models),), fill_value=init_rating)
    for pair, outcome in zip(matchups, outcomes):
        idx_a, idx_b = pair
        # Expected score of model_a given the current rating gap.
        expected_a = 1.0 / (1.0 + math.exp(alpha * (ratings[idx_b] - ratings[idx_a])))
        delta = k * (outcome - expected_a)
        ratings[idx_a] += delta
        ratings[idx_b] -= delta
    return dict(zip(models, ratings))
def compute_elo_from_votes(db: Session):
    """Compute Elo ratings from every vote reachable through ``db``.

    Args:
        db: An open SQLAlchemy session.

    Returns:
        Dict mapping model name to its Elo rating.
    """
    # Imported here, not at module level: db.py imports this module while it
    # is loading, so a top-level import would be circular.
    from db import Vote

    # Retrieve all votes from the database
    votes = db.query(Vote).all()
    # Convert votes to a DataFrame
    data = {
        "model_a": [vote.model_a for vote in votes],
        "model_b": [vote.model_b for vote in votes],
        "winner": [vote.winner for vote in votes],
    }
    df = pd.DataFrame(data)
    # Compute Elo scores using the existing function
    return compute_elo(df)

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+certifi==2024.8.30
+fastapi==0.115.4
+gradio==5.4.0
+numpy==2.0.2
+pandas==2.2.3
+pillow==11.0.0
+python-dotenv==1.0.1
+requests==2.32.3
+SQLAlchemy==2.0.36
+uvicorn==0.30.1

utils/__init__.py ADDED Viewed

File without changes

utils/bria_rmbg20.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import os
+import requests
+import concurrent.futures
+from dotenv import load_dotenv
+# Load environment variables from .env file
+load_dotenv()
+API_TOKEN = os.getenv('BRIA_API_TOKEN')
def process_image(input_image_path, output_image_path):
    """Remove the background of one image through the Bria API.

    The image is uploaded, the ``result_url`` from the JSON response is
    downloaded and written to ``output_image_path``.

    Returns:
        ``None`` on success, or an error message string when a request fails
        or the response carries no ``result_url``.
    """
    import mimetypes

    url = "https://engine.prod.bria-api.com/v1/background/remove"
    headers = {
        'api_token': API_TOKEN
    }
    # Send the real content type; fall back to the previous hard-coded value.
    content_type = mimetypes.guess_type(input_image_path)[0] or 'image/jpeg'
    try:
        # The context manager closes the upload handle even if the request fails.
        with open(input_image_path, 'rb') as image_file:
            files = [
                ('file', (os.path.basename(input_image_path), image_file, content_type))
            ]
            # Make the POST request to the Bria API
            response = requests.post(url, headers=headers, files=files)
        response.raise_for_status()
        # Extract the result URL from the response
        processed_image_url = response.json().get('result_url')
        if not processed_image_url:
            message = "No result_url in Bria response"
            print(f"Error: {message} ({input_image_path})")
            return message
        # Download the processed image
        image_response = requests.get(processed_image_url)
        image_response.raise_for_status()  # Ensure the request was successful
        with open(output_image_path, 'wb') as f:
            f.write(image_response.content)
        print(f"Image processed and saved to {output_image_path}")
    except requests.RequestException as e:
        print(f"Error: {str(e)} ({input_image_path})")
        return str(e)
    return None
def iterate_over_directory(directory_path, result_directory):
    """Process every image under ``directory_path`` with a thread pool.

    Results are written as ``<name>.png`` into a sub-directory of
    ``result_directory`` named after the image's parent folder. Images whose
    result file already exists are skipped. Exceptions raised inside a worker
    are printed instead of being silently dropped.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.webp', '.heic')
    pending = {}  # future -> input path, for error reporting
    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
        for root, _, files in os.walk(directory_path):
            for file in files:
                if not file.lower().endswith(image_suffixes):
                    continue
                file_path = os.path.join(root, file)
                result_file_directory = os.path.join(result_directory, os.path.basename(root))
                os.makedirs(result_file_directory, exist_ok=True)
                result_path = os.path.join(result_file_directory, os.path.splitext(file)[0] + '.png')
                if os.path.exists(result_path):  # don't re-process images
                    continue
                pending[executor.submit(process_image, file_path, result_path)] = file_path
        for future in concurrent.futures.as_completed(pending):
            error = future.exception()
            if error is not None:
                print(f"Error: {error} ({pending[future]})")
+if __name__ == "__main__":
+    INPUT_DIRECTORY = "../original-images/"
+    OUTPUT_DIRECTORY = "../result-bria-rmbg20/"
+    if not os.path.exists(OUTPUT_DIRECTORY):
+        os.makedirs(OUTPUT_DIRECTORY)
+    iterate_over_directory(directory_path=INPUT_DIRECTORY, result_directory=OUTPUT_DIRECTORY)

utils/check_images.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import os
+from PIL import Image
+import pandas as pd
+# Define the directories
+original_directories = [
+    "../data/resized-original-images"
+]
+processed_directories = {
+    "Photoroom": "../data/processed/photoroom",
+    "Clipdrop": "../data/processed/clipdrop",
+    "RemoveBG": "../data/processed/removebg",
+    "BRIA RMBG 2.0": "../data/processed/bria"
+}
def compute_megapixels(width, height):
    """Return the area of a ``width`` x ``height`` image in megapixels."""
    pixel_count = width * height
    return pixel_count / 1_000_000
def check_image_sizes_comparison():
    """Write a CSV comparing original image sizes with each model's output size.

    For every ``.png``/``.jpg``/``.jpeg`` file in ``original_directories`` one
    row is produced with the original size and megapixels plus, per model in
    ``processed_directories``, the size and megapixels of the matching
    ``<name>.png`` (or "Not found"). The table is saved to
    ``image_sizes_comparison.csv`` in the current directory.
    """
    def describe(path):
        """Return ("WxH", megapixels) for the image at ``path``."""
        with Image.open(path) as img:
            width, height = img.size[0], img.size[1]
        return f"{width}x{height}", compute_megapixels(width, height)

    rows = []
    for original_directory in original_directories:
        for filename in os.listdir(original_directory):
            if not filename.endswith(('.png', '.jpg', '.jpeg')):
                continue
            original_size, original_megapixels = describe(
                os.path.join(original_directory, filename)
            )
            row = {
                "original_file_name": filename,
                "original_size": original_size,
                "original_megapixels": original_megapixels
            }
            # Processed outputs are always stored as PNG.
            png_filename = os.path.splitext(filename)[0] + ".png"
            for model, directory in processed_directories.items():
                candidate = os.path.join(directory, png_filename)
                if os.path.exists(candidate):
                    image_size, image_megapixels = describe(candidate)
                else:
                    image_size = "Not found"
                    image_megapixels = "Not found"
                row[model] = image_size
                row[f"{model}_megapixels"] = image_megapixels
            rows.append(row)
    pd.DataFrame(rows).to_csv("image_sizes_comparison.csv", index=False)
+if __name__ == "__main__":
+    check_image_sizes_comparison()

utils/clipdrop.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import os
+import requests
+# Load environment variables from .env file
+from dotenv import load_dotenv
+load_dotenv()
+API_KEY = os.getenv('CLIPDROP_API_KEY')
def process_image(input_image_path, output_image_path):
    """Remove the background of one image through the Clipdrop API.

    The PNG returned by the service is written to ``output_image_path``.

    Returns:
        ``None`` on success, or the error text when the request fails.
    """
    endpoint = "https://clipdrop-api.co/remove-background/v1"
    request_headers = {
        "Accept": "image/png",
        "x-api-key": API_KEY
    }
    form_fields = {
        "transparency_handling": "discard_alpha_layer"
    }
    try:
        with open(input_image_path, 'rb') as image_file:
            response = requests.post(
                endpoint,
                data=form_fields,
                files={"image_file": image_file},
                headers=request_headers,
            )
            response.raise_for_status()
            with open(output_image_path, 'wb') as out_file:
                out_file.write(response.content)
                print(f"Image downloaded and saved to {output_image_path}")
    except requests.RequestException as err:
        print(f"Error: {str(err)} ({input_image_path})")
        return str(err)
def iterate_over_directory(directory_path, result_directory):
    """Process every image under ``directory_path``, one after another.

    Each result is written as ``<name>.png`` into a sub-directory of
    ``result_directory`` named after the image's parent folder. Images whose
    result file already exists are skipped.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.webp', '.heic')
    for root, _, files in os.walk(directory_path):
        target_directory = os.path.join(result_directory, os.path.basename(root))
        for file in files:
            if not file.lower().endswith(image_suffixes):
                continue
            source_path = os.path.join(root, file)
            if not os.path.exists(target_directory):
                os.makedirs(target_directory)
            stem = os.path.splitext(os.path.basename(source_path))[0]
            result_path = os.path.join(target_directory, stem + '.png')
            if os.path.exists(result_path):  # don't re-process images
                continue
            process_image(source_path, result_path)
+if __name__ == "__main__":
+    INPUT_DIRECTORY = "../original-images/"
+    OUTPUT_DIRECTORY = "../result-clipdrop/"
+    if not os.path.exists(OUTPUT_DIRECTORY):
+        os.makedirs(OUTPUT_DIRECTORY)
+    iterate_over_directory(directory_path=INPUT_DIRECTORY, result_directory=OUTPUT_DIRECTORY)

utils/move_images.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import os
+import shutil
def copy_images(source_dir, dest_dir):
    """Flatten ``source_dir`` into ``dest_dir``, prefixing names with their folder.

    Every file found while walking ``source_dir`` is copied to
    ``dest_dir/<parent folder name>_<file name>``. Files whose destination
    already exists are skipped. ``dest_dir`` is created when missing.
    """
    # Ensure the destination directory exists
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
    for root, _, files in os.walk(source_dir):
        # The containing folder's name becomes the prefix of the new file name.
        prefix = os.path.basename(root)
        for filename in files:
            print(filename)
            source_file = os.path.join(root, filename)
            new_filename = f"{prefix}_{filename}"
            dest_file = os.path.join(dest_dir, new_filename)
            already_there = os.path.exists(dest_file)
            if not os.path.isfile(source_file) or already_there:
                print(f"Skipped: {filename} (already exists or not a file)")
                continue
            shutil.copy2(source_file, dest_file)
            print(f"Copied: {new_filename}")
+def main():
+    # Define your source and destination directories
+    source_directory = '../categorised-images'
+    destination_directory = '../original-images'
+    # Call the function to copy images
+    copy_images(source_directory, destination_directory)
+if __name__ == "__main__":
+    main()

utils/photoroom.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import os
+import requests
+import concurrent.futures
+# Load environment variables from .env file
+from dotenv import load_dotenv
+load_dotenv()
+API_KEY = os.getenv('PHOTOROOM_API_KEY')
def process_image(input_image_path, output_image_path):
    """Remove the background of one image through the Photoroom API.

    The response body is written to ``output_image_path``.

    Returns:
        ``None`` on success, or the error text when the request fails.
    """
    endpoint = "https://sdk.photoroom.com/v1/segment"
    request_headers = {
        "Accept": "image/png, application/json",
        # Pins the segmentation model version used for the request.
        "pr-background-removal-model-version": "2024-09-26",
        "x-api-key": API_KEY
    }
    try:
        with open(input_image_path, 'rb') as image_file:
            response = requests.post(
                endpoint,
                files={"image_file": image_file},
                headers=request_headers,
            )
            response.raise_for_status()
            with open(output_image_path, 'wb') as out_file:
                out_file.write(response.content)
                print(f"Image downloaded and saved to {output_image_path}")
    except requests.RequestException as err:
        print(f"Error: {str(err)} ({input_image_path})")
        return str(err)
def iterate_over_directory(directory_path, result_directory):
    """Process every image under ``directory_path`` with a thread pool.

    Results are written as ``<name>.png`` into a sub-directory of
    ``result_directory`` named after the image's parent folder. Images whose
    result file already exists are skipped. Exceptions raised inside a worker
    are printed instead of being silently dropped.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.webp', '.heic')
    pending = {}  # future -> input path, for error reporting
    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
        for root, _, files in os.walk(directory_path):
            for file in files:
                if not file.lower().endswith(image_suffixes):
                    continue
                file_path = os.path.join(root, file)
                result_file_directory = os.path.join(result_directory, os.path.basename(root))
                os.makedirs(result_file_directory, exist_ok=True)
                result_path = os.path.join(result_file_directory, os.path.splitext(file)[0] + '.png')
                if os.path.exists(result_path):  # don't re-process images
                    continue
                pending[executor.submit(process_image, file_path, result_path)] = file_path
        for future in concurrent.futures.as_completed(pending):
            error = future.exception()
            if error is not None:
                print(f"Error: {error} ({pending[future]})")
+if __name__ == "__main__":
+    INPUT_DIRECTORY = "../original-images/"
+    OUTPUT_DIRECTORY = "../result-photoroom/"
+    if not os.path.exists(OUTPUT_DIRECTORY):
+        os.makedirs(OUTPUT_DIRECTORY)
+    iterate_over_directory(directory_path=INPUT_DIRECTORY, result_directory=OUTPUT_DIRECTORY)

utils/remove_backgrounds.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import os
+from photoroom import process_image as photoroom_process
+from removebg import process_image as removebg_process
+#from clipdrop import process_image as clipdrop_process
+from bria_rmbg20 import process_image as bria_process
def create_directory(path):
    """Create ``path`` (including parents) if it does not exist yet.

    Uses ``exist_ok=True`` instead of a separate existence check, so a
    directory created concurrently between check and creation is not an error.
    """
    os.makedirs(path, exist_ok=True)
def process_images(input_directory, output_directory, process_function, limit=None):
    """Run ``process_function`` on every unprocessed image under ``input_directory``.

    Args:
        input_directory: Directory tree to search for images.
        output_directory: Flat directory receiving ``<name>.png`` results.
        process_function: Callable taking ``(input_path, output_path)``.
        limit: Stop after this many images were processed; a falsy value
            (``None`` or 0) means no limit.

    Images whose result file already exists are skipped and do not count
    towards ``limit``.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.webp', '.heic')
    processed = 0
    for root, _, files in os.walk(input_directory):
        for file in files:
            if not file.lower().endswith(image_suffixes):
                continue
            source_path = os.path.join(root, file)
            stem = os.path.splitext(os.path.basename(source_path))[0]
            target_directory = os.path.join(output_directory)
            if not os.path.exists(target_directory):
                os.makedirs(target_directory)
            result_path = os.path.join(target_directory, stem + '.png')
            if os.path.exists(result_path):  # already processed
                continue
            print(source_path, result_path)
            process_function(source_path, result_path)
            processed += 1
            if limit and processed >= limit:
                return
def main(dry_run=False):
    """Run every enabled background-removal API over the resized originals.

    Args:
        dry_run: When true, each API processes at most 5 new images;
            otherwise all unprocessed images are handled.
    """
    input_directory = "../data/resized-original-images"
    output_base_directory = "../data/processed"
    # One (output folder name, processing function) pair per enabled API.
    # Clipdrop is currently disabled.
    processors = (
        ("photoroom", photoroom_process),
        ("removebg", removebg_process),
        ("bria", bria_process),
    )
    output_directories = {
        name: os.path.join(output_base_directory, name) for name, _ in processors
    }
    # Create output directories if they don't exist
    for directory in output_directories.values():
        create_directory(directory)

    limit = 5 if dry_run else None
    print("Starting dry run..." if dry_run else "Starting full processing...")
    for name, process_function in processors:
        process_images(input_directory, output_directories[name], process_function, limit=limit)
    print("Dry run completed." if dry_run else "Full processing completed.")
+if __name__ == "__main__":
+    # Set dry_run to True for a dry run, or False for full processing
+    main(dry_run=False)

utils/removebg.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import os
+import requests
+import time
+# Load environment variables from .env file
+from dotenv import load_dotenv
+load_dotenv()
+API_KEY = os.getenv('REMOVEBG_API_KEY')
def process_image(input_image_path, output_image_path, max_retries=5):
    """Remove the background of one image through the remove.bg API.

    On HTTP 429 (rate limit) the request is retried after 60 seconds, at most
    ``max_retries`` times. Any other request failure is reported and returned
    instead of raised, matching the other provider modules.

    Args:
        input_image_path: Image to upload.
        output_image_path: Where the returned PNG is written.
        max_retries: Maximum number of retries after rate-limit responses.

    Returns:
        ``None`` on success, or the error text when the request fails.
    """
    url = "https://api.remove.bg/v1.0/removebg"
    payload = { "size": "auto" }
    headers = {
        "Accept": "image/png, application/json",
        "x-api-key": API_KEY
    }
    retries = 0
    while True:
        try:
            # Re-open the file on every attempt so the upload starts at byte 0.
            with open(input_image_path, 'rb') as image_file:
                files = { "image_file": image_file }
                response = requests.post(url, data=payload, files=files, headers=headers)
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            rate_limited = e.response is not None and e.response.status_code == 429
            if rate_limited and retries < max_retries:
                retries += 1
                print(f"Rate limit exceeded. Retrying {input_image_path} after delay...")
                time.sleep(60)  # Wait for 60 seconds before retrying
                continue
            print(f"Error: {str(e)} ({input_image_path})")
            return str(e)
        except requests.RequestException as e:
            # Connection errors, timeouts, etc. must not abort the whole batch.
            print(f"Error: {str(e)} ({input_image_path})")
            return str(e)
        with open(output_image_path, 'wb') as f:
            f.write(response.content)
        print(f"Image downloaded and saved to {output_image_path}")
        return None
def iterate_over_directory(directory_path, result_directory):
    """Process every image under ``directory_path``, one after another.

    Each result is written as ``<name>.png`` into a sub-directory of
    ``result_directory`` named after the image's parent folder. Images whose
    result file already exists are skipped.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.webp', '.heic')
    for root, _, files in os.walk(directory_path):
        target_directory = os.path.join(result_directory, os.path.basename(root))
        for file in files:
            if not file.lower().endswith(image_suffixes):
                continue
            source_path = os.path.join(root, file)
            if not os.path.exists(target_directory):
                os.makedirs(target_directory)
            stem = os.path.splitext(os.path.basename(source_path))[0]
            result_path = os.path.join(target_directory, stem + '.png')
            if os.path.exists(result_path):  # don't re-process images
                continue
            process_image(source_path, result_path)
+if __name__ == "__main__":
+    INPUT_DIRECTORY = "../original-images/"
+    OUTPUT_DIRECTORY = "../result-remove-bg/"
+    if not os.path.exists(OUTPUT_DIRECTORY):
+        os.makedirs(OUTPUT_DIRECTORY)
+    iterate_over_directory(directory_path=INPUT_DIRECTORY, result_directory=OUTPUT_DIRECTORY)

utils/resize_images.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import os
+from PIL import Image
+import concurrent.futures
+# Define the directories
+input_directory = "../original-images"
+output_directory = "../resized-original-images-test"
+# Ensure output directory exists
+os.makedirs(output_directory, exist_ok=True)
def resize_image(input_path, output_path):
    """Save a copy of an image that is at most 10 megapixels.

    Larger images are scaled down with Lanczos resampling, keeping the aspect
    ratio; images at or below the limit are saved unchanged.
    """
    max_megapixels = 10
    with Image.open(input_path) as img:
        megapixels = (img.width * img.height) / 1_000_000
        if megapixels <= max_megapixels:
            # Already small enough: save it as is.
            img.save(output_path)
            return
        # Area scales with the square of the side length, hence the root.
        scale = (max_megapixels / megapixels) ** 0.5
        target_size = (int(img.width * scale), int(img.height * scale))
        img.resize(target_size, Image.LANCZOS).save(output_path)
def main():
    """Resize every image in ``input_directory`` using a thread pool.

    Files that already exist in ``output_directory`` are skipped. Failures
    inside the workers are reported, and the success message is printed only
    when every submitted image was resized.
    """
    submitted = {}  # future -> file name, for error reporting
    failures = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
        for filename in os.listdir(input_directory):
            if not filename.endswith(('.png', '.jpg', '.jpeg')):
                continue
            input_path = os.path.join(input_directory, filename)
            output_path = os.path.join(output_directory, filename)
            # Check if the output file already exists
            if os.path.exists(output_path):
                print(f"Skipped {filename}, already exists in {output_directory}")
                continue
            submitted[executor.submit(resize_image, input_path, output_path)] = filename
            print(f"Submitted {filename} for resizing.")
        for future in concurrent.futures.as_completed(submitted):
            error = future.exception()
            if error is not None:
                failures += 1
                print(f"Failed to resize {submitted[future]}: {error}")
    if failures:
        print(f"{failures} image(s) could not be resized.")
    else:
        print("All images have been resized and saved to the output directory.")
+if __name__ == "__main__":
+    main()