import gradio as gr
import os
from transformers import AutoImageProcessor, AutoModel
import torch
from pymongo import MongoClient
from PIL import Image
import json
import numpy as np
import faiss
from dotenv import load_dotenv

load_dotenv()


# Init similarity search AI model and processor
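# "./dinov2-base" is expected to be a local copy of the DINOv2 base checkpoint
# (presumably facebook/dinov2-base), downloaded ahead of time so the app does
# not have to fetch it from the Hub at startup.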
torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dino_v2_model = AutoModel.from_pretrained("./dinov2-base").to(torch_device)
dino_v2_image_processor = AutoImageProcessor.from_pretrained("./dinov2-base")

# MongoDB
MONGO_URI = os.environ.get("MONGO_URI")
mongo = MongoClient(MONGO_URI)
db = mongo["xbgp"]


def process_image(image):
    """
    Process the image and extract features using the DINOv2 model.
    """
    # Add your image processing code here.
    # This will include preprocessing the image, passing it through the model,
    # and then formatting the output (extracted features).

    # Load the list mapping FAISS index positions to gamerpic CDN URLs
    with open("images.json", "r") as f:
        images = json.load(f)

    # Convert to RGB if it isn't already
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Resize so the shorter side is 64px while preserving the aspect ratio
    width, height = image.size
    if width < height:
        w_percent = 64 / float(width)
        new_width = 64
        new_height = int(float(height) * float(w_percent))
    else:
        h_percent = 64 / float(height)
        new_height = 64
        new_width = int(float(width) * float(h_percent))
    image = image.resize((new_width, new_height), Image.LANCZOS)

    # Extract the features from the uploaded image
    with torch.no_grad():
        inputs = dino_v2_image_processor(images=image, return_tensors="pt").to(
            torch_device
        )
        outputs = dino_v2_model(**inputs)

    # Mean-pool the token embeddings into a single vector, then L2-normalize it
    # so the distances returned by the FAISS search are comparable across images
    embeddings = outputs.last_hidden_state
    embeddings = embeddings.mean(dim=1)
    vector = embeddings.detach().cpu().numpy()
    vector = np.float32(vector)
    faiss.normalize_L2(vector)

    # Load the FAISS index and retrieve the 50 nearest gamerpics
    index = faiss.read_index("vector.index")
    distances, indices = index.search(vector, 50)
    matches = []
    for idx, matching_gamerpic in enumerate(indices[0]):
        gamerpic = images[matching_gamerpic]
        # Return the corresponding title with only the matched gamerpic
        title = db.titles.find_one(
            {"gamerpics.cdn": gamerpic},
            {"name": 1, "type": 1, "url": 1, "gamerpics.$": 1},
        )

        title["rank"] = idx
        title["score"] = str(round((1 / (distances[0][idx] + 1) * 100), 2)) + "%"
        matches.append(title)

    # Render the matches as a simple HTML list (title name + similarity score)
    items = "".join(
        f"<li>{match['name']}: {match['score']}</li>" for match in matches
    )
    return f"<ol>{items}</ol>"


# Create a Gradio interface
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),  # the upload is handed to process_image as a PIL image
    outputs="html",  # process_image returns an HTML fragment listing the matches
)

# Launch the Gradio app
iface.launch()
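

# --- Hypothetical offline indexing sketch (not part of the running app) ---
# The app above assumes that "images.json" (the list of gamerpic CDN URLs) and
# "vector.index" (a FAISS index of their DINOv2 embeddings) already exist.
# The helper below is a minimal sketch of how those artifacts could be built
# with the same model. The build_index name, the local images/ folder, and the
# choice of an exact IndexFlatL2 index are assumptions for illustration; the
# original indexing pipeline is not part of this file.
def build_index(image_dir="images", index_path="vector.index", list_path="images.json"):
    urls, vectors = [], []
    for filename in sorted(os.listdir(image_dir)):
        # In the real app each entry would presumably be the gamerpic CDN URL,
        # and the preprocessing (e.g. the 64px short-side resize) would match
        # the query path above.
        image = Image.open(os.path.join(image_dir, filename)).convert("RGB")
        with torch.no_grad():
            inputs = dino_v2_image_processor(images=image, return_tensors="pt").to(
                torch_device
            )
            outputs = dino_v2_model(**inputs)
        # Mean-pool and keep float32, mirroring the query-time feature extraction
        vector = outputs.last_hidden_state.mean(dim=1).detach().cpu().numpy()
        vectors.append(np.float32(vector)[0])
        urls.append(filename)

    matrix = np.stack(vectors)
    faiss.normalize_L2(matrix)
    index = faiss.IndexFlatL2(matrix.shape[1])  # exact L2 search over normalized vectors
    index.add(matrix)
    faiss.write_index(index, index_path)
    with open(list_path, "w") as f:
        json.dump(urls, f)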