File size: 2,563 Bytes
609bf1e
 
 
 
 
 
 
 
 
 
 
d9b1d5d
 
609bf1e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3989d63
 
 
609bf1e
3989d63
 
 
 
 
 
 
6001755
 
609bf1e
 
 
 
 
 
3989d63
609bf1e
 
 
bb75ee4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import gradio as gr
from transformers import AutoImageProcessor, AutoModel
import torch
from PIL import Image
import json
import numpy as np
import faiss


# Init similarity search AI model and processor
torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dino_v2_model = AutoModel.from_pretrained("./dinov2-base").to(torch_device)
dino_v2_image_processor = AutoImageProcessor.from_pretrained("./dinov2-base")


def process_image(image):
    """
    Process the image and extract features using the DINOv2 model.
    """
    # Add your image processing code here.
    # This will include preprocessing the image, passing it through the model,
    # and then formatting the output (extracted features).

    # Load the index
    with open("images.json", "r") as f:
        images = json.load(f)

    # Convert to RGB if it isn't already
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Resize to 64px while maintaining aspect ratio
    width, height = image.size
    if width < height:
        w_percent = 64 / float(width)
        new_width = 64
        new_height = int(float(height) * float(w_percent))
    else:
        h_percent = 64 / float(height)
        new_height = 64
        new_width = int(float(width) * float(h_percent))
    image = image.resize((new_width, new_height), Image.LANCZOS)

    # Extract the features from the uploaded image
    with torch.no_grad():
        inputs = dino_v2_image_processor(images=image, return_tensors="pt").to(
            torch_device
        )
        outputs = dino_v2_model(**inputs)

    # Normalize the features before search, whatever that means
    embeddings = outputs.last_hidden_state
    embeddings = embeddings.mean(dim=1)
    vector = embeddings.detach().cpu().numpy()
    vector = np.float32(vector)
    faiss.normalize_L2(vector)

    # Read the index file and perform search of top 50 images
    index = faiss.read_index("vector.index")
    distances, indices = index.search(vector, 50)

    matches = []

    for idx, matching_gamerpic in enumerate(indices[0]):
        gamerpic = {}
        gamerpic["cdn"] = images[matching_gamerpic]
        gamerpic["score"] = str(round((1 / (distances[0][idx] + 1) * 100), 2)) + "%"

        print(gamerpic)

        matches.append(gamerpic)

    return matches


# Create a Gradio interface
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),  # Adjust the shape as needed
    outputs="json",  # Or any other output format that suits your needs
)

# Launch the Gradio app
iface.launch()