Commit 5fd9c92 · Parent(s): bcd5c09 · app
app.py CHANGED
@@ -1,7 +1,122 @@
[previous lines 2-7 removed; their content is not shown in this view]

import gradio as gr
import torch
from datasets import load_dataset
from qdrant_client import QdrantClient
from qdrant_client.http import models
from colpali_engine.models import ColQwen2, ColQwen2Processor
from PIL import Image
import requests
from io import BytesIO

# Initialize the model, processor, and Qdrant client
model_name = "vidore/colqwen2-v0.1"
colpali_model = ColQwen2.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="cuda:0")
colpali_processor = ColQwen2Processor.from_pretrained(model_name)
qdrant_client = QdrantClient(":memory:")
collection_name = "image_collection"

# Load the dataset (this should be done only once when setting up the app)
dataset = load_dataset("davanstrien/loc-nineteenth-century-song-sheets", split="train")
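# Note: the Space presumably needs gradio, torch, datasets, qdrant-client,
# colpali-engine and pillow installed (requests, BytesIO and Image are imported
# above but not used below). device_map="cuda:0" assumes GPU hardware; a
# CPU-only Space would need something like
# "cuda:0" if torch.cuda.is_available() else "cpu" instead.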

def setup_qdrant():
    # Create a collection in Qdrant
    qdrant_client.recreate_collection(
        collection_name=collection_name,
        vectors_config=models.VectorParams(
            size=colpali_model.config.hidden_size,
            distance=models.Distance.COSINE,
            multivector_config=models.MultiVectorConfig(
                comparator=models.MultiVectorComparator.MAX_SIM
            ),
        ),
    )
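    # Each page image is stored as a multivector (one vector per ColQwen2 patch
    # embedding) rather than a single pooled vector; MAX_SIM tells Qdrant to score
    # queries against stored images with late interaction (MaxSim), as in
    # ColPali/ColBERT. This assumes colpali_model.config.hidden_size matches the
    # dimensionality of the vectors the model actually produces; if the checkpoint
    # projects embeddings to a smaller dimension, image_embeddings.shape[-1] would
    # be the safer source for `size`.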

    # Index the dataset (this should be done only once when setting up the app)
    batch_size = 32
    for i in range(0, len(dataset), batch_size):
        batch = dataset[i:i+batch_size]
        images = batch['image']
        with torch.no_grad():
            batch_images = colpali_processor.process_images(images).to(colpali_model.device)
            image_embeddings = colpali_model(**batch_images)

        points = []
        for j, embedding in enumerate(image_embeddings):
            multivector = embedding.cpu().float().numpy().tolist()
            points.append(models.PointStruct(
                id=i+j,
                vector=multivector,
                payload={
                    "item_id": batch['item_id'][j],
                    "item_url": batch['item_url'][j]
                }
            ))

        qdrant_client.upsert(
            collection_name=collection_name,
            points=points
        )

    print("Indexing complete!")
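# Note: because the client is QdrantClient(":memory:"), the index lives only inside
# this process, so setup_qdrant() re-embeds and re-indexes the whole dataset every
# time the Space (re)starts; with a persistent Qdrant instance this step could be
# skipped after the first run.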

def search_similar_images(query, top_k=5, mode="text"):
    with torch.no_grad():
        if mode == "text":
            batch_query = colpali_processor.process_queries([query]).to(colpali_model.device)
        else:  # Image mode
            batch_query = colpali_processor.process_images([query]).to(colpali_model.device)
        query_embedding = colpali_model(**batch_query)

    multivector_query = query_embedding[0].cpu().float().numpy().tolist()

    search_result = qdrant_client.search(
        collection_name=collection_name,
        query_vector=multivector_query,
        limit=top_k
    )

    return search_result
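# The query is embedded the same way as the images: a multivector with one vector
# per query token, which Qdrant compares against each stored image multivector
# using MaxSim. Newer qdrant-client releases deprecate search() in favour of
# query_points(); a hedged alternative is sketched after this listing.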

def process_results(results):
    # Build the Gallery entries and a score mapping separately, since each click
    # handler below writes to two outputs (a Gallery and a JSON component).
    gallery_items = []
    scores = {}
    for result in results:
        item_url = result.payload['item_url']
        score = result.score
        gallery_items.append((item_url, f"Score: {score:.4f}"))
        scores[item_url] = score
    return gallery_items, scores
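# Note: gr.Gallery displays image files/URLs, so this assumes each item_url in the
# dataset resolves to a displayable image rather than an HTML catalogue page; if it
# does not, returning the dataset's PIL images instead would be one option.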

def text_search(query, top_k):
    results = search_similar_images(query, top_k, mode="text")
    return process_results(results)

def image_search(image, top_k):
    results = search_similar_images(image, top_k, mode="image")
    return process_results(results)

# Set up the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Image Search App")
    gr.Markdown("Search for similar images using text or image input.")

    with gr.Tab("Text Search"):
        text_input = gr.Textbox(label="Enter your search query")
        text_button = gr.Button("Search")
        # Layout options are passed to the constructor (the .style() helper is
        # deprecated/removed in newer Gradio releases).
        text_output = gr.Gallery(label="Results", show_label=False, elem_id="gallery",
                                 columns=2, rows=2, object_fit="contain", height="auto")
        text_scores = gr.JSON(label="Scores")

    with gr.Tab("Image Search"):
        image_input = gr.Image(type="pil", label="Upload an image")
        image_button = gr.Button("Search")
        image_output = gr.Gallery(label="Results", show_label=False, elem_id="gallery",
                                  columns=2, rows=2, object_fit="contain", height="auto")
        image_scores = gr.JSON(label="Scores")

    top_k_slider = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of results")

    text_button.click(text_search, inputs=[text_input, top_k_slider], outputs=[text_output, text_scores])
    image_button.click(image_search, inputs=[image_input, top_k_slider], outputs=[image_output, image_scores])

# Run the setup (this should be done only once when deploying the app)
setup_qdrant()

# Launch the app
demo.launch()
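Aside: on newer qdrant-client releases (roughly 1.10 onwards), search() is deprecated and query_points() is the documented way to run a multivector (MaxSim) query. Below is a minimal sketch of how the search step above could be adapted, assuming the same colpali_model, colpali_processor, qdrant_client and collection_name as in the app; the function name search_similar_images_qp is illustrative and not part of the commit.

def search_similar_images_qp(query, top_k=5, mode="text"):
    # Encode the query exactly as in search_similar_images above, then query with
    # query_points(), which accepts a multivector query directly.
    with torch.no_grad():
        if mode == "text":
            batch_query = colpali_processor.process_queries([query]).to(colpali_model.device)
        else:
            batch_query = colpali_processor.process_images([query]).to(colpali_model.device)
        query_embedding = colpali_model(**batch_query)
    multivector_query = query_embedding[0].cpu().float().numpy().tolist()

    response = qdrant_client.query_points(
        collection_name=collection_name,
        query=multivector_query,
        limit=top_k,
    )
    # query_points() returns a QueryResponse; the hits (with .payload and .score,
    # as process_results expects) are on .points.
    return response.points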