davanstrien (HF staff) committed on
Commit 5fd9c92 · 1 Parent(s): bcd5c09
Files changed (1)
  1. app.py +119 -4
app.py CHANGED
@@ -1,7 +1,122 @@
 import gradio as gr
 
-def greet(name):
-    return "Hello " + name + "!!"
 
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()
+import torch
+from datasets import load_dataset
+from qdrant_client import QdrantClient
+from qdrant_client.http import models
+from colpali_engine.models import ColQwen2, ColQwen2Processor
+from PIL import Image
+import requests
+from io import BytesIO
+
+# Initialize the model, processor, and Qdrant client
+model_name = "vidore/colqwen2-v0.1"
+colpali_model = ColQwen2.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="cuda:0")
+colpali_processor = ColQwen2Processor.from_pretrained(model_name)
+qdrant_client = QdrantClient(":memory:")
+collection_name = "image_collection"
+
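+# ColQwen2 is a late-interaction model: each image or query is embedded as a
+# sequence of 128-dim token vectors (a multivector) rather than a single
+# vector, which is why the collection below uses Qdrant's MAX_SIM comparator.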
+# Load the dataset (this should be done only once when setting up the app)
+dataset = load_dataset("davanstrien/loc-nineteenth-century-song-sheets", split="train")
+
+def setup_qdrant():
+    # Create a collection in Qdrant
+    qdrant_client.recreate_collection(
+        collection_name=collection_name,
+        vectors_config=models.VectorParams(
+            size=128,  # ColQwen2 outputs 128-dim token vectors; config.hidden_size is the wrong dimension here
+            distance=models.Distance.COSINE,
+            multivector_config=models.MultiVectorConfig(
+                comparator=models.MultiVectorComparator.MAX_SIM
+            ),
+        ),
+    )
+
+    # Index the dataset (this should be done only once when setting up the app)
+    batch_size = 32
+    for i in range(0, len(dataset), batch_size):
+        batch = dataset[i:i+batch_size]
+        images = batch['image']
+        with torch.no_grad():
+            batch_images = colpali_processor.process_images(images).to(colpali_model.device)
+            image_embeddings = colpali_model(**batch_images)
+
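+        # Each point stores the full list of per-token vectors for one page image,
+        # plus the metadata needed to link results back to the source item.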
+        points = []
+        for j, embedding in enumerate(image_embeddings):
+            multivector = embedding.cpu().float().numpy().tolist()
+            points.append(models.PointStruct(
+                id=i+j,
+                vector=multivector,
+                payload={
+                    "item_id": batch['item_id'][j],
+                    "item_url": batch['item_url'][j]
+                }
+            ))
+
+        qdrant_client.upsert(
+            collection_name=collection_name,
+            points=points
+        )
+
+    print("Indexing complete!")
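+
+# Queries are embedded with the same model as the images; Qdrant then scores
+# each stored page with MaxSim over all query token vectors.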
+def search_similar_images(query, top_k=5, mode="text"):
+    with torch.no_grad():
+        if mode == "text":
+            batch_query = colpali_processor.process_queries([query]).to(colpali_model.device)
+        else:  # Image mode
+            batch_query = colpali_processor.process_images([query]).to(colpali_model.device)
+        query_embedding = colpali_model(**batch_query)
+
+    multivector_query = query_embedding[0].cpu().float().numpy().tolist()
+
+    # Multivector queries go through the Query API; the older search() method
+    # expects a single flat vector.
+    search_result = qdrant_client.query_points(
+        collection_name=collection_name,
+        query=multivector_query,
+        limit=top_k
+    )
+
+    return search_result.points
+
+def process_results(results):
+    # Build the gallery items and the score list separately: each click handler
+    # below has two outputs (a Gallery and a JSON component).
+    gallery = []
+    scores = []
+    for result in results:
+        item_url = result.payload['item_url']
+        gallery.append((item_url, f"Score: {result.score:.4f}"))
+        scores.append({"item_id": result.payload['item_id'], "score": result.score})
+    return gallery, scores
+
+def text_search(query, top_k):
+    results = search_similar_images(query, top_k, mode="text")
+    return process_results(results)
+
+def image_search(image, top_k):
+    results = search_similar_images(image, top_k, mode="image")
+    return process_results(results)
+
+# Set up the Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("# Image Search App")
+    gr.Markdown("Search for similar images using text or image input.")
+
+    with gr.Tab("Text Search"):
+        text_input = gr.Textbox(label="Enter your search query")
+        text_button = gr.Button("Search")
+        # Layout options are constructor arguments; Gallery.style() was removed in Gradio 4
+        text_output = gr.Gallery(label="Results", show_label=False, columns=2, rows=2, object_fit="contain", height="auto")
+        text_scores = gr.JSON(label="Scores")
+
+    with gr.Tab("Image Search"):
+        image_input = gr.Image(type="pil", label="Upload an image")
+        image_button = gr.Button("Search")
+        image_output = gr.Gallery(label="Results", show_label=False, columns=2, rows=2, object_fit="contain", height="auto")
+        image_scores = gr.JSON(label="Scores")
+
+    top_k_slider = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of results")
+
+    text_button.click(text_search, inputs=[text_input, top_k_slider], outputs=[text_output, text_scores])
+    image_button.click(image_search, inputs=[image_input, top_k_slider], outputs=[image_output, image_scores])
+
+# Run the setup (this should be done only once when deploying the app)
+setup_qdrant()
+
+# Launch the app
+demo.launch()