marinap committed on
Commit
e173a84
1 Parent(s): b19096b

initial commit

Browse files
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import requests
3
+ import numpy as np
4
+ import pandas as pd
5
+ import torch
6
+ import torch.nn.functional as F
7
+ from PIL import Image
8
+ import gradio as gr
9
+ import uform
10
+
11
+
12
# Multilingual UForm vision-language model, fetched by its hub identifier.
model_multi = uform.get_model('unum-cloud/uform-vl-multilingual')

# Embedding matrix read from disk and wrapped as a torch tensor in one step;
# find_topk compares each query embedding against it.
embeddings = torch.tensor(
    np.load('multilingual-image-search/tensors/embeddings.npy')
)

#features = np.load('multilingual-image-search/tensors/features.npy')
#features = torch.tensor(features)

# Image metadata table; find_topk reads its 'url' column by row position.
img_df = pd.read_csv('multilingual-image-search/image_data.csv')
21
+
22
def url2img(url, timeout=10):
    """Download the resource at *url* and return its raw bytes.

    Args:
        url: Address of the image to fetch; redirects are followed.
        timeout: Seconds to wait for the server before giving up, so a
            stalled host cannot block the caller indefinitely.

    Returns:
        The response body as ``bytes``. The payload is not decoded; wrap it
        in ``Image.open(io.BytesIO(...))`` to obtain a PIL image.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    response = requests.get(url, allow_redirects=True, timeout=timeout)
    # Fail loudly on error responses instead of handing an error page to the
    # caller as if it were image data.
    response.raise_for_status()
    return response.content
26
+
27
def find_topk(text):
    """Return the indexed image that best matches a text query.

    The query is embedded with ``model_multi`` and compared by cosine
    similarity against every row of the module-level ``embeddings`` tensor.
    The URL of the highest-scoring row is looked up in ``img_df`` and the
    image behind it is downloaded.

    Args:
        text: Free-form query string supplied by the Gradio text box.

    Returns:
        A ``PIL.Image.Image`` for the best match. The Gradio ``'image'``
        output accepts a PIL image, numpy array or file path, not raw bytes.
    """
    top_k = 10

    text_data = model_multi.preprocess_text(text)

    # Pure inference: no autograd bookkeeping is needed.
    with torch.no_grad():
        # encode_text returns (features, embedding); only the embedding is
        # used for retrieval here.
        _, text_embedding = model_multi.encode_text(text_data, return_features=True)
        sims = F.cosine_similarity(text_embedding, embeddings)

    # Never request more candidates than there are indexed images, otherwise
    # topk raises on small indexes.
    _, inds = sims.topk(min(top_k, sims.numel()))

    # Plain Python ints are the unambiguous positional indexer for iloc.
    best_url = img_df.iloc[inds.tolist()]['url'].values[0]

    # Decode the downloaded bytes so Gradio can render the result.
    return Image.open(io.BytesIO(url2img(best_url)))
41
+
42
+
43
+
44
+ # def rerank(text_features, text_data):
45
+
46
+ # # create joint embeddings & get scores
47
+ # joint_embedding = model_multi.encode_multimodal(
48
+ # image_features=image_features,
49
+ # text_features=text_features,
50
+ # attention_mask=text_data['attention_mask']
51
+ # )
52
+ # score = model_multi.get_matching_scores(joint_embedding)
53
+
54
+ # # argmax to get top N
55
+
56
+ # return
57
+
58
+
59
# Gradio UI: a single text input wired to find_topk, rendering one image.
demo = gr.Interface(fn=find_topk, inputs='text', outputs='image')

if __name__ == "__main__":
    demo.launch()
image_data.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ numpy
2
+ torch
3
+ uform
4
+ scikit-learn
5
+ npy-append-array
tensors/embeddings.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:654724f18089334fdf0690a8da1b530b293d0d1d4815138736485fe26bd9e18b
3
+ size 25585792
tensors/features.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:222f3ea4591231f42cf65337053e925ec22290287fd42515c5db94662e3ac776
3
+ size 15121127552