Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,7 @@ import pandas as pd
|
|
7 |
import hashlib
|
8 |
import numpy as np
|
9 |
import cv2
|
|
|
10 |
from PIL import Image
|
11 |
|
12 |
# if you changed the MLP architecture during training, change it also here:
|
@@ -49,11 +50,6 @@ def phashstr(image, hash_size=8, highfreq_factor=4):
|
|
49 |
diff = dctlowfreq > med
|
50 |
return _binary_array_to_hex(diff.flatten())
|
51 |
|
52 |
-
def normalized(a, axis=-1, order=2):
    """Scale `a` to unit norm along `axis`.

    The `order`-norm of `a` is taken along `axis` with ``np.linalg.norm``
    and `a` is divided by it. Slices whose norm is exactly zero are divided
    by 1 instead, so they come back unchanged rather than as NaNs.
    """
    norms = np.atleast_1d(np.linalg.norm(a, order, axis))
    # Swap zero norms for 1 so the division below never divides by zero.
    divisor = np.where(norms == 0, np.ones_like(norms), norms)
    return a / np.expand_dims(divisor, axis)
|
56 |
-
|
57 |
def convert_numpy_types(data):
|
58 |
if isinstance(data, dict):
|
59 |
return {key: convert_numpy_types(value) for key, value in data.items()}
|
@@ -65,15 +61,28 @@ def convert_numpy_types(data):
|
|
65 |
return int(data)
|
66 |
else:
|
67 |
return data
|
|
|
|
|
|
|
68 |
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
73 |
|
74 |
-
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
image = Image.fromarray(image)
|
79 |
image_np = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
|
@@ -81,15 +90,24 @@ def predict(image):
|
|
81 |
phash = phashstr(image)
|
82 |
md5 = hashlib.md5(image.tobytes()).hexdigest()
|
83 |
sha1 = hashlib.sha1(image.tobytes()).hexdigest()
|
84 |
-
|
85 |
inputs = preprocess(image).unsqueeze(0).to(device)
|
86 |
|
87 |
with torch.no_grad():
|
|
|
88 |
img_emb = model2.encode_image(inputs)
|
89 |
-
|
90 |
-
|
91 |
-
|
|
|
|
|
|
|
|
|
92 |
|
|
|
|
|
|
|
|
|
|
|
93 |
result = {
|
94 |
"clip_aesthetic": prediction,
|
95 |
"phash": phash,
|
|
|
7 |
import hashlib
|
8 |
import numpy as np
|
9 |
import cv2
|
10 |
+
import time
|
11 |
from PIL import Image
|
12 |
|
13 |
# if you changed the MLP architecture during training, change it also here:
|
|
|
50 |
diff = dctlowfreq > med
|
51 |
return _binary_array_to_hex(diff.flatten())
|
52 |
|
|
|
|
|
|
|
|
|
|
|
53 |
def convert_numpy_types(data):
|
54 |
if isinstance(data, dict):
|
55 |
return {key: convert_numpy_types(value) for key, value in data.items()}
|
|
|
61 |
return int(data)
|
62 |
else:
|
63 |
return data
|
64 |
+
|
65 |
+
def normalized_np(a, axis=-1, order=2):
    """Scale the NumPy array `a` to unit norm along `axis`.

    The `order`-norm of `a` is taken along `axis` with ``np.linalg.norm``
    and `a` is divided by it. Slices whose norm is exactly zero are divided
    by 1 instead, so they come back unchanged rather than as NaNs.
    """
    import numpy as np  # pylint: disable=import-outside-toplevel

    norms = np.atleast_1d(np.linalg.norm(a, order, axis))
    # Swap zero norms for 1 so the division below never divides by zero.
    divisor = np.where(norms == 0, np.ones_like(norms), norms)
    return a / np.expand_dims(divisor, axis)
|
|
|
71 |
|
72 |
+
def normalized(a, axis=-1, order=2):
    """Return tensor `a` divided by its `order`-norm along `axis`.

    The norm comes from ``torch.linalg.norm`` with ``keepdim=True`` so it
    broadcasts against `a`. Norms equal to zero are replaced by 1, so
    all-zero slices come back unchanged instead of as NaNs.
    """
    norms = torch.linalg.norm(a, dim=axis, ord=order, keepdim=True)
    # Out-of-place equivalent of patching zero norms to 1 before dividing.
    safe_norms = norms.masked_fill(norms == 0, 1)
    return a / safe_norms
|
76 |
+
|
77 |
+
|
78 |
+
# Model setup: build the scoring MLP and the CLIP image encoder that predict() uses.
# NOTE(review): these appear to be module-level statements, so they would run at import time - confirm.
# MLP is not defined in this section of the file; 768 is passed as its single argument.
model = MLP(768) # CLIP embedding dim is 768 for CLIP ViT L 14
|
79 |
+
# URL of the checkpoint whose state dict is loaded into `model` below.
pthpath = "https://huggingface.co/haor/aesthetics/resolve/main/sac%2Blogos%2Bava1-l14-linearMSE.pth"
|
80 |
+
# Use the GPU when torch reports CUDA as available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
|
81 |
+
# Fetch the state dict from `pthpath`, mapping its tensors onto `device`, and load it into the MLP.
model.load_state_dict(torch.hub.load_state_dict_from_url(pthpath, map_location=device))
|
82 |
+
# Move the MLP to `device` and switch it to evaluation mode.
model.to(device).eval()
|
83 |
+
# clip.load returns two values: `model2` (predict() calls model2.encode_image) and
# `preprocess` (predict() applies it to the PIL image before encoding).
model2, preprocess = clip.load("ViT-L/14", device=device)
|
84 |
+
|
85 |
+
def predict(image):
|
86 |
|
87 |
image = Image.fromarray(image)
|
88 |
image_np = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
|
|
|
90 |
phash = phashstr(image)
|
91 |
md5 = hashlib.md5(image.tobytes()).hexdigest()
|
92 |
sha1 = hashlib.sha1(image.tobytes()).hexdigest()
|
|
|
93 |
inputs = preprocess(image).unsqueeze(0).to(device)
|
94 |
|
95 |
with torch.no_grad():
|
96 |
+
start_time = time.time()
|
97 |
img_emb = model2.encode_image(inputs)
|
98 |
+
end_time = time.time()
|
99 |
+
print(f"Encoding image took {end_time - start_time} seconds")
|
100 |
+
|
101 |
+
start_time = time.time()
|
102 |
+
img_emb = normalized(img_emb).float()
|
103 |
+
end_time = time.time()
|
104 |
+
print(f"Normalizing image took {end_time - start_time} seconds")
|
105 |
|
106 |
+
start_time = time.time()
|
107 |
+
prediction = model(img_emb).item()
|
108 |
+
end_time = time.time()
|
109 |
+
print(f"Making prediction took {end_time - start_time} seconds")
|
110 |
+
|
111 |
result = {
|
112 |
"clip_aesthetic": prediction,
|
113 |
"phash": phash,
|