Merve Noyan
committed on
Commit
•
a9b77f8
1
Parent(s):
095a16c
initial commit
Browse files- .DS_Store +0 -0
- .gitattributes +2 -0
- decoder/.DS_Store +0 -0
- decoder/.gitattributes +1 -0
- decoder/keras_metadata_decoder.pb +3 -0
- decoder/model.h5 +3 -0
- decoder/saved_model.pb +3 -0
- decoder/variables/.gitattributes +3 -0
- decoder/variables/variables.data-00000-of-00001 +3 -0
- decoder/variables/variables.index +0 -0
- encoder/.DS_Store +0 -0
- encoder/.gitattributes +1 -0
- encoder/keras_metadata_encoder.pb +3 -0
- encoder/model.h5 +3 -0
- encoder/saved_model.pb +3 -0
- encoder/variables/.gitattributes +2 -0
- encoder/variables/variables.data-00000-of-00001 +3 -0
- encoder/variables/variables.index +0 -0
- pipeline.py +70 -0
.DS_Store
ADDED
Binary file (8.2 kB). View file
|
|
.gitattributes
CHANGED
@@ -25,3 +25,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
26 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
26 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
encoder filter=lfs diff=lfs merge=lfs -text
|
29 |
+
decoder filter=lfs diff=lfs merge=lfs -text
|
decoder/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
decoder/.gitattributes
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
model.h5 filter=lfs diff=lfs merge=lfs -text
|
decoder/keras_metadata_decoder.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8ccb81427499e2bd01421f651a779215faeb2306ee3c998daadd78a6b6fcf28
|
3 |
+
size 10036
|
decoder/model.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe2d82bd1ce2018eb05d618e5e33e1de50862f99b67670344a9b0440161e9bb5
|
3 |
+
size 24349932
|
decoder/saved_model.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d522774f3f411985799ed44b15176540ae1d0966e2e21e05e23450d9c8544679
|
3 |
+
size 797127
|
decoder/variables/.gitattributes
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
2 |
+
/Users/mervenoyan/Desktop/caption/image-captioning/encoder/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
3 |
+
/Users/mervenoyan/Desktop/caption/image-captioning/decoder/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
decoder/variables/variables.data-00000-of-00001
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e49b95332c394b3c1f844f4275cd315f4e0760f9fc0ea085b2bf85b1726e323a
|
3 |
+
size 24321506
|
decoder/variables/variables.index
ADDED
Binary file (992 Bytes). View file
|
|
encoder/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
encoder/.gitattributes
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
model.h5 filter=lfs diff=lfs merge=lfs -text
|
encoder/keras_metadata_encoder.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:972a2170cf22e5827c66ccedc5c2d10b5bee7b9a3808316a3377c14f1a463b99
|
3 |
+
size 1732
|
encoder/model.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5b38e2f804ca719f2fe5a2fc31b4906bfc8502b5c057edfce08a9baa55443e6
|
3 |
+
size 2109280
|
encoder/saved_model.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f99dcc649b8ded921c0ccd0f3ef375ff2a3a498f710abe60655d1ccb40c9b074
|
3 |
+
size 37214
|
encoder/variables/.gitattributes
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
2 |
+
/Users/mervenoyan/Desktop/caption/image-captioning/encoder/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
encoder/variables/variables.data-00000-of-00001
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c6e09edb4de9bb605fcaafb659672dfbce0dc7b7f8e730b8c054e1a3c80ad5c
|
3 |
+
size 2099011
|
encoder/variables/variables.index
ADDED
Binary file (270 Bytes). View file
|
|
pipeline.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import tensorflow as tf
|
2 |
+
import numpy as np
|
3 |
+
from tensorflow import keras
|
4 |
+
import os
|
5 |
+
from typing import Dict, List, Any
|
6 |
+
import pickle
|
7 |
+
from PIL import Image
|
8 |
+
class PreTrainedPipeline():
    """Image-captioning pipeline: InceptionV3 features -> encoder -> decoder.

    The pipeline loads two saved Keras models (``encoder`` and ``decoder``)
    and a pickled tokenizer from ``path``, builds an InceptionV3 feature
    extractor (ImageNet weights, no classification head), and generates a
    caption word by word when called with a PIL image.
    """

    def __init__(self, path="", max_length=50):
        """Load the models and the tokenizer.

        Args:
            path: Directory that contains the ``encoder`` and ``decoder``
                saved models and ``tokenizer.pickle``. The default ``""``
                resolves everything relative to the working directory.
            max_length: Upper bound on the number of generated tokens.
                NOTE(review): the original code used an undefined
                ``max_length``; 50 is a placeholder -- confirm against the
                caption length used at training time.
        """
        # Load both models. The original assigned the encoder to
        # ``self.decoder`` as well, which discarded the decoder and left
        # ``self.encoder`` undefined.
        self.decoder = keras.models.load_model(os.path.join(path, "decoder"))
        self.encoder = keras.models.load_model(os.path.join(path, "encoder"))
        self.max_length = max_length

        # InceptionV3 without its top layers; the output of the last
        # remaining layer is used as the image feature map.
        image_model = tf.keras.applications.InceptionV3(include_top=False,
                                                        weights='imagenet')
        self.image_features_extract_model = tf.keras.Model(
            image_model.input, image_model.layers[-1].output)

        # SECURITY NOTE: pickle.load executes arbitrary code from the file.
        # Only load a tokenizer.pickle that ships with a trusted repository.
        with open(os.path.join(path, 'tokenizer.pickle'), 'rb') as handle:
            self.tokenizer = pickle.load(handle)

    @staticmethod
    def _preprocess(img):
        """Resize to 299x299 and apply InceptionV3 input preprocessing."""
        img = tf.image.resize(img, (299, 299))
        return tf.keras.applications.inception_v3.preprocess_input(img)

    @staticmethod
    def load_image(image_path):
        """Read a JPEG file from disk and preprocess it for InceptionV3.

        Args:
            image_path: Path of the JPEG file to read.

        Returns:
            A ``(image_tensor, image_path)`` tuple.
        """
        img = tf.io.read_file(image_path)
        img = tf.io.decode_jpeg(img, channels=3)
        return PreTrainedPipeline._preprocess(img), image_path

    def __call__(self, inputs: "Image.Image") -> List[str]:
        """Generate a caption for a PIL image.

        Args:
            inputs (:obj:`PIL.Image`):
                The raw image as PIL. All necessary transformations
                (RGB conversion, resize, InceptionV3 preprocessing) are
                made here.
        Return:
            A :obj:`list` of generated words, in order. Generation stops
            after ``'<end>'`` is produced (it is included in the list) or
            after ``max_length`` tokens.

            NOTE(review): the hosting template this file was derived from
            describes a list of ``{"label": ..., "score": ...}`` dicts;
            this method has always returned plain words -- confirm which
            shape the consumer expects.
        """
        # Initial decoder state.
        # NOTE(review): 512 presumably matches the decoder's unit count -- confirm.
        hidden = tf.zeros((1, 512))

        # ``inputs`` is an in-memory PIL image, not a file path, so it is
        # converted directly instead of going through ``load_image``.
        pixels = np.asarray(inputs.convert("RGB"))
        temp_input = tf.expand_dims(self._preprocess(pixels), 0)

        # Flatten the spatial grid: (batch, h, w, c) -> (batch, h*w, c).
        img_tensor_val = self.image_features_extract_model(temp_input)
        img_tensor_val = tf.reshape(img_tensor_val, (img_tensor_val.shape[0],
                                                     -1,
                                                     img_tensor_val.shape[3]))

        features = self.encoder(img_tensor_val)

        dec_input = tf.expand_dims([self.tokenizer.word_index['<start>']], 0)
        result = []

        for _ in range(self.max_length):
            predictions, hidden, attention_weights = self.decoder(dec_input,
                                                                  features,
                                                                  hidden)

            # Sample the next token from the predicted logits, so repeated
            # calls on the same image may yield different captions.
            predicted_id = int(tf.random.categorical(predictions, 1)[0][0].numpy())
            word = self.tokenizer.index_word[predicted_id]
            result.append(word)

            if word == '<end>':
                return result

            # Feed the sampled token back in as the next decoder input.
            dec_input = tf.expand_dims([predicted_id], 0)
        return result
|