new model added

app.py CHANGED

@@ -2,7 +2,6 @@ import io
 import os
 import streamlit as st
 import requests
-import numpy as np
 from PIL import Image
 from model import get_caption_model, generate_caption

@@ -37,16 +36,15 @@ if (img_url != "") and (img_url != None):
     img = img.convert('RGB')
     st.image(img)
     img.save('tmp.jpg')
-    st.image(img)
     predict()
     os.remove('tmp.jpg')


 st.markdown('<center style="opacity: 70%">OR</center>', unsafe_allow_html=True)
-
+img_upload = st.file_uploader(label='Upload Image', type=['jpg', 'png'])

-if
-img =
+if img_upload != None:
+    img = img_upload.read()
     img = Image.open(io.BytesIO(img))
     img = img.convert('RGB')
     img.save('tmp.jpg')
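For context, a minimal runnable sketch of the upload path this commit adds to app.py. It assumes generate_caption(image_path, model) as the signature exposed by model.py (only the import is visible in the diff) and stands in for the app's predict() helper, which is defined elsewhere in the file; the unused numpy import and the duplicate st.image call removed above are left out.

import io
import os

import streamlit as st
from PIL import Image

from model import get_caption_model, generate_caption

caption_model = get_caption_model()   # load once; the real app may cache this

img_upload = st.file_uploader(label='Upload Image', type=['jpg', 'png'])

if img_upload is not None:
    img = Image.open(io.BytesIO(img_upload.read()))       # uploaded bytes -> PIL image
    img = img.convert('RGB')                               # normalise mode (drops alpha/palette)
    st.image(img)
    img.save('tmp.jpg')                                    # model pipeline reads from a file path
    st.write(generate_caption('tmp.jpg', caption_model))   # assumed signature, see note above
    os.remove('tmp.jpg')                                   # clean up the temporary file

Unlike the URL branch, which fetches bytes with requests, the uploader hands back raw bytes directly, so they are wrapped in io.BytesIO before PIL can open them.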
model.py CHANGED

@@ -14,7 +14,7 @@ UNITS = 512


 # LOADING DATA
-vocab = pickle.load(open('saved_vocabulary/
+vocab = pickle.load(open('saved_vocabulary/vocab_coco.file', 'rb'))

 tokenizer = tf.keras.layers.TextVectorization(
     # max_tokens=VOCABULARY_SIZE,

@@ -36,7 +36,6 @@ def CNN_Encoder():
         include_top=False,
         weights='imagenet'
     )
-    inception_v3.trainable = False

     output = inception_v3.output
     output = tf.keras.layers.Reshape(

@@ -264,7 +263,7 @@ def load_image_from_path(img_path):
     img = tf.io.read_file(img_path)
     img = tf.io.decode_jpeg(img, channels=3)
     img = tf.keras.layers.Resizing(299, 299)(img)
-    img = img
+    img = tf.keras.applications.inception_v3.preprocess_input(img)
     return img

@@ -322,8 +321,8 @@ def get_caption_model():
     caption_model.decoder(sample_y, sample_enc_out, training=False)

     try:
-        caption_model.load_weights('saved_models/
+        caption_model.load_weights('saved_models/image_captioning_coco_weights.h5')
     except FileNotFoundError:
-        caption_model.load_weights('Image-Captioning/saved_models/
+        caption_model.load_weights('Image-Captioning/saved_models/image_captioning_coco_weights.h5')

     return caption_model
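The load_image_from_path change replaces the previous manual step with InceptionV3's own preprocess_input, which scales pixel values from [0, 255] to the [-1, 1] range the ImageNet-pretrained encoder expects. A self-contained sketch of the updated pipeline; the usage line at the end (the 'tmp.jpg' path and the batch dimension) is an assumption for illustration, not code from the repo.

import tensorflow as tf

def load_image_from_path(img_path):
    img = tf.io.read_file(img_path)                                  # raw JPEG bytes
    img = tf.io.decode_jpeg(img, channels=3)                         # uint8 tensor, H x W x 3
    img = tf.keras.layers.Resizing(299, 299)(img)                    # InceptionV3 input size
    img = tf.keras.applications.inception_v3.preprocess_input(img)   # scale to [-1, 1]
    return img

# Hypothetical usage: add a batch dimension before passing to the CNN encoder.
img = tf.expand_dims(load_image_from_path('tmp.jpg'), axis=0)        # shape (1, 299, 299, 3)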
saved_models/{image_captioning_transformer_weights_1.h5 → image_captioning_coco_weights.h5} RENAMED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:1e84cba7f9b81e113d6cf938f1c03eff9be2f91932edf36ba1d43a14a63eeb23
+size 214197308
saved_models/image_captioning_transformer_weights_3.h5 DELETED

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:61d25373a21ccddf8bf24f67b2a1bb2037d565f8e88c85e20e7f5ef8527a90d9
-size 247484996
saved_vocabulary/{vocab_2.file → vocab_coco.file} RENAMED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:41e68329402249568d75664e814571ccfb8471da72894cffca32cb534a9ef1ea
+size 1153949
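The oid in each Git LFS pointer above is the SHA-256 of the actual file contents, so a pulled weights or vocabulary file can be checked against its pointer. A small sketch, assuming the path is relative to the repo root:

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    h = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):  # hash in 1 MiB chunks
            h.update(chunk)
    return h.hexdigest()

# Should match the oid recorded in the pointer for the renamed vocabulary file.
assert sha256_of('saved_vocabulary/vocab_coco.file') == \
    '41e68329402249568d75664e814571ccfb8471da72894cffca32cb534a9ef1ea'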