Spaces:

abven
/

ImageCaptionGenerator

Sleeping

App Files Files Community

VenkateshRoshan committed on Oct 6, 2024

Commit

be7ebcc

1 Parent(s): f3a635f

Initial Commit

Browse files

Files changed (8) hide show

ImageCaptioning.ipynb +0 -0
__init__.py +0 -0
__pycache__/config.cpython-310.pyc +0 -0
config/__pycache__/config.cpython-310.pyc +0 -0
config/config.py +11 -0
data/__pycache__/dataLoader.cpython-310.pyc +0 -0
data/dataLoader.py +52 -0
main.py +14 -0

ImageCaptioning.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

__init__.py ADDED Viewed

File without changes

__pycache__/config.cpython-310.pyc ADDED Viewed

Binary file (648 Bytes). View file

config/__pycache__/config.cpython-310.pyc ADDED Viewed

Binary file (659 Bytes). View file

config/config.py ADDED Viewed

	@@ -0,0 +1,11 @@

+import torch
+class Config:
+    IMAGE_SIZE = (224, 224)
+    MAX_SEQ_LEN = 64
+    VIT_MODEL = 'google/vit-base-patch16-224-in21k'
+    GPT2_MODEL = 'gpt2'
+    LEARNING_RATE = 5e-5
+    EPOCHS = 10
+    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+    AWS_S3_BUCKET = 'your-s3-bucket-name'
+    DATASET_PATH = '../Datasets/Flickr8K/'

data/__pycache__/dataLoader.cpython-310.pyc ADDED Viewed

Binary file (1.99 kB). View file

data/dataLoader.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import numpy as np
+import os
+import cv2
+from PIL import Image
+from torchvision import transforms
+import pandas as pd
+class dataLoader:
+    def __init__(self, path):
+        self.path = path
+        self.img_path = path + 'images/'
+        self.caption_path = path + 'captions.csv'
+        self.img_list = os.listdir(self.img_path)
+        self.caption_dict = self.get_caption_dict()
+        self.transform = transforms.Compose([
+            transforms.Resize((224, 224)),
+            transforms.ToTensor()
+        ])
+    def get_caption_dict(self):
+        caption_dict = {}
+        df = pd.read_csv(self.caption_path, delimiter=',')
+        for i in range(len(df)):
+            img_name = df.iloc[i, 0]
+            caption = df.iloc[i, 1]
+            caption_dict[img_name] = caption
+        return caption_dict
+    def get_image(self, img_name):
+        img = Image.open(self.img_path + img_name)
+        img = self.transform(img)
+        return img
+    def get_caption(self, img_name):
+        return self.caption_dict[img_name]
+    def get_batch(self, batch_size):
+        batch = np.random.choice(self.img_list, batch_size)
+        images = []
+        captions = []
+        for img_name in batch:
+            images.append(self.get_image(img_name))
+            captions.append(self.get_caption(img_name))
+        return images, captions
+    def get_all(self):
+        images = []
+        captions = []
+        for img_name in self.img_list:
+            images.append(self.get_image(img_name))
+            captions.append(self.get_caption(img_name))
+        return images, captions

main.py ADDED Viewed

	@@ -0,0 +1,14 @@

+import numpy as np
+import os
+import cv2
+from PIL import Image
+from matplotlib import pyplot as plt
+from config.config import Config
+from data.dataLoader import dataLoader
+if __name__ == '__main__':
+    dl = dataLoader(Config.DATASET_PATH)
+    images, captions = dl.get_all()
+    print('Number of images:', len(images))
+    print('Number of captions:', len(captions))