Jerome Blin committed on
Commit
7484424
1 Parent(s): ee9b74a

Add application file

Browse files
Files changed (32) hide show
  1. app.py +1 -0
  2. fis/__init__.py +0 -0
  3. fis/__pycache__/__init__.cpython-37.pyc +0 -0
  4. fis/app/__pycache__/app.cpython-37.pyc +0 -0
  5. fis/app/app.py +39 -0
  6. fis/feature_extraction/__pycache__/run.cpython-37.pyc +0 -0
  7. fis/feature_extraction/detection/__pycache__/base.cpython-37.pyc +0 -0
  8. fis/feature_extraction/detection/__pycache__/dummy.cpython-37.pyc +0 -0
  9. fis/feature_extraction/detection/base.py +18 -0
  10. fis/feature_extraction/detection/dummy.py +23 -0
  11. fis/feature_extraction/embedding/__pycache__/base.cpython-37.pyc +0 -0
  12. fis/feature_extraction/embedding/__pycache__/timm.cpython-37.pyc +0 -0
  13. fis/feature_extraction/embedding/base.py +18 -0
  14. fis/feature_extraction/embedding/timm.py +63 -0
  15. fis/feature_extraction/pipeline/__pycache__/base.cpython-37.pyc +0 -0
  16. fis/feature_extraction/pipeline/__pycache__/factory.cpython-37.pyc +0 -0
  17. fis/feature_extraction/pipeline/__pycache__/pipeline.cpython-37.pyc +0 -0
  18. fis/feature_extraction/pipeline/base.py +85 -0
  19. fis/feature_extraction/pipeline/factory.py +51 -0
  20. fis/feature_extraction/pipeline/pipeline.py +11 -0
  21. fis/feature_extraction/run.py +51 -0
  22. fis/similarity_search/milvus/__pycache__/collection.cpython-37.pyc +0 -0
  23. fis/similarity_search/milvus/collection.py +58 -0
  24. fis/utils/__pycache__/config.cpython-37.pyc +0 -0
  25. fis/utils/__pycache__/constants.cpython-37.pyc +0 -0
  26. fis/utils/__pycache__/s3.cpython-37.pyc +0 -0
  27. fis/utils/config.py +16 -0
  28. fis/utils/constants.py +1 -0
  29. fis/utils/data/__pycache__/download_fashionpedia.cpython-37.pyc +0 -0
  30. fis/utils/data/download_fashionpedia.py +56 -0
  31. fis/utils/s3.py +38 -0
  32. requirements.txt +0 -0
app.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from fis.app import app # noqa: F401
fis/__init__.py ADDED
File without changes
fis/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (149 Bytes). View file
 
fis/app/__pycache__/app.cpython-37.pyc ADDED
Binary file (1.35 kB). View file
 
fis/app/app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List
3
+
4
+ import gradio as gr
5
+ import numpy as np
6
+ from datasets import load_dataset
7
+ from PIL.Image import Image as Img
8
+
9
+ from fis.feature_extraction.pipeline.pipeline import factory
10
+ from fis.utils.constants import ORGANISATION
11
+ from fis.utils.s3 import read_image_from_s3
12
+
13
+ # Ugly fix of "OMP: Error #15: Initializing libomp.a, but found libiomp5.dylib already initialized."
14
+ os.environ["KMP_DUPLICATE_LIB_OK"] = "True"
15
+
16
+
17
PIPELINE_NAME = "dummy_swin_pipe"

# Encoding pipeline used to embed the query image.
pipeline = factory.get(PIPELINE_NAME)

# Hub repository ids always use "/" as separator, whatever the host OS is, so the id is
# built with string formatting rather than os.path.join (which yields "\" on Windows).
DATASET_PATH = f"{ORGANISATION}/dummy_swin_pipe_debug"
dataset = load_dataset(path=DATASET_PATH, split="train")
# Index the "embedding" column so nearest-neighbour queries can be run against it.
dataset.add_faiss_index(column="embedding")
24
+
25
+
26
def find_most_similar(image: np.ndarray) -> List[Img]:
    """Return the indexed images that are closest to the query image.

    Args:
        image: Query image.

    Returns:
        The images of the 5 nearest neighbours found in the dataset index.
    """
    # The pipeline yields one embedding per detected item; only the first one is queried.
    query_embedding = pipeline.encode(image)[0]

    _scores, neighbours = dataset.get_nearest_examples("embedding", query_embedding, k=5)

    return [read_image_from_s3(neighbour_path) for neighbour_path in neighbours["path"]]
37
+
38
+
39
# One "image" output component per returned neighbour (find_most_similar returns 5 images).
gr.Interface(fn=find_most_similar, inputs="image", outputs=["image"] * 5).launch()
fis/feature_extraction/__pycache__/run.cpython-37.pyc ADDED
Binary file (1.52 kB). View file
 
fis/feature_extraction/detection/__pycache__/base.cpython-37.pyc ADDED
Binary file (763 Bytes). View file
 
fis/feature_extraction/detection/__pycache__/dummy.cpython-37.pyc ADDED
Binary file (962 Bytes). View file
 
fis/feature_extraction/detection/base.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+
3
+ from PIL import Image
4
+
5
+
6
class BaseDetector(ABC):
    """Base class for detection models."""

    @abstractmethod
    def __call__(self, image: Image.Image) -> list:
        """Detect the items present in an image.

        Args:
            image: Image to run the detection on.

        Returns:
            One bounding box per detected item.
        """
fis/feature_extraction/detection/dummy.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Tuple
2
+
3
+ from PIL import Image
4
+
5
+ from fis.feature_extraction.detection.base import BaseDetector
6
+
7
+
8
class DummyDetector(BaseDetector):
    """Dummy detection model."""

    def __call__(self, image: Image.Image) -> List[Tuple[int, int, int, int]]:
        """Return a bounding box with the same size as the image.

        Args:
            image: Image

        Returns:
            A single (x_min, y_min, x_max, y_max) bounding box covering the whole image.
        """
        # PIL reports the size as (width, height), which is the bottom-right corner of the box.
        x_max, y_max = image.size

        return [(0, 0, x_max, y_max)]
fis/feature_extraction/embedding/__pycache__/base.cpython-37.pyc ADDED
Binary file (753 Bytes). View file
 
fis/feature_extraction/embedding/__pycache__/timm.cpython-37.pyc ADDED
Binary file (2.19 kB). View file
 
fis/feature_extraction/embedding/base.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+
3
+ from PIL import Image
4
+
5
+
6
class BaseEncoder(ABC):
    """Base class for encoders."""

    @abstractmethod
    def __call__(self, image: Image.Image) -> object:
        """Get embeddings from an image.

        Args:
            image: Image to encode

        Returns:
            Embedding. The concrete type is chosen by each encoder implementation.
        """
fis/feature_extraction/embedding/timm.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Callable, Tuple
2
+
3
+ import timm
4
+ import torch
5
+ from PIL import Image
6
+ from timm.data import resolve_data_config
7
+ from timm.data.transforms_factory import create_transform
8
+
9
+ from fis.feature_extraction.embedding.base import BaseEncoder
10
+
11
+
12
class TimmModel(BaseEncoder):
    """Base class for timm models."""

    def __init__(self, model_name: str) -> None:
        """Instantiate the model class.

        Args:
            model_name: Name of the model in the timm library.
        """
        model, transform = self._creat_timm_model(model_name=model_name)

        self._model_name = model_name
        self._model = model
        self._transform = transform

    @property
    def model_name(self) -> str:
        """Name of the timm model wrapped by this encoder."""
        return self._model_name

    @staticmethod
    def _creat_timm_model(model_name: str) -> Tuple[torch.nn.Module, Callable]:
        """Create a model and its associated input transformation.

        Args:
            model_name: Name of the model in the timm library.

        Returns:
            model and transformation function for input images.
        """
        # num_classes=0 requests the model without a classification head
        # (see timm's feature-extraction documentation).
        model = timm.create_model(model_name=model_name, pretrained=True, num_classes=0)
        model.eval()

        # Build the preprocessing from the data configuration resolved for this model.
        config = resolve_data_config({}, model=model)
        transform = create_transform(**config)

        return model, transform

    def __call__(self, image: Image.Image) -> torch.Tensor:
        """Get embeddings from an image.

        Args:
            image: Image to encode

        Returns:
            Embedding of the image, as the first row of the model output converted
            with ``.numpy()``.
            NOTE(review): the ``torch.Tensor`` return annotation is stale; correcting it
            needs a numpy import at module level.
        """
        # The transform and the model work on 3-channel inputs; grayscale, RGBA or CMYK
        # images are converted first so that they do not fail downstream.
        if image.mode != "RGB":
            image = image.convert("RGB")

        tensor = self._transform(image).unsqueeze(0)  # transform and add batch dimension

        with torch.no_grad():
            embedding = self._model(tensor)

        return embedding.numpy()[0]
fis/feature_extraction/pipeline/__pycache__/base.cpython-37.pyc ADDED
Binary file (2.77 kB). View file
 
fis/feature_extraction/pipeline/__pycache__/factory.cpython-37.pyc ADDED
Binary file (2.14 kB). View file
 
fis/feature_extraction/pipeline/__pycache__/pipeline.cpython-37.pyc ADDED
Binary file (569 Bytes). View file
 
fis/feature_extraction/pipeline/base.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, List
2
+
3
+ import numpy as np
4
+ import torch
5
+ from PIL import Image
6
+ from PIL.Image import Image as Img
7
+
8
+ from fis.feature_extraction.detection.base import BaseDetector
9
+ from fis.feature_extraction.embedding.base import BaseEncoder
10
+
11
+
12
class EncodingPipeline:
    """Apply the detection and embedding models to an image."""

    def __init__(self, name: str, detection_model: BaseDetector, embedding_model: BaseEncoder) -> None:
        """Initialize the encoding pipeline.

        Args:
            name: Name of the pipeline.
            detection_model: Model used to detect the fashion items in the images.
            embedding_model: Model used to generate embeddings for each detected item.
        """
        self._name = name
        self._detection_model = detection_model
        self._embedding_model = embedding_model

    def encode(self, image: Any) -> List[Any]:
        """Encode each item from an image into an embedding.

        Args:
            image: PIL image, numpy array or path to the image on disk.

        Raises:
            TypeError: if the type of image is not supported.

        Returns:
            Embeddings for each detected item in the image, in detection order.
            Their type is whatever the embedding model returns.
        """
        image = self._load_images(image)
        bboxes = self._detection_model(image)
        items = self._crop_images(image, bboxes)

        return [self._embedding_model(item) for item in items]

    def _load_images(self, image: Any) -> Img:
        """Convert the supported inputs into a PIL image.

        Args:
            image: PIL image (returned as is), numpy array (converted with
                ``Image.fromarray``) or path to the image on disk (opened with ``Image.open``).

        Raises:
            TypeError: if the type of image is incorrect.

        Returns:
            PIL Image.
        """
        if isinstance(image, Img):
            return image
        if isinstance(image, np.ndarray):
            return Image.fromarray(image)
        if isinstance(image, str):
            return Image.open(image)

        raise TypeError(f"Unknown type for image: {type(image)}")

    def _crop_images(self, image: Img, bboxes: List[Any]) -> List[Img]:
        """Crop an image based on bounding boxes.

        Args:
            image: Image to crop items from.
            bboxes: Bounding boxes, each one containing an item.

        Returns:
            List of cropped images, one per bounding box.
        """
        return [image.crop(bbox) for bbox in bboxes]
fis/feature_extraction/pipeline/factory.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fis.feature_extraction.detection.base import BaseDetector
2
+ from fis.feature_extraction.embedding.base import BaseEncoder
3
+ from fis.feature_extraction.pipeline.base import EncodingPipeline
4
+
5
+
6
class PipelineFactory:
    """Factory method for encoding pipelines.

    Example use:
    >>> from fis.feature_extraction.detection.dummy import DummyDetector
    >>> from fis.feature_extraction.embedding.timm import TimmModel
    >>> from fis.feature_extraction.pipeline.factory import PipelineFactory
    >>> factory = PipelineFactory()
    >>> factory.register_pipeline(
    ...     name="example_pipeline",
    ...     detection_model=DummyDetector(),
    ...     embedding_model=TimmModel(model_name="swinv2_base_window8_256"),
    ... )
    >>> pipeline = factory.get('example_pipeline')
    """

    def __init__(self):
        """Instantiate factory object."""
        # Registered pipelines, keyed by their name.
        self._pipelines = {}

    def register_pipeline(self, name: str, detection_model: BaseDetector, embedding_model: BaseEncoder) -> None:
        """Register a new pipeline to the factory.

        A pipeline already registered under the same name is replaced.

        Args:
            name: Name of the pipeline to create.
            detection_model: Instance of a BaseDetector object.
            embedding_model: Instance of a BaseEncoder object.
        """
        pipeline = EncodingPipeline(name=name, detection_model=detection_model, embedding_model=embedding_model)
        self._pipelines[name] = pipeline

    def get(self, name: str) -> EncodingPipeline:
        """Get a pipeline from its name.

        Args:
            name: Name of the pipeline to get.

        Raises:
            ValueError: If no pipeline has been registered with the given name.

        Returns:
            Encoding pipeline.
        """
        pipeline = self._pipelines.get(name)
        # Explicit None check: a registered pipeline must never be rejected for being falsy.
        if pipeline is None:
            raise ValueError(
                f"No pipeline registered under the name {name!r}. "
                f"Available pipelines: {sorted(self._pipelines)}"
            )

        return pipeline
fis/feature_extraction/pipeline/pipeline.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fis.feature_extraction.detection.dummy import DummyDetector
2
+ from fis.feature_extraction.embedding.timm import TimmModel
3
+ from fis.feature_extraction.pipeline.factory import PipelineFactory
4
+
5
# Registry of the encoding pipelines available to the rest of the package.
factory = PipelineFactory()

# Whole-image "detector" combined with a Swin V2 encoder from timm.
_detector = DummyDetector()
_encoder = TimmModel(model_name="swinv2_base_window8_256")
factory.register_pipeline(name="dummy_swin_pipe", detection_model=_detector, embedding_model=_encoder)
fis/feature_extraction/run.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from datasets import Dataset
3
+ from tqdm import tqdm
4
+
5
+ from fis.feature_extraction.pipeline.pipeline import factory
6
+ from fis.utils.constants import ORGANISATION
7
+ from fis.utils.s3 import list_images_from_bucket, read_image_from_s3
8
+
9
+
10
def make_dataset(pipeline_name: str, max_images: int = 100000) -> Dataset:
    """Encode the images stored on S3 and gather the embeddings in a dataset.

    Args:
        pipeline_name: Name of the registered encoding pipeline to use.
        max_images: Maximum number of images to process. Defaults to 100000.

    Raises:
        ValueError: If no pipeline has been registered with the given name.

    Returns:
        Dataset with one row per embedding, made of the "path" of the source image
        and its "embedding".
    """
    # Resolve the pipeline first so that an unknown name fails before any S3 call.
    pipeline = factory.get(pipeline_name)

    print("Listing images from S3...")
    images = list_images_from_bucket()[:max_images]
    print(f"{len(images)} images to process.")

    print("Encoding images...")
    data = []
    for image_name in tqdm(images):
        image = read_image_from_s3(image_name)
        # An image yields one embedding per detected item; each one gets its own row.
        data.extend({"path": image_name, "embedding": embedding} for embedding in pipeline.encode(image))

    return Dataset.from_pandas(pd.DataFrame(data))
36
+
37
+
38
def upload_dataset(dataset: Dataset, pipeline_name: str) -> None:
    """Push the dataset to the Hub repository named after the pipeline.

    Args:
        dataset: Dataset to upload.
        pipeline_name: Name of the pipeline, used as repository name under ORGANISATION.
    """
    print("Uploading dataset...")
    dataset.push_to_hub(repo_id=f"{ORGANISATION}/{pipeline_name}")
42
+
43
+
44
def main():
    """Build the embeddings dataset of the "dummy_swin_pipe" pipeline and upload it."""
    name = "dummy_swin_pipe"
    upload_dataset(dataset=make_dataset(pipeline_name=name), pipeline_name=name)
48
+
49
+
50
+ if __name__ == "__main__":
51
+ main()
fis/similarity_search/milvus/__pycache__/collection.cpython-37.pyc ADDED
Binary file (1.47 kB). View file
 
fis/similarity_search/milvus/collection.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pymilvus import (
2
+ Collection,
3
+ CollectionSchema,
4
+ DataType,
5
+ FieldSchema,
6
+ connections,
7
+ utility,
8
+ )
9
+
10
+ connections.connect(host="127.0.0.1", port="19530")
11
+
12
+
13
def create_milvus_collection(collection_name: str, dim: int) -> Collection:
    """Create a Milvus collection.

    Any existing collection with the same name is dropped first, together with its data.

    Inspired by https://github.com/milvus-io/bootcamp/blob/master/solutions/reverse_image_search/1_build_image_search_engine.ipynb

    Args:
        collection_name: name of the Milvus collection
        dim: number of dimensions of the embedding vectors

    Returns:
        Milvus collection, with an index created on its "embedding" field
    """
    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)

    # The keyword is "description": the former "descrition" spelling was not the
    # documented parameter, so the field descriptions were never set.
    fields = [
        FieldSchema(
            name="id",
            dtype=DataType.INT64,
            description="ids",
            is_primary=True,
            auto_id=False,
        ),
        FieldSchema(
            name="path",
            dtype=DataType.VARCHAR,
            description="path to image",
            max_length=500,
        ),
        FieldSchema(
            name="embedding",
            dtype=DataType.FLOAT_VECTOR,
            description="image embedding vectors",
            dim=dim,
        ),
    ]

    schema = CollectionSchema(fields=fields, description="reverse image search")
    collection = Collection(name=collection_name, schema=schema)

    index_params = {"metric_type": "L2", "index_type": "IVF_FLAT", "params": {"nlist": 2048}}
    collection.create_index(field_name="embedding", index_params=index_params)

    return collection
fis/utils/__pycache__/config.cpython-37.pyc ADDED
Binary file (520 Bytes). View file
 
fis/utils/__pycache__/constants.cpython-37.pyc ADDED
Binary file (185 Bytes). View file
 
fis/utils/__pycache__/s3.cpython-37.pyc ADDED
Binary file (1.06 kB). View file
 
fis/utils/config.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ from dotenv import load_dotenv
4
+
5
+ # Read environment variables from .env file.
6
+ load_dotenv()
7
+
8
+ DIR_ROOT = Path(__file__).resolve().parent.parent.parent
9
+ DIR_DATA = DIR_ROOT / "data"
10
+ DIR_SCRAPING = DIR_DATA / "scraping"
11
+ DIR_SCRAPING_IMAGES = DIR_SCRAPING / "images"
12
+
13
+ S3_BUCKET = "fashion-img-search"
14
+ S3_BUCKET_IMAGES = f"s3://{S3_BUCKET}/images/"
15
+
16
+ FILE_SCRAPING_DATA = "items.jsonl"
fis/utils/constants.py ADDED
@@ -0,0 +1 @@
 
 
1
+ ORGANISATION = "FSDL-Fashion"
fis/utils/data/__pycache__/download_fashionpedia.cpython-37.pyc ADDED
Binary file (1.81 kB). View file
 
fis/utils/data/download_fashionpedia.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import urllib.request
3
+ import zipfile
4
+
5
+ from fis.utils.config import DIR_DATA
6
+
7
+ # Download from S3
8
+ RAW_TRAIN_IMAGES = "https://s3.amazonaws.com/ifashionist-dataset/images/train2020.zip"
9
+ RAW_VAL_IMAGES = "https://s3.amazonaws.com/ifashionist-dataset/images/val_test2020.zip"
10
+ RAW_TRAIN_ANNOTATIONS = "https://s3.amazonaws.com/ifashionist-dataset/annotations/instances_attributes_train2020.json"
11
+ RAW_VAL_ANNOTATIONS = "https://s3.amazonaws.com/ifashionist-dataset/annotations/instances_attributes_val2020.json"
12
+
13
+ # to local disk
14
+ TRAIN_ANNOTATIONS = "train.json"
15
+ VAL_ANNOTATIONS = "val.json"
16
+
17
+
18
def download(url: str, target: str) -> None:
    """Download image and annotations.

    Zip archives (images) are fetched to a temporary file, extracted into ``target``
    and the temporary file is removed. Any other file (annotations) is saved to
    ``target`` directly.

    Args:
        url: url to download from.
        target: file or directory to download to.
    """
    print(f"Downloading from {url}")

    # Annotations
    if not url.endswith(".zip"):
        urllib.request.urlretrieve(url=url, filename=target)  # noqa
        return

    # Images
    path, _ = urllib.request.urlretrieve(url=url)  # noqa
    try:
        with zipfile.ZipFile(path, "r") as f:
            f.extractall(target)
    finally:
        # Remove the downloaded archive even when the extraction fails.
        os.remove(path)
38
+
39
+
40
def download_fashionpedia(destination_dir: str = DIR_DATA) -> None:
    """Download the Fashionpedia dataset.

    The annotation files are fetched first, then the image archives.

    Args:
        destination_dir: directory where the dataset will be saved.
    """
    os.makedirs(destination_dir, exist_ok=True)

    annotation_files = (
        (RAW_TRAIN_ANNOTATIONS, TRAIN_ANNOTATIONS),
        (RAW_VAL_ANNOTATIONS, VAL_ANNOTATIONS),
    )
    for annotations_url, file_name in annotation_files:
        download(url=annotations_url, target=os.path.join(destination_dir, file_name))

    for images_url in (RAW_TRAIN_IMAGES, RAW_VAL_IMAGES):
        download(url=images_url, target=destination_dir)
53
+
54
+
55
+ if __name__ == "__main__":
56
+ download_fashionpedia()
fis/utils/s3.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ from typing import List
3
+
4
+ import boto3
5
+ from PIL import Image
6
+
7
+ from fis.utils import config as cfg
8
+
9
+ s3 = boto3.resource("s3")
10
+
11
+
12
def list_images_from_bucket(bucket: str = cfg.S3_BUCKET) -> List[str]:
    """List jpeg images from a bucket.

    Args:
        bucket: Name of the bucket. Defaults to cfg.S3_BUCKET.

    Returns:
        Keys of the objects whose name contains ".jpg".
    """
    every_object = s3.Bucket(bucket).objects.all()

    return [entry.key for entry in every_object if ".jpg" in entry.key]
30
+
31
+
32
def read_image_from_s3(key, bucket: str = cfg.S3_BUCKET):
    """Read an object from S3 and open it as a PIL image.

    Args:
        key: Key of the image in the bucket.
        bucket: Name of the bucket. Defaults to cfg.S3_BUCKET.

    Returns:
        PIL image opened from the bytes of the object.
    """
    s3_object = s3.Bucket(bucket).Object(key)
    payload = s3_object.get().get("Body").read()

    return Image.open(io.BytesIO(payload))
requirements.txt ADDED
The diff for this file is too large to render. See raw diff