"""
Builds a similarity index for a directory of images
"""
import glob | |
import os | |
import sys | |
import tarfile | |
import requests | |
import streamlit as st | |
from PIL import Image | |
from txtai.embeddings import Embeddings | |
class Application:
    """
    Main application. Indexes a directory of images and serves a Streamlit
    page that looks up the best matching image for a text query.
    """

    def __init__(self, directory):
        """
        Creates a new application.

        Args:
            directory: directory of images
        """

        # The index is built eagerly so that run() only has to query it
        self.embeddings = self.build(directory)

    def build(self, directory):
        """
        Builds an image embeddings index.

        Args:
            directory: directory with images

        Returns:
            Embeddings index
        """

        # Images are indexed with the CLIP image/text model
        embeddings = Embeddings({"method": "sentence-transformers", "path": "clip-ViT-B-32"})
        embeddings.index(self.images(directory))

        # Update model to support multilingual queries
        embeddings.config["path"] = "sentence-transformers/clip-ViT-B-32-multilingual-v1"
        embeddings.model = embeddings.loadvectors()

        return embeddings

    def images(self, directory):
        """
        Generator that loops over each image in a directory.

        Files whose names end in "jpg" are yielded first, followed by files
        whose names end in "png". Only the top level of the directory is scanned.

        Args:
            directory: directory with images

        Yields:
            (path, PIL image, None) tuples, using the file path as the index id
        """

        # os.path.join keeps an empty directory relative to the working directory
        # instead of turning the pattern into an absolute "/*jpg"
        for pattern in ("*jpg", "*png"):
            for path in glob.glob(os.path.join(directory, pattern)):
                yield (path, Image.open(path), None)

    def run(self):
        """
        Runs a Streamlit application.

        Renders the page header and a search box. When a query is entered, the
        top search result is displayed; a warning is shown if the index
        returns no results.
        """

        st.title("Image search")

        st.markdown("This application shows how images and text can be embedded into the same space to support similarity search. ")
        st.markdown(
            "[sentence-transformers](https://github.com/UKPLab/sentence-transformers/tree/master/examples/applications/image-search) "
            + "recently added support for the [OpenAI CLIP model](https://github.com/openai/CLIP). This model embeds text and images into "
            + "the same space, enabling image similarity search. txtai can directly utilize these models."
        )

        query = st.text_input("Search query:")
        if query:
            results = self.embeddings.search(query, 1)

            # An empty index (e.g. no images found in the directory) yields no
            # results; report that instead of failing on results[0]
            if results:
                # Index ids are the image file paths supplied by images()
                path, _ = results[0]
                st.image(Image.open(path))
            else:
                st.warning("No matching images found")
def _cache_resource(function):
    """
    Wraps a function with Streamlit's caching so its result is reused across script reruns.

    Args:
        function: function to cache

    Returns:
        cached function
    """

    # Newer Streamlit releases expose cache_resource; older ones only have the
    # legacy st.cache, which needs allow_output_mutation for mutable objects
    if hasattr(st, "cache_resource"):
        return st.cache_resource(function)

    return st.cache(allow_output_mutation=True)(function)


@_cache_resource
def create(directory):
    """
    Creates and caches a Streamlit application.

    Caching means the Application (and the embeddings index it builds in its
    constructor) is created once per directory rather than on every rerun.

    Args:
        directory: directory of images to index

    Returns:
        Application
    """

    return Application(directory)
if __name__ == "__main__":
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    files = "/tmp/txtai"

    # Download and unpack the sample data only when the target directory is missing
    if not os.path.exists(files):
        os.makedirs(files)

        # Context managers release the HTTP connection and the archive handle even
        # if extraction fails; the timeout keeps a stalled connection from hanging
        with requests.get(
            "https://github.com/neuml/txtai/releases/download/v3.5.0/tests.tar.gz", stream=True, timeout=60
        ) as response:
            # Fail loudly on an HTTP error instead of feeding an error page to tarfile
            response.raise_for_status()

            # NOTE(review): extractall trusts member paths inside the archive; acceptable
            # for this fixed release URL, revisit if the source ever becomes configurable
            with tarfile.open(fileobj=response.raw, mode="r|gz") as archive:
                archive.extractall(path="/tmp")

    # Create and run application
    app = create(files)
    app.run()