v1
- app.py +54 -0
- create_image_embeddnigs.py +46 -0
- download_dataset.py +25 -0
- requirements.txt +223 -0
app.py
ADDED
@@ -0,0 +1,54 @@
import gradio as gr
import torch
import os
import numpy as np
import warnings
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
from create_image_embeddnigs import create_embeddings
from download_dataset import download_images

warnings.filterwarnings("ignore", category=UserWarning)
os.environ["TOKENIZERS_PARALLELISM"] = "false"  # avoid tokenizer fork warnings

IMAGE_DIR = "data/pictures"

download_images()

# Get image embeddings: if image_embeddings.npy exists, load it, otherwise create it.
if os.path.exists("image_embeddings.npy"):
    image_embeddings = np.load("image_embeddings.npy")
else:
    batch_size = 32
    device = "cuda" if torch.cuda.is_available() else "cpu"
    image_embeddings = create_embeddings(IMAGE_DIR, batch_size, device)

# L2-normalize rows so dot products against a unit text vector are cosine similarities.
image_embeddings = image_embeddings / np.linalg.norm(image_embeddings, axis=1, keepdims=True)

# Load the model and processor once at startup rather than on every query.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def get_text_embeddings(input_text):
    inputs = processor(text=input_text, return_tensors="pt", padding=True, truncation=True)
    embeddings = model.get_text_features(**inputs)
    vector = embeddings.detach().numpy().ravel()
    return vector / np.linalg.norm(vector)

def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

def find_similar_images(text_embedding, image_embeddings, top_k=4):
    similarities = np.array([cosine_similarity(text_embedding, e) for e in image_embeddings])
    return np.argsort(similarities)[-top_k:][::-1]

def get_similar_images(input_text):
    text_embedding = get_text_embeddings(input_text)
    top_k_indices = find_similar_images(text_embedding, image_embeddings)
    # Sort the paths so indices line up with the (sorted) order used to build the embeddings.
    image_paths = sorted(
        os.path.join(IMAGE_DIR, f)
        for f in os.listdir(IMAGE_DIR)
        if f.endswith((".png", ".jpg", ".jpeg"))
    )
    return [Image.open(image_paths[i]) for i in top_k_indices]

if __name__ == "__main__":
    iface = gr.Interface(fn=get_similar_images, inputs="text", outputs="gallery", title="Find Similar Images")
    iface.launch(share=True)
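Since both the embedding matrix and the query vector are unit-normalized before retrieval, the per-image Python loop in find_similar_images can be collapsed into a single matrix-vector product. A minimal drop-in sketch (the name find_similar_images_fast is ours, not part of the commit):

import numpy as np

def find_similar_images_fast(text_embedding, image_embeddings, top_k=4):
    # Rows and query are unit vectors, so the dot product is the cosine similarity.
    similarities = image_embeddings @ text_embedding   # shape: (num_images,)
    return np.argsort(similarities)[-top_k:][::-1]     # best match first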
create_image_embeddnigs.py
ADDED
@@ -0,0 +1,46 @@
import torch
import os
import numpy as np
import warnings
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
from torch.utils.data import Dataset, DataLoader

warnings.filterwarnings("ignore", category=UserWarning)

class ImageDataset(Dataset):
    def __init__(self, image_dir, processor):
        # Sort the paths so the embedding order is deterministic and matches app.py.
        self.image_paths = sorted(
            os.path.join(image_dir, f)
            for f in os.listdir(image_dir)
            if f.endswith((".png", ".jpg", ".jpeg"))
        )
        self.processor = processor

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx])
        return self.processor(images=image, return_tensors="pt")["pixel_values"][0]

def get_clip_embeddings_batch(image_dir, batch_size=32, device="cuda"):
    # Load the CLIP model and processor
    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    # Create dataset and dataloader
    dataset = ImageDataset(image_dir, processor)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=4)

    all_embeddings = []

    model.eval()
    with torch.no_grad():
        for batch in dataloader:
            batch = batch.to(device)
            embeddings = model.get_image_features(pixel_values=batch)
            all_embeddings.append(embeddings.cpu().numpy())

    return np.concatenate(all_embeddings)

def create_embeddings(image_dir, batch_size, device):
    embeddings = get_clip_embeddings_batch(image_dir, batch_size, device)
    np.save("image_embeddings.npy", embeddings)
    return embeddings
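create_embeddings can also be run ahead of time so the app starts from the cached .npy file. A minimal standalone entry point, assuming the data/pictures layout that download_dataset.py produces (this __main__ block is a suggestion, not part of the commit):

import torch
from create_image_embeddnigs import create_embeddings
from download_dataset import download_images

if __name__ == "__main__":
    download_images()  # make sure data/pictures is populated
    device = "cuda" if torch.cuda.is_available() else "cpu"
    embeddings = create_embeddings("data/pictures", batch_size=32, device=device)
    print(f"Saved {embeddings.shape[0]} embeddings of dimension {embeddings.shape[1]}")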
download_dataset.py
ADDED
@@ -0,0 +1,25 @@
import os
from datasets import load_dataset

def download_images(name="food101", num_images=200):
    output_dir = "data/pictures"

    # Skip the download if the images are already on disk (app.py calls this on every launch).
    if os.path.isdir(output_dir) and os.listdir(output_dir):
        print(f"Images already present in '{output_dir}', skipping download")
        return

    # Load a small slice of the dataset (1% of the train split)
    dataset = load_dataset(name, split="train[:1%]")

    # Create a directory to save the images if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Iterate over the dataset and save up to num_images images as JPEGs
    count = 0
    for example in dataset:
        if count >= num_images:
            break
        example["image"].convert("RGB").save(os.path.join(output_dir, f"image_{count}.jpg"))
        count += 1

    print(f"Downloaded and saved {count} images to the folder '{output_dir}'")
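Note that split="train[:1%]" still downloads and prepares the full food101 train split before slicing it. When only a couple hundred images are needed, the datasets library's streaming mode avoids the full download; a sketch under the assumption that the dataset supports streaming, keeping the same output layout (download_images_streaming is our name):

import os
from itertools import islice
from datasets import load_dataset

def download_images_streaming(name="food101", num_images=200):
    output_dir = "data/pictures"
    os.makedirs(output_dir, exist_ok=True)
    # streaming=True yields examples one at a time instead of fetching the whole archive.
    dataset = load_dataset(name, split="train", streaming=True)
    count = 0
    for example in islice(dataset, num_images):
        example["image"].convert("RGB").save(os.path.join(output_dir, f"image_{count}.jpg"))
        count += 1
    print(f"Saved {count} images to '{output_dir}'")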
requirements.txt
ADDED
@@ -0,0 +1,223 @@
# This file was exported from a conda environment; the conda-level packages below
# are not pip-installable and are kept only as a record (note conda's single "=" pins):
# _libgcc_mutex=0.1
# _openmp_mutex=5.1
# bzip2=1.0.8
# ca-certificates=2024.7.2
# ld_impl_linux-64=2.38
# libffi=3.4.4
# libgcc-ng=11.2.0
# libgomp=11.2.0
# libstdcxx-ng=11.2.0
# libuuid=1.41.5
# ncurses=6.4
# openssl=1.1.1w
# pip=24.2
# python=3.10.10
# readline=8.2
# setuptools=72.1.0
# sqlite=3.45.3
# tk=8.6.14
# wheel=0.43.0
# xz=5.4.6
# zlib=1.2.13

# The +cu121 torch/torchvision builds come from the PyTorch wheel index:
--extra-index-url https://download.pytorch.org/whl/cu121

absl-py==2.1.0
aiofiles==23.2.1
aiohappyeyeballs==2.4.0
aiohttp==3.10.5
aiosignal==1.3.1
annotated-types==0.7.0
anyio==4.4.0
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asttokens==2.4.1
async-lru==2.0.4
async-timeout==4.0.3
attrs==24.2.0
babel==2.16.0
backoff==2.2.1
beautifulsoup4==4.12.3
bleach==6.1.0
boto3==1.35.12
botocore==1.35.12
cachetools==5.5.0
certifi==2024.8.30
cffi==1.17.1
charset-normalizer==3.3.2
click==8.1.7
comm==0.2.2
contourpy==1.3.0
cycler==0.12.1
datasets==3.0.1
debugpy==1.8.5
decorator==5.1.1
defusedxml==0.7.1
dill==0.3.8
exceptiongroup==1.2.2
executing==2.1.0
fastapi==0.113.0
fastjsonschema==2.20.0
ffmpy==0.4.0
filelock==3.15.4
fire==0.6.0
fonttools==4.53.1
fqdn==1.5.1
frozenlist==1.4.1
fsspec==2024.6.1
google-auth==2.34.0
google-auth-oauthlib==1.2.1
gradio==5.1.0
gradio-client==1.4.0
grpcio==1.66.1
h11==0.14.0
httpcore==1.0.5
httpx==0.27.2
huggingface-hub==0.26.0
idna==3.8
ipykernel==6.26.0
ipython==8.17.2
ipywidgets==8.1.1
isoduration==20.11.0
jedi==0.19.1
jinja2==3.1.4
jmespath==1.0.1
joblib==1.4.2
json5==0.9.25
jsonpointer==3.0.0
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
jupyter-client==8.6.2
jupyter-core==5.7.2
jupyter-events==0.10.0
jupyter-lsp==2.2.5
jupyter-server==2.14.2
jupyter-server-terminals==0.5.3
jupyterlab==4.2.0
jupyterlab-pygments==0.3.0
jupyterlab-server==2.27.3
jupyterlab-widgets==3.0.13
kiwisolver==1.4.7
lightning==2.4.0
lightning-cloud==0.5.70
lightning-sdk==0.1.15
lightning-utilities==0.11.7
litdata==0.2.19
litserve==0.2.2
markdown==3.7
markdown-it-py==3.0.0
markupsafe==2.1.5
matplotlib==3.8.2
matplotlib-inline==0.1.7
mdurl==0.1.2
mistune==3.0.2
mpmath==1.3.0
multidict==6.0.5
multiprocess==0.70.16
nbclient==0.10.0
nbconvert==7.16.4
nbformat==5.10.4
nest-asyncio==1.6.0
networkx==3.3
notebook-shim==0.2.4
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.19.3
nvidia-nvjitlink-cu12==12.6.68
nvidia-nvtx-cu12==12.1.105
oauthlib==3.2.2
orjson==3.10.7
overrides==7.7.0
packaging==24.1
pandas==2.1.4
pandocfilters==1.5.1
parso==0.8.4
pexpect==4.9.0
pillow==10.4.0
platformdirs==4.2.2
prometheus-client==0.20.0
prompt-toolkit==3.0.47
protobuf==4.23.4
psutil==6.0.0
ptyprocess==0.7.0
pure-eval==0.2.3
pyarrow==17.0.0
pyasn1==0.6.0
pyasn1-modules==0.4.0
pycparser==2.22
pydantic==2.9.0
pydantic-core==2.23.2
pydub==0.25.1
pygments==2.18.0
pyjwt==2.9.0
pyparsing==3.1.4
python-dateutil==2.9.0.post0
python-json-logger==2.0.7
python-multipart==0.0.9
pytorch-lightning==2.4.0
pytz==2024.1
pyyaml==6.0.2
pyzmq==26.2.0
referencing==0.35.1
regex==2024.9.11
requests==2.32.3
requests-oauthlib==2.0.0
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rich==13.8.0
rpds-py==0.20.0
rsa==4.9
ruff==0.7.0
s3transfer==0.10.2
safetensors==0.4.5
scikit-learn==1.3.2
scipy==1.11.4
semantic-version==2.10.0
send2trash==1.8.3
shellingham==1.5.4
simple-term-menu==1.6.4
six==1.16.0
sniffio==1.3.1
soupsieve==2.6
stack-data==0.6.3
starlette==0.38.4
sympy==1.13.2
tensorboard==2.15.1
tensorboard-data-server==0.7.2
termcolor==2.4.0
terminado==0.18.1
threadpoolctl==3.5.0
tinycss2==1.3.0
tokenizers==0.20.1
tomli==2.0.1
tomlkit==0.12.0
torch==2.2.1+cu121
torchmetrics==1.3.1
torchvision==0.17.1+cu121
tornado==6.4.1
tqdm==4.66.5
traitlets==5.14.3
transformers==4.45.2
triton==2.2.0
typer==0.12.5
types-python-dateutil==2.9.0.20240821
typing-extensions==4.12.2
tzdata==2024.1
uri-template==1.3.0
urllib3==2.2.2
uvicorn==0.30.6
wcwidth==0.2.13
webcolors==24.8.0
webencodings==0.5.1
websocket-client==1.8.0
websockets==12.0
werkzeug==3.0.4
widgetsnbextension==4.0.13
xxhash==3.5.0
yarl==1.9.11