import os, sys

# Add the repository root to the import path so project modules resolve
root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path.append(root_dir)

from PIL import Image
from pathlib import Path

import torch
from transformers import CLIPProcessor, CLIPVisionModel
import numpy as np
from tqdm import tqdm

from data.extract_embeddings.dataset_with_path import ImageWithPathDataset

device = "cuda" if torch.cuda.is_available() else "cpu"
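
# Load the StreetCLIP vision encoder and its matching image preprocessor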
model = CLIPVisionModel.from_pretrained("geolocal/StreetCLIP").to(device)
processor = CLIPProcessor.from_pretrained("geolocal/StreetCLIP")

input_path = Path("datasets/osv5m/images")
output_path = Path("datasets/osv5m/embeddings/street_clip")
output_path.mkdir(exist_ok=True, parents=True)
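
# Each dataset item pairs an image with the path its embedding should be saved under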
dataset = ImageWithPathDataset(input_path)

batch_size = 128
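# The collate_fn regroups a list of (image, path) samples into (images, paths) tuples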
dataloader = torch.utils.data.DataLoader(
    dataset, batch_size=batch_size, num_workers=16, collate_fn=lambda x: zip(*x)
)

for images, output_emb_paths in tqdm(dataloader):
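    # Preprocess the batch of PIL images into tensors and move them to the device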
    inputs = processor(images=images, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
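    # Keep the [CLS] token of the last hidden state as the per-image embedding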
    embeddings = outputs.last_hidden_state[:, 0]
    numpy_embeddings = embeddings.cpu().numpy()
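    # Write one .npy file per image at the output path supplied by the dataset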
    for emb, output_emb_path in zip(numpy_embeddings, output_emb_paths):
        np.save(f"{output_emb_path}.npy", emb)