import numpy as np import random import string from torch.utils.data import Dataset, Subset class DummyData(Dataset): def __init__(self, length, size): self.length = length self.size = size def __len__(self): return self.length def __getitem__(self, i): x = np.random.randn(*self.size) letters = string.ascii_lowercase y = ''.join(random.choice(string.ascii_lowercase) for i in range(10)) return {"jpg": x, "txt": y} class DummyDataWithEmbeddings(Dataset): def __init__(self, length, size, emb_size): self.length = length self.size = size self.emb_size = emb_size def __len__(self): return self.length def __getitem__(self, i): x = np.random.randn(*self.size) y = np.random.randn(*self.emb_size).astype(np.float32) return {"jpg": x, "txt": y}