import tempfile
import zipfile
from pathlib import Path
from typing import Optional

import numpy as np
import pandas as pd
import PIL.Image
def random_rgb_pixels(
    rng: np.random.Generator, min_side: int = 100, max_side: int = 300
) -> np.ndarray:
    """Return a random ``uint8`` RGB pixel array with random side lengths.

    Args:
        rng: NumPy random generator used for both the dimensions and the
            pixel values.
        min_side: Smallest allowed side length (inclusive).
        max_side: Upper bound for the side length (exclusive).

    Returns:
        An array of shape ``(height, width, 3)`` and dtype ``uint8``.
    """
    height, width = rng.integers(min_side, max_side, size=2)
    # The upper bound is exclusive, so 256 is required for 255 to be reachable.
    return rng.integers(0, 256, size=(height, width, 3), dtype=np.uint8)


def main(
    output_path: Path = Path("/tmp/data/private_testset.zip"),
    metadata_path: Path = Path("metadata-subset.csv"),
    seed: Optional[int] = None,
) -> None:
    """Build a zip archive of random dummy images named after a metadata CSV.

    One random RGB image is written for every entry of the CSV's ``filename``
    column, and all of them are archived under a ``private_testset/`` prefix.

    Args:
        output_path: Destination of the zip archive; missing parent
            directories are created.
        metadata_path: CSV file that must contain a ``filename`` column.
        seed: Optional seed making the generated images reproducible.

    Raises:
        KeyError: If the CSV has no ``filename`` column.
    """
    output_path = Path(output_path)
    metadata = pd.read_csv(metadata_path)
    print(metadata)
    rng = np.random.default_rng(seed)

    # Images are staged in a temporary directory and then zipped.
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir)
        for filename in metadata["filename"]:
            # Pillow picks the image format from the file name's extension.
            img = PIL.Image.fromarray(random_rgb_pixels(rng))
            img.save(tmp_path / filename)

        output_path.parent.mkdir(parents=True, exist_ok=True)
        with zipfile.ZipFile(output_path, "w") as zip_ref:
            # iterdir() order is filesystem-dependent; sort so the archive
            # layout is stable across runs and machines.
            for file in sorted(tmp_path.iterdir()):
                zip_ref.write(file, f"private_testset/{file.name}")


if __name__ == "__main__":
    main()