# dsgt-snakeclef / generate_dummy_testset.py
# Anthony Miyaguchi
# test for random sizes in images
# c10f559
# raw
# history blame
# 974 Bytes
import PIL.Image
import pandas as pd
import tempfile
from pathlib import Path
import zipfile
import numpy as np
def random_rgb_array(min_side=100, max_side=300):
    """Return random RGB noise with a randomly chosen height and width.

    Args:
        min_side: Smallest allowed height/width in pixels (inclusive).
        max_side: Upper bound for height/width in pixels (exclusive).

    Returns:
        A ``uint8`` array of shape ``(height, width, 3)`` whose values are
        drawn from the full 0-255 range.
    """
    height = np.random.randint(min_side, max_side)
    width = np.random.randint(min_side, max_side)
    # randint excludes its upper bound, so 256 is required for 255 to appear.
    return np.random.randint(0, 256, (height, width, 3), dtype=np.uint8)


if __name__ == "__main__":
    output_path = Path("/tmp/data/private_testset.zip")
    metadata = pd.read_csv("metadata-subset.csv")
    print(metadata)

    # Images are staged in a throwaway directory, then bundled into the zip.
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir)
        for row in metadata.itertuples():
            image_path = tmp_path / row.filename
            # A filename may contain subdirectories; create them before saving.
            image_path.parent.mkdir(parents=True, exist_ok=True)
            PIL.Image.fromarray(random_rgb_array()).save(image_path)

        output_path.parent.mkdir(parents=True, exist_ok=True)
        with zipfile.ZipFile(output_path, "w") as zip_ref:
            # Walk recursively and keep each file's relative path so nested
            # filenames are archived intact; sorting makes the entry order
            # reproducible.
            for file in sorted(tmp_path.rglob("*")):
                if file.is_file():
                    arcname = file.relative_to(tmp_path).as_posix()
                    zip_ref.write(file, f"private_testset/{arcname}")