Spaces:
Running
Running
File size: 1,541 Bytes
9066d0b b6e6733 9066d0b b6e6733 9066d0b b6e6733 280e91d b6e6733 9066d0b b6e6733 9066d0b 76d1760 9066d0b 76d1760 b6e6733 9066d0b 056f6f9 52c9d8a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import os
from io import BytesIO
from multiprocessing import Pool, cpu_count
import fiftyone as fo
from datasets import load_dataset
from PIL import Image
# Load the ImageNet-Hard validation split at module level (so worker
# processes can see it — NOTE(review): relies on fork-style process start;
# confirm on platforms that default to spawn) and prepare the output dir.
imagenet_hard_dataset = load_dataset("taesiri/imagenet-hard", split="validation")
os.makedirs("dataset", exist_ok=True)
def process_image(i):
    """Export sample *i* of the ImageNet-Hard split to ``dataset/{i}.JPEG``.

    Args:
        i: Integer index into the module-level ``imagenet_hard_dataset``.

    Returns:
        dict with the saved file path and the sample's labels, in the
        shape consumed by ``create_fiftyone_sample``.
    """
    # Fetch the record once instead of re-indexing the dataset per field.
    record = imagenet_hard_dataset[i]
    # Force RGB so grayscale/RGBA images survive the JPEG save below.
    image = record["image"].convert("RGB")
    image_path = f"dataset/{i}.JPEG"
    image.save(image_path, "JPEG", quality=80)
    return {
        "file_path": image_path,
        # Fix: the original dict had no "origin" key, yet
        # create_fiftyone_sample reads sample["origin"] -> KeyError.
        # english_label is the only label field visible in this file;
        # TODO(review): confirm whether the dataset exposes a distinct
        # origin/ImageNet label and use it here instead.
        "origin": record["english_label"],
        "labels": record["english_label"],
        "english_label": record["english_label"],
    }
def create_fiftyone_sample(sample):
    """Build a ``fo.Sample`` from one record produced by ``process_image``.

    Args:
        sample: dict with keys ``"file_path"``, ``"english_label"``, and
            optionally ``"origin"``.

    Returns:
        fo.Sample whose ``labels`` field holds both classifications.
    """
    # Fix: the records returned by process_image carry no "origin" key,
    # so the original sample["origin"] lookup raised KeyError. Fall back
    # to the English label when "origin" is absent.
    origin_value = sample.get("origin", sample["english_label"])
    origin_label = fo.Classification(label=str(origin_value))
    english_label = fo.Classification(label=str(sample["english_label"]))
    return fo.Sample(
        filepath=sample["file_path"],
        labels=fo.Classifications(classifications=[origin_label, english_label]),
    )
if __name__ == "__main__":
    # Export every image in parallel; each worker returns the metadata
    # dict for the file it wrote.
    with Pool(cpu_count()) as pool:
        records = pool.map(process_image, range(len(imagenet_hard_dataset)))

    # Assemble the FiftyOne dataset from the exported files and labels.
    dataset = fo.Dataset(name="imagenet-hard")
    dataset.add_samples([create_fiftyone_sample(record) for record in records])

    # Serve the app on all interfaces and block until the session closes.
    session = fo.launch_app(dataset, port=8888, remote=True, address="0.0.0.0")
    session.wait()
|