File size: 1,541 Bytes
9066d0b
b6e6733
9066d0b
 
b6e6733
9066d0b
b6e6733
 
280e91d
b6e6733
 
 
9066d0b
b6e6733
9066d0b
 
 
 
 
76d1760
9066d0b
 
 
 
76d1760
 
 
 
 
 
 
b6e6733
 
 
9066d0b
 
 
 
 
 
 
 
 
 
 
056f6f9
52c9d8a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import os
from io import BytesIO
from multiprocessing import Pool, cpu_count
import fiftyone as fo
from datasets import load_dataset
from PIL import Image

# Load the full ImageNet-Hard validation split from the Hugging Face Hub.
# NOTE(review): this runs at import time, so on spawn-based platforms each
# multiprocessing worker re-executes the download/load — confirm fork start
# method is intended (default on Linux).
imagenet_hard_dataset = load_dataset('taesiri/imagenet-hard', split='validation')
# Output directory for the re-encoded JPEGs written by process_image().
os.makedirs("dataset", exist_ok=True)


def process_image(i):
    """Re-encode sample *i* as a JPEG under dataset/ and return its metadata.

    Args:
        i: Integer index into the module-level ``imagenet_hard_dataset``.

    Returns:
        dict with keys ``file_path`` (saved JPEG path), ``origin`` (the
        dataset's raw label field), and ``labels`` / ``english_label``
        (the human-readable label), as consumed by
        ``create_fiftyone_sample``.
    """
    # Index the dataset once instead of once per field — each HF dataset
    # __getitem__ decodes the row (including the image) from disk.
    record = imagenet_hard_dataset[i]
    image = record["image"].convert("RGB")
    image_path = f"dataset/{i}.JPEG"
    image.save(image_path, "JPEG", quality=80)
    return {
        "file_path": image_path,
        # Bug fix: create_fiftyone_sample() reads sample["origin"], but this
        # dict previously never contained that key, so building samples
        # raised KeyError. Presumably the original class ids live under the
        # dataset's "label" column — TODO confirm against the dataset schema.
        "origin": record["label"],
        "labels": record["english_label"],
        "english_label": record["english_label"],
    }


def create_fiftyone_sample(sample):
    """Convert one metadata dict into a FiftyOne Sample.

    Expects *sample* to carry ``file_path``, ``origin`` and
    ``english_label`` keys; both labels are attached as a
    ``Classifications`` field on the returned ``Sample``.
    """
    classifications = [
        fo.Classification(label=str(sample[key]))
        for key in ("origin", "english_label")
    ]
    return fo.Sample(
        filepath=sample["file_path"],
        labels=fo.Classifications(classifications=classifications),
    )



if __name__ == "__main__":
    # Process images in parallel and get the list of images with their labels.
    # Only integer indices are pickled to the workers; each worker resolves
    # the module-level dataset itself.
    with Pool(cpu_count()) as pool:
        samples_data = pool.map(process_image, range(len(imagenet_hard_dataset)))

    # Create a FiftyOne dataset
    dataset = fo.Dataset(name="imagenet-hard")

    # Add images and labels to the FiftyOne dataset
    samples = [create_fiftyone_sample(sample_data) for sample_data in samples_data]
    dataset.add_samples(samples)

    # Serve the FiftyOne app on all interfaces (remote access, port 8888);
    # wait() blocks until the session is closed.
    session = fo.launch_app(dataset, port=8888, remote=True, address="0.0.0.0")
    session.wait()