# NOTE: text captured from a Hugging Face Space file view (Space status at capture time: Sleeping).
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest | |
from transformers import ( | |
MODEL_FOR_OBJECT_DETECTION_MAPPING, | |
AutoFeatureExtractor, | |
AutoModelForObjectDetection, | |
ObjectDetectionPipeline, | |
is_vision_available, | |
pipeline, | |
) | |
from transformers.testing_utils import ( | |
is_pipeline_test, | |
nested_simplify, | |
require_pytesseract, | |
require_tf, | |
require_timm, | |
require_torch, | |
require_vision, | |
slow, | |
) | |
from .test_pipelines_common import ANY | |
if is_vision_available():
    from PIL import Image
else:

    class Image:
        """Placeholder bound to the name ``Image`` when vision support is unavailable.

        It only exists so that references to ``Image.open`` elsewhere in this
        module resolve; it performs no image loading.
        """

        @staticmethod
        def open(*args, **kwargs):
            """Accept any arguments, do nothing, and return ``None``."""
            # The signature has no ``self``; ``@staticmethod`` makes the call
            # behave the same whether made on the class or on an instance.
            pass
# NOTE(review): the decorators in this block were reinstated because the module
# imports `is_pipeline_test`, `require_vision`, `require_timm`, `require_torch`,
# `require_tf`, `require_pytesseract` and `slow` without otherwise using them;
# confirm the exact placement against the upstream test file.
@is_pipeline_test
@require_vision
@require_timm
@require_torch
class ObjectDetectionPipelineTests(unittest.TestCase):
    """Tests for ``ObjectDetectionPipeline``.

    ``get_test_pipeline`` / ``run_pipeline_test`` provide a generic schema check
    that works for any model/processor pair handed in by the caller, while the
    ``test_*`` methods pin exact outputs for specific checkpoints.
    """

    model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING

    def get_test_pipeline(self, model, tokenizer, processor):
        """Build the pipeline under test and the example inputs for it.

        Args:
            model: Model instance passed to ``ObjectDetectionPipeline``.
            tokenizer: Accepted for signature compatibility; not used here.
            processor: Passed to the pipeline as ``image_processor``.

        Returns:
            A ``(pipeline, examples)`` tuple where ``examples`` is a one-element
            list holding the path of a local COCO fixture image.
        """
        object_detector = ObjectDetectionPipeline(model=model, image_processor=processor)
        return object_detector, ["./tests/fixtures/tests_samples/COCO/000000039769.png"]

    def _assert_detections(self, detections):
        """Assert ``detections`` is non-empty and every entry has the expected schema.

        Each entry must be a dict with a float ``score``, a str ``label`` and a
        ``box`` dict of four int coordinates (``xmin``, ``ymin``, ``xmax``, ``ymax``).
        """
        self.assertGreater(len(detections), 0)
        for detected_object in detections:
            self.assertEqual(
                detected_object,
                {
                    "score": ANY(float),
                    "label": ANY(str),
                    "box": {"xmin": ANY(int), "ymin": ANY(int), "xmax": ANY(int), "ymax": ANY(int)},
                },
            )

    def run_pipeline_test(self, object_detector, examples):
        """Check the output schema for a single image and for a mixed batch.

        Args:
            object_detector: Pipeline to exercise.
            examples: Accepted for signature compatibility; the inputs used are
                hard-coded below.
        """
        # threshold=0.0 is passed so the score filter cannot empty the result.
        outputs = object_detector("./tests/fixtures/tests_samples/COCO/000000039769.png", threshold=0.0)
        self._assert_detections(outputs)

        # Imported locally so the module can be imported without `datasets`.
        import datasets

        dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test")

        # The batch mixes input kinds: a PIL image, a URL, and dataset files
        # (labelled RGBA / LA / L by the original comments).
        batch = [
            Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
            "http://images.cocodataset.org/val2017/000000039769.jpg",
            # RGBA
            dataset[0]["file"],
            # LA
            dataset[1]["file"],
            # L
            dataset[2]["file"],
        ]
        batch_outputs = object_detector(batch, threshold=0.0)

        # One result list per input, each satisfying the same schema.
        self.assertEqual(len(batch), len(batch_outputs))
        for outputs in batch_outputs:
            self._assert_detections(outputs)

    @require_tf
    @unittest.skip("Object detection not implemented in TF")
    def test_small_model_tf(self):
        """Placeholder for a TensorFlow test; the body is intentionally empty."""
        # Marked as skipped so the empty body is not reported as a passing test.
        pass

    @require_torch
    def test_small_model_pt(self):
        """Pin exact outputs of a tiny DETR checkpoint for single and batched input."""
        model_id = "hf-internal-testing/tiny-detr-mobilenetsv3"

        model = AutoModelForObjectDetection.from_pretrained(model_id)
        feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
        object_detector = ObjectDetectionPipeline(model=model, feature_extractor=feature_extractor)

        # Expected detections for one image; the batched call below must return
        # this same list once per input.
        expected = [
            {"score": 0.3376, "label": "LABEL_0", "box": {"xmin": 159, "ymin": 120, "xmax": 480, "ymax": 359}},
            {"score": 0.3376, "label": "LABEL_0", "box": {"xmin": 159, "ymin": 120, "xmax": 480, "ymax": 359}},
        ]

        outputs = object_detector("http://images.cocodataset.org/val2017/000000039769.jpg", threshold=0.0)
        self.assertEqual(nested_simplify(outputs, decimals=4), expected)

        outputs = object_detector(
            [
                "http://images.cocodataset.org/val2017/000000039769.jpg",
                "http://images.cocodataset.org/val2017/000000039769.jpg",
            ],
            threshold=0.0,
        )
        self.assertEqual(nested_simplify(outputs, decimals=4), [expected, expected])

    @require_torch
    @slow
    def test_large_model_pt(self):
        """Pin exact outputs of ``facebook/detr-resnet-50`` built from explicit components."""
        model_id = "facebook/detr-resnet-50"

        model = AutoModelForObjectDetection.from_pretrained(model_id)
        feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
        object_detector = ObjectDetectionPipeline(model=model, feature_extractor=feature_extractor)

        # Expected detections for one image with the pipeline's default threshold.
        expected = [
            {"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
            {"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
            {"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
            {"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
            {"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
        ]

        outputs = object_detector("http://images.cocodataset.org/val2017/000000039769.jpg")
        self.assertEqual(nested_simplify(outputs, decimals=4), expected)

        outputs = object_detector(
            [
                "http://images.cocodataset.org/val2017/000000039769.jpg",
                "http://images.cocodataset.org/val2017/000000039769.jpg",
            ]
        )
        self.assertEqual(nested_simplify(outputs, decimals=4), [expected, expected])

    @require_torch
    @slow
    def test_integration_torch_object_detection(self):
        """Pin exact outputs when the pipeline is created via ``pipeline("object-detection", ...)``."""
        model_id = "facebook/detr-resnet-50"

        object_detector = pipeline("object-detection", model=model_id)

        # Expected detections for one image with the pipeline's default threshold.
        expected = [
            {"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
            {"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
            {"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
            {"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
            {"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
        ]

        outputs = object_detector("http://images.cocodataset.org/val2017/000000039769.jpg")
        self.assertEqual(nested_simplify(outputs, decimals=4), expected)

        outputs = object_detector(
            [
                "http://images.cocodataset.org/val2017/000000039769.jpg",
                "http://images.cocodataset.org/val2017/000000039769.jpg",
            ]
        )
        self.assertEqual(nested_simplify(outputs, decimals=4), [expected, expected])

    @require_torch
    @slow
    def test_threshold(self):
        """Check that a call-time ``threshold`` of 0.9985 leaves only the two cat detections."""
        threshold = 0.9985
        model_id = "facebook/detr-resnet-50"

        object_detector = pipeline("object-detection", model=model_id)

        outputs = object_detector("http://images.cocodataset.org/val2017/000000039769.jpg", threshold=threshold)
        self.assertEqual(
            nested_simplify(outputs, decimals=4),
            [
                {"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
                {"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
            ],
        )

    @require_torch
    @require_pytesseract
    @slow
    def test_layoutlm(self):
        """Pin outputs for a LayoutLMv3 checkpoint with ``threshold`` set at pipeline creation."""
        # NOTE(review): `require_pytesseract` is attached here on the assumption
        # that this checkpoint needs OCR — confirm against upstream.
        model_id = "Narsil/layoutlmv3-finetuned-funsd"
        threshold = 0.9993

        # Unlike `test_threshold`, the threshold is given to `pipeline(...)`
        # rather than to the individual call.
        object_detector = pipeline("object-detection", model=model_id, threshold=threshold)

        outputs = object_detector(
            "https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png"
        )
        self.assertEqual(
            nested_simplify(outputs, decimals=4),
            [
                {"score": 0.9993, "label": "I-ANSWER", "box": {"xmin": 294, "ymin": 254, "xmax": 343, "ymax": 264}},
                {"score": 0.9993, "label": "I-ANSWER", "box": {"xmin": 294, "ymin": 254, "xmax": 343, "ymax": 264}},
            ],
        )