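"""Hugging Face Space: webcam attendance counter.

Streams webcam frames through the YOLOS-tiny object detector, keeps only
"person" detections above a confidence threshold, draws labeled bounding
boxes on each frame, and reports the current head count plus the time of
the last refresh in a Gradio Blocks UI.
"""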
import datetime

import cv2
import gradio as gr
import torch
from transformers import AutoFeatureExtractor, AutoModelForObjectDetection
# YOLOS-tiny, fine-tuned for COCO object detection (COCO class id 1 is "person").
extractor = AutoFeatureExtractor.from_pretrained("hustvl/yolos-tiny")
model = AutoModelForObjectDetection.from_pretrained("hustvl/yolos-tiny")
BBOX_COLOR = [255, 0, 0]  # red (Gradio delivers frames as RGB arrays)
PRED_THRESHOLD = 0.90     # keep only detections with score >= 0.90
def composite_predictions(img, processed_predictions):
    # Keep only "person" detections (COCO class id 1).
    interested_labels = processed_predictions["labels"] == 1
    scores = processed_predictions["scores"][interested_labels].tolist()
    boxes = [[int(j) for j in x] for x in processed_predictions["boxes"][interested_labels].tolist()]
    labels = [model.config.id2label[x] for x in processed_predictions["labels"][interested_labels].tolist()]
    for score, box, label in zip(scores, boxes, labels):
        # Boxes are (xmin, ymin, xmax, ymax); cv2.rectangle needs the two corner points.
        cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), BBOX_COLOR, 1)
        cv2.putText(img, f"{label}: {score:0.2f}", (box[0] + 2, box[1] + 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.33, BBOX_COLOR, 1, cv2.LINE_AA)
    return img, len(boxes), str(datetime.datetime.now())
def process(img):
    inputs = extractor(images=img, return_tensors="pt")
    with torch.no_grad():  # inference only; no gradients needed
        outputs = model(**inputs)
    # Rescale predictions back to the original frame size (height, width).
    h, w, _ = img.shape
    img_size = torch.tensor([(h, w)])
    processed = extractor.post_process_object_detection(outputs, PRED_THRESHOLD, img_size)
    # Composite the prediction bounding boxes + labels onto the frame.
    return composite_predictions(img, processed[0])
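# Quick sanity check of the detection pipeline on a single frame, outside the
# Gradio stream (a sketch; "frame.jpg" is a hypothetical test image path):
#
#   import numpy as np
#   from PIL import Image
#
#   frame = np.array(Image.open("frame.jpg").convert("RGB"))
#   composite, count, timestamp = process(frame)
#   print(f"{count} people at {timestamp}")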
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1, min_width=600):
            last_refresh_box = gr.Textbox(label="Last updated")
            attendance_label = gr.Label(label="Current Attendance")
    with gr.Row():
        with gr.Column(scale=1, min_width=600):
            webcam = gr.Webcam(streaming=True)
            output = gr.Image(label="Composite", visible=True)
    # Each streamed frame fans out to the composite image, the count, and the timestamp.
    webcam.stream(process, [webcam], [output, attendance_label, last_refresh_box])
if __name__ == "__main__":
    demo.queue().launch()
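# Kept below for reference: a commented-out Blocks demo that streams microphone
# audio and concatenates each new chunk onto a growing clip held in gr.State.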
# import gradio as gr
# import numpy as np
# import time
#
# def add_to_stream(audio, instream):
#     time.sleep(1)
#     if audio is None:
#         return gr.update(), instream
#     if instream is None:
#         ret = audio
#     else:
#         # audio is a (sample_rate, np.ndarray) tuple; append the new chunk.
#         ret = (audio[0], np.concatenate((instream[1], audio[1])))
#     return ret, ret
#
# with gr.Blocks() as demo:
#     inp = gr.Audio(source="microphone")
#     out = gr.Audio()
#     stream = gr.State()
#     clear = gr.Button("Clear")
#     inp.stream(add_to_stream, [inp, stream], [out, stream])
#     clear.click(lambda: [None, None, None], None, [inp, out, stream])
#
# if __name__ == "__main__":
#     demo.launch()