# camcounter / app.py
# andrewgleave's picture
# Use OpenCV
# 30cc27c
# raw
# history blame
# 2.77 kB
from collections import defaultdict
import datetime
import io
import time
import torch
import gradio as gr
import cv2
from transformers import AutoFeatureExtractor, AutoModelForObjectDetection
# Checkpoint shared by the feature extractor and the detection model.
_CHECKPOINT = "hustvl/yolos-tiny"
extractor = AutoFeatureExtractor.from_pretrained(_CHECKPOINT)
model = AutoModelForObjectDetection.from_pretrained(_CHECKPOINT)

# Colour used for the bounding boxes and their captions.
BBOX_COLOR = [255, 0, 0]
# Threshold handed to the extractor's detection post-processing step.
PRED_THRESHOLD = 0.90
def composite_predictions(img, processed_predictions, show_video=False):
    """Draw the person detections onto ``img`` and count them.

    Args:
        img: Image array OpenCV can draw on; it is annotated in place.
        processed_predictions: Mapping with ``"labels"``, ``"scores"`` and
            ``"boxes"`` entries that support boolean-mask indexing and
            ``.tolist()``.
        show_video: Unused; kept so existing callers keep working.

    Returns:
        A ``(img, count, timestamp)`` tuple: the annotated image, the number
        of person boxes drawn, and ``datetime.datetime.now()`` taken after
        drawing.
    """
    # Only label id 1 (people) is of interest; everything else is dropped.
    person_mask = processed_predictions["labels"] == 1
    scores = processed_predictions["scores"][person_mask].tolist()
    boxes = [
        [int(coord) for coord in box]
        for box in processed_predictions["boxes"][person_mask].tolist()
    ]
    labels = [
        model.config.id2label[label_id]
        for label_id in processed_predictions["labels"][person_mask].tolist()
    ]

    for score, (x0, y0, x1, y1), label in zip(scores, boxes, labels):
        # The detector's post-processing documents boxes as corner
        # coordinates (top-left x/y, bottom-right x/y). Handing the 4-item
        # list to cv2.rectangle would select its Rect overload
        # (x, y, width, height) and draw oversized boxes, so pass the two
        # corners explicitly.
        cv2.rectangle(img, (x0, y0), (x1, y1), BBOX_COLOR, 1)
        cv2.putText(
            img,
            f"{label}: {score:0.2f}",
            (x0 + 2, y0 + 10),  # caption sits just inside the top-left corner
            cv2.FONT_HERSHEY_SIMPLEX,
            0.33,
            BBOX_COLOR,
            1,
            cv2.LINE_AA,
        )
    return img, len(boxes), datetime.datetime.now()
def process(img):
    """Detect people in one frame and return the annotated result.

    Args:
        img: Frame as an array whose ``shape`` is
            ``(height, width, channels)``.

    Returns:
        The ``(image, count, timestamp)`` tuple produced by
        ``composite_predictions`` for this frame.
    """
    inputs = extractor(images=img, return_tensors="pt")
    # Inference only: skip autograd bookkeeping so each streamed frame does
    # not build (and hold memory for) a gradient graph that is never used.
    with torch.no_grad():
        outputs = model(**inputs)

    height, width, _ = img.shape
    # Post-processing rescales boxes to the original frame size, given as
    # one (height, width) row per image in the batch.
    target_sizes = torch.tensor([(height, width)])
    processed = extractor.post_process_object_detection(
        outputs, threshold=PRED_THRESHOLD, target_sizes=target_sizes
    )

    # Composite image and prediction bounding boxes + labels prediction
    return composite_predictions(img, processed[0])
# UI: a status row (last update time + head count) above a row holding the
# live webcam feed and the annotated frame.
# NOTE(review): the nesting below was reconstructed from a copy that had lost
# its indentation -- confirm it matches the intended layout.
with gr.Blocks() as demo:
    stream = gr.State()

    with gr.Row():
        with gr.Column(scale=1, min_width=600):
            last_refresh_box = gr.Textbox(label="Last updated")
            attendance_label = gr.Label(label="Current Attendance")

    with gr.Row():
        with gr.Column(scale=1, min_width=600):
            webcam = gr.Webcam(streaming=True)
            output = gr.Image(label="Composite", visible=True)

    # Each streamed frame goes through `process`; its three return values
    # fill the composite image, the attendance label and the timestamp box.
    webcam.stream(
        fn=process,
        inputs=[webcam],
        outputs=[output, attendance_label, last_refresh_box],
    )


if __name__ == "__main__":
    # Enable the request queue before starting the server.
    demo.queue().launch()
# import gradio as gr
# import numpy as np
# import time
# def add_to_stream(audio, instream):
# time.sleep(1)
# if audio is None:
# return gr.update(), instream
# if instream is None:
# ret = audio
# else:
# ret = (audio[0], np.concatenate((instream[1], audio[1])))
# return ret, ret
# with gr.Blocks() as demo:
# inp = gr.Audio(source="microphone")
# out = gr.Audio()
# stream = gr.State()
# clear = gr.Button("Clear")
# inp.stream(add_to_stream, [inp, stream], [out, stream])
# clear.click(lambda: [None, None, None], None, [inp, out, stream])
# if __name__ == "__main__":
# demo.launch()