import gradio as gr
import mediapipe as mp
import numpy as np

# Legacy Solutions API: selfie segmentation model (person/background mask).
seg = mp.solutions.selfie_segmentation.SelfieSegmentation()

# Tasks API: gesture recognizer loaded from a bundled .task model file.
options = mp.tasks.vision.GestureRecognizerOptions(
    base_options=mp.tasks.BaseOptions(
        model_asset_path="models/gesture_recognizer.task"
    ),
    running_mode=mp.tasks.vision.RunningMode.IMAGE,
)
recognizer = mp.tasks.vision.GestureRecognizer.create_from_options(options)


def close_segmentation_model():
    """Release both MediaPipe models (segmentation and gesture recognition)."""
    print("Closing Models")
    seg.close()
    recognizer.close()


def process_selfie(frame):
    """Black out the background of a webcam frame using selfie segmentation."""
    # Gradio delivers webcam frames as RGB arrays, which is what MediaPipe
    # expects, so the original RGB->BGR->RGB round trip was a no-op and is
    # dropped here.
    results = seg.process(frame)
    # Broadcast the single-channel float mask to 3 channels and threshold it.
    condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.5
    # Solid black background with the same shape as the input frame.
    bg_image = np.zeros(frame.shape, dtype=np.uint8)
    # Keep the person where the mask is confident, black everywhere else.
    return np.where(condition, frame, bg_image)


def process_gesture(frame):
    """Classify the hand gesture in a webcam frame and return label scores."""
    # The frame is already RGB; wrapping it directly avoids the channel swap
    # that the original cv2.COLOR_BGR2RGB call would have introduced.
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)
    result = recognizer.recognize(mp_image)
    if not result.gestures:
        return {"None": 1.0}
    # result.gestures[0] holds the ranked categories for the first hand.
    return {c.category_name: c.score for c in result.gestures[0]}


selfie_interface = gr.Interface(
    fn=process_selfie,
    inputs=gr.Image(sources=["webcam"], streaming=True),
    outputs="image",
    live=True,
)

gesture_interface = gr.Interface(
    fn=process_gesture,
    inputs=gr.Image(sources=["webcam"], streaming=True),
    outputs="label",
    live=True,
)

interface = gr.TabbedInterface(
    [selfie_interface, gesture_interface], ["Selfie", "Gesture"]
)

if __name__ == "__main__":
    interface.launch()
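
# --- Optional cleanup wiring (an assumption, not part of the original script) ---
# close_segmentation_model() above is defined but never called, so the models
# are only released when the process dies. A minimal sketch, assuming a normal
# interpreter shutdown, would register it with the standard-library atexit
# module before launching:
#
#     import atexit
#     atexit.register(close_segmentation_model)
#
# It is left commented out here so the script's runtime behavior is unchanged.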