"""Gradio demo that decides whether two photos show the same person's face.

Pipeline: an OWL-ViT zero-shot object detector locates the most confident
"human face" box in each image, the crop is resized to a fixed size, both
crops are embedded with a CLIP SentenceTransformer model, and the cosine
similarity of the two embeddings is compared with a user-chosen threshold.
"""
from typing import Tuple

import gradio as gr
from PIL import Image, ImageDraw
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline

# Models are loaded once at import time and shared by every request.
checkpoint = "google/owlvit-base-patch32"
detector = pipeline(model=checkpoint, task="zero-shot-object-detection")
model = SentenceTransformer('clip-ViT-L-14')

# Text prompt handed to the zero-shot detector.
FACE_LABEL = "human face"
# Every face crop is resized to this (width, height) before embedding.
FACE_SIZE = (256, 256)


class FaceNotFoundError(ValueError):
    """Raised when the detector reports no face in an image."""


def _best_box(predictions):
    """Return the (xmin, ymin, xmax, ymax) box of the top-scoring prediction.

    Returns None when ``predictions`` is empty or no prediction has a
    positive score. Ties keep the earliest prediction.
    """
    best = max(predictions, key=lambda pred: pred["score"], default=None)
    if best is None or best["score"] <= 0:
        return None
    box = best["box"]
    # Read the corners by key instead of relying on dict value ordering.
    return (box["xmin"], box["ymin"], box["xmax"], box["ymax"])


def get_face_image(im1: Image.Image) -> Image.Image:
    """Crop the most confident face out of ``im1`` and resize it.

    Args:
        im1: PIL image to search for a face. It is not modified.

    Returns:
        A new PIL image of size ``FACE_SIZE`` containing the detected face.

    Raises:
        FaceNotFoundError: if the detector finds no face in ``im1``.
    """
    predictions = detector(
        im1,
        candidate_labels=[FACE_LABEL],
    )
    box_area = _best_box(predictions)
    if box_area is None:
        raise FaceNotFoundError("No face was detected in the image.")
    # The crop is taken from the untouched image: drawing the detection box
    # first would leave outline pixels inside the crop that gets embedded
    # and would also mutate the caller's image.
    return im1.crop(box_area).resize(FACE_SIZE)


def predict(im1: Image.Image, im2: Image.Image,
            inp_sim: float) -> Tuple[float, str]:
    """Compare the faces found in two images.

    Args:
        im1: first PIL image (the webcam capture in the UI).
        im2: second PIL image.
        inp_sim: similarity threshold; the faces count as the same person
            only when the cosine similarity is strictly greater than it.

    Returns:
        ``(similarity, message)`` where ``similarity`` is the cosine
        similarity of the two face embeddings as a plain float.

    Raises:
        gr.Error: if an image is missing or no face is detected, so the UI
            shows a readable message instead of a stack trace.
    """
    if im1 is None or im2 is None:
        raise gr.Error("Please provide both images.")
    try:
        face_image1 = get_face_image(im1)
        face_image2 = get_face_image(im2)
    except FaceNotFoundError as err:
        raise gr.Error(str(err)) from err

    img_emb = model.encode([face_image1, face_image2])
    # cos_sim returns a 1x1 tensor; unwrap it to a Python float.
    sim = util.cos_sim(img_emb[0], img_emb[1]).item()
    if sim > inp_sim:
        return sim, "SAME PERSON, UNLOCK PHONE"
    return sim, "DIFFERENT PEOPLE, DON'T UNLOCK"


description = "An application that can recognize if two faces belong to the same person or not"
title = "Facial Identity Recognition System"

# NOTE(review): `source="webcam"` is the Gradio 3.x spelling (Gradio 4 uses
# `sources=[...]`) - confirm against the pinned Gradio version.
interface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Image(type="pil", source="webcam"),
        gr.Image(type="pil"),
        gr.Slider(0, 1, value=0.8, label="Similarity Percentage",
                  info="Choose betwen 0 and 1"),
    ],
    outputs=[
        gr.Number(label="Similarity"),
        gr.Textbox(label="Message"),
    ],
    # title/description were defined but never handed to the interface.
    title=title,
    description=description,
)

# Launched at module level, as in the original script.
interface.launch(debug=True)