yolo-v8 / app.py.safe
Geoffrey Hollingworth
initial upload
3d3f535
raw
history blame
4.74 kB
import os
os.environ['OPENCV_AVFOUNDATION_SKIP_AUTH'] = '1'
import streamlit as st
import cv2
import numpy as np
from transformers import pipeline
from PIL import Image, ImageDraw
# Shared Hugging Face image-classification pipeline for facial emotion
# recognition, backed by the "trpakov/vit-face-expression" model. It is built
# once at import time and reused by every call below.
emotion_pipeline = pipeline("image-classification", model="trpakov/vit-face-expression")


def analyze_sentiment(face):
    """Return the most likely emotion label for a single face image.

    Args:
        face: Image array in OpenCV's BGR channel order; it is converted to
            RGB before being handed to the classifier.

    Returns:
        The ``'label'`` of the prediction with the highest ``'score'``.
    """
    # The classifier is fed a PIL image, so go BGR array -> RGB array -> PIL.
    face_image = Image.fromarray(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))

    predictions = emotion_pipeline(face_image)

    # Each prediction is a dict with 'label' and 'score'; keep the top one.
    best_prediction = max(predictions, key=lambda prediction: prediction['score'])
    return best_prediction['label']
# NOTE(review): TEXT_SIZE is not referenced anywhere in the visible code; it is
# kept unchanged in case something else reads it.
TEXT_SIZE = 3

# Lazily created Haar-cascade face detector, shared across frames so the
# cascade file is only parsed once.
_face_detector = None


def _get_face_detector():
    """Return the shared OpenCV frontal-face Haar cascade, loading it on first use."""
    global _face_detector
    if _face_detector is None:
        cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        detector = cv2.CascadeClassifier(cascade_path)
        if detector.empty():
            raise RuntimeError(f"Could not load face cascade from {cascade_path}")
        _face_detector = detector
    return _face_detector


def _measure_text(draw, text):
    """Return the (width, height) needed to draw ``text`` with the default font."""
    # ImageDraw.textsize was removed in Pillow 10; use it when it still exists
    # (older Pillow) and fall back to textbbox on newer releases.
    if hasattr(draw, "textsize"):
        return draw.textsize(text)
    # Measured from the (0, 0) anchor, so right/bottom give the full extent
    # the text occupies when drawn at a given position.
    _, _, right, bottom = draw.textbbox((0, 0), text)
    return right, bottom


# Function to detect faces, analyze sentiment, and draw a red box around them
def detect_and_draw_faces(frame):
    """Annotate every detected face in ``frame`` with a box and an emotion label.

    The emotion pipeline is an image *classifier*: its results carry only
    'label' and 'score', never a bounding box. Faces are therefore located
    with OpenCV's Haar cascade, and each face crop is classified separately
    via ``analyze_sentiment``.

    Args:
        frame: BGR image array (as produced by ``cv2.VideoCapture.read``).

    Returns:
        A new BGR image array with a red rectangle around each detected face
        and the predicted emotion written on a black strip above it. When no
        face is found the returned image has no annotations.
    """
    # Haar cascades operate on single-channel images.
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = _get_face_detector().detectMultiScale(
        gray_frame, scaleFactor=1.1, minNeighbors=5
    )

    # Drawing is done with PIL, which expects RGB channel order.
    pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(pil_image)

    for face_box in faces:
        # Convert NumPy integers to plain ints for slicing and PIL drawing.
        x, y, w, h = (int(value) for value in face_box)
        face = frame[y:y + h, x:x + w]
        if face.size == 0:
            continue

        sentiment = analyze_sentiment(face)

        # Draw rectangle around the face
        draw.rectangle(((x, y), (x + w, y + h)), outline="red", width=3)

        # Place the label strip just above the box, clamped so it stays
        # inside the image when the face touches the top edge.
        text_width, text_height = _measure_text(draw, sentiment)
        strip_top = max(y - text_height - 5, 0)
        strip_bottom = strip_top + text_height + 5

        # Black background first, then white text on top of it.
        draw.rectangle([(x, strip_top), (x + text_width, strip_bottom)], fill="black")
        draw.text((x, strip_top + 5), sentiment, fill="white")

    # Convert back to OpenCV format
    return cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
# Function to capture video from webcam
def video_stream():
    """Yield frames from capture device 0 until a read fails.

    A Streamlit error is shown if the device cannot be opened (nothing is
    yielded in that case) or if a frame cannot be read (iteration then ends).

    Yields:
        Frames exactly as returned by ``cv2.VideoCapture.read``.
    """
    video_capture = cv2.VideoCapture(0)
    try:
        if not video_capture.isOpened():
            st.error("Error: Could not open video capture device.")
            return
        while True:
            ret, frame = video_capture.read()
            if not ret:
                st.error("Error: Failed to read frame from video capture device.")
                break
            yield frame
    finally:
        # Runs on normal exhaustion AND when the consumer stops early (the
        # generator is closed or an exception propagates), so the device is
        # always handed back.
        video_capture.release()
# Streamlit UI

# Custom page styling: white main background, red headings and buttons.
# The markup is injected verbatim, hence unsafe_allow_html below.
_PAGE_STYLE = """
<style>
.main {
background-color: #FFFFFF;
}
.reportview-container .main .block-container{
padding-top: 2rem;
}
h1 {
color: #E60012;
font-family: 'Arial Black', Gadget, sans-serif;
}
h2 {
color: #E60012;
font-family: 'Arial', sans-serif;
}
h3 {
color: #333333;
font-family: 'Arial', sans-serif;
}
.stButton button {
background-color: #E60012;
color: white;
border-radius: 5px;
font-size: 16px;
}
</style>
"""
st.markdown(_PAGE_STYLE, unsafe_allow_html=True)

# Page headings
st.title("Computer Vision Test Lab")
st.subheader("Facial Sentiment")

# Two side-by-side columns: webcam input on the left, analysis on the right.
input_col, output_col = st.columns(2)

with input_col:
    st.header("Input Stream")
    st.subheader("Webcam")
    # Slot that the capture loop overwrites with each new frame.
    video_placeholder = st.empty()

with output_col:
    st.header("Output Stream")
    st.subheader("Analysis")
    # Slots for the annotated frame and for a sentiment readout.
    output_placeholder = st.empty()
    sentiment_placeholder = st.empty()
# Start video stream
video_capture = cv2.VideoCapture(0)
try:
    if not video_capture.isOpened():
        st.error("Error: Could not open video capture device.")
    else:
        while True:
            ret, frame = video_capture.read()
            if not ret:
                st.error("Error: Failed to read frame from video capture device.")
                break

            # Detect faces, analyze sentiment, and draw red boxes with sentiment labels
            frame_with_boxes = detect_and_draw_faces(frame)

            # Display the input stream with the red box around the face
            video_placeholder.image(frame_with_boxes, channels="BGR")

            # Display the output stream (here it's the same as input, modify as needed)
            output_placeholder.image(frame_with_boxes, channels="BGR")

            # No cv2.waitKey() here: it only services OpenCV HighGUI windows,
            # and this app never opens one, so a 'q' keypress could never be
            # seen (and the call fails outright on headless OpenCV builds).
            # The loop is paced by the blocking camera read and the model.
finally:
    # Always hand the camera back, whether the loop ended on a failed read or
    # an exception escaped from inside it.
    video_capture.release()