# --- Hugging Face Spaces page banner captured by the scrape (not code) ---
# Spaces:
# Runtime error
# Runtime error
import streamlit as st
import numpy as np
from keras.models import load_model
import cv2
from io import BytesIO
import mediapipe as mp

# Trained CNN checkpoint (30 epochs) for static sign-language images.
model = load_model('sign_asl_cnn_30_epochs.h5')

# 36 classes: indices 0-9 are the digits '0'-'9',
# indices 10-35 are the uppercase letters 'A'-'Z'.
_symbols = [str(d) for d in range(10)] + [chr(ord('A') + k) for k in range(26)]
class_labels = dict(enumerate(_symbols))
def preprocess_image(image):
    """Convert a BGR image into the model's (1, 200, 200, 3) float input.

    The image is resized to 200x200 and pixel values are scaled from
    [0, 255] down to [0, 1].
    """
    resized = cv2.resize(image, (200, 200))
    scaled = resized / 255.0
    # Add the leading batch dimension expected by model.predict.
    return scaled.reshape(1, 200, 200, 3)
def predict_letter(image):
    """Classify a single BGR image and return its sign-language character."""
    batch = preprocess_image(image)
    scores = model.predict(batch)
    # argmax over the class axis of the single-image batch.
    best = int(np.argmax(scores, axis=1)[0])
    return class_labels[best]
def detect_hands(image):
    """Detect hands in a BGR frame, classify each sign, and annotate in place.

    For every hand MediaPipe finds: crop a margin-padded bounding box around
    the landmarks, mask the crop to a colour band, run the CNN on the masked
    crop, then draw the predicted label and the bounding box onto `image`.

    Returns the (possibly annotated) image.
    """
    mp_hands = mp.solutions.hands
    margin = 15  # padding (px) added around the landmark bounding box

    # MediaPipe expects RGB; OpenCV frames arrive in BGR order.
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Use the detector as a context manager so its native resources are
    # released on every call (the original leaked one Hands() per frame,
    # which is costly in the per-frame webcam loop).
    with mp_hands.Hands() as hands:
        results = hands.process(image_rgb)

    if not results.multi_hand_landmarks:
        return image

    for landmarks in results.multi_hand_landmarks:
        # Landmark coordinates are normalised [0, 1]; convert to pixels.
        landmarks_xy = [(int(lm.x * image.shape[1]), int(lm.y * image.shape[0]))
                        for lm in landmarks.landmark]

        # Padded bounding box, clamped to the frame.
        xs = [p[0] for p in landmarks_xy]
        ys = [p[1] for p in landmarks_xy]
        x_min = max(0, min(xs) - margin)
        y_min = max(0, min(ys) - margin)
        x_max = min(image.shape[1], max(xs) + margin)
        y_max = min(image.shape[0], max(ys) + margin)

        roi = image[y_min:y_max, x_min:x_max]
        if roi.size == 0:
            # Degenerate box (e.g. hand clipped at the frame edge).
            continue

        roi = cv2.resize(roi, (200, 200), interpolation=cv2.INTER_AREA)

        # NOTE(review): despite the original "hsv"/"yellow" names, this
        # converts to RGB and masks an RGB colour band (roughly a skin-tone
        # range) — confirm these bounds match the training pipeline.
        rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
        lower_bound = np.array([93, 72, 51])
        upper_bound = np.array([224, 194, 183])
        mask = cv2.inRange(rgb, lower_bound, upper_bound)
        roi = cv2.bitwise_and(roi, roi, mask=mask)

        # Scale to [0, 1] to match preprocess_image(): the original fed raw
        # 0-255 uint8 pixels here while the upload path fed normalised floats
        # to the same model.
        batch = (roi / 255.0).reshape(1, 200, 200, 3)

        predictions = model.predict(batch)
        predicted_class = int(np.argmax(predictions, axis=1)[0])
        result = class_labels[predicted_class]

        # Draw the predicted label just above the box, then the box itself.
        cv2.putText(image, str(result), (x_min, y_min - 10),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)

    return image
# ---------------- Streamlit app ----------------
st.title('Sign Language Recognition')

# Sidebar: classify a single uploaded image, or run live webcam inference.
selected_option = st.sidebar.radio("Select Option", ["Upload", "Webcam"], index=0)

if selected_option == "Upload":
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png"])
    if uploaded_file is not None:
        if st.button('Predict'):
            # Decode the uploaded bytes into a BGR OpenCV image.
            contents = uploaded_file.read()
            nparr = np.frombuffer(contents, np.uint8)
            image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            if image is None:
                # imdecode returns None on undecodable input instead of raising.
                st.error("Could not decode the uploaded file as an image.")
            else:
                predicted_letter = predict_letter(image)
                st.write('Predicted Letter:', predicted_letter)

elif selected_option == "Webcam":
    # Placeholders so frame/label/status text update in place each iteration.
    webcam_frame = st.empty()
    predicted_letter_webcam = st.empty()
    webcam_capture_status = st.empty()
    webcam_stop_button = st.empty()
    webcam_status = st.empty()
    webcam_button = st.button("Start Webcam")

    if webcam_button:
        webcam_status.text("Webcam is on.")
        webcam_stop_button = st.button("Stop Webcam")
        cap = cv2.VideoCapture(0)
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    # Camera gone or frame grab failed; don't crash on None.
                    webcam_status.text("Could not read from the webcam.")
                    break
                # Annotate first so the displayed frame actually shows the
                # detection boxes/labels (the original displayed the raw
                # frame before running detect_hands).
                frame = detect_hands(frame)
                webcam_frame.image(frame, channels="BGR")
                predicted_letter = predict_letter(frame)
                predicted_letter_webcam.text(f"Predicted Letter: {predicted_letter}")
                # NOTE(review): Streamlit reruns the script on button clicks,
                # so this in-loop check only works if the rerun reaches it —
                # kept from the original design.
                if webcam_stop_button:
                    webcam_status.text("Webcam is off.")
                    break
        finally:
            # Always release the camera, even if an iteration raises.
            cap.release()