# Sign-language recognition Streamlit app (Hugging Face Space, commit 6e5598c).
import streamlit as st
import numpy as np
from keras.models import load_model
import cv2
from io import BytesIO
import mediapipe as mp
# Load the trained CNN once at import time so every request reuses it.
model = load_model('sign_asl_cnn_30_epochs.h5')
# Map the 36 class indices to labels: 0-9 stay digits, 10-35 become 'A'-'Z'.
class_labels = {i: str(i) if i < 10 else chr(65 + i - 10) for i in range(36)}
def preprocess_image(image):
    """Prepare one image for the classifier.

    Resizes to the model's 200x200 input, scales pixel values into
    [0, 1], and adds a leading batch axis.

    Parameters
    ----------
    image : np.ndarray
        3-channel image of arbitrary size (the final reshape requires
        exactly 200*200*3 elements).

    Returns
    -------
    np.ndarray
        Float array of shape (1, 200, 200, 3).
    """
    resized = cv2.resize(image, (200, 200))
    scaled = resized / 255.0
    return np.reshape(scaled, (1, 200, 200, 3))
def predict_letter(image):
    """Classify a single image and return its label string ('0'-'9' or 'A'-'Z')."""
    batch = preprocess_image(image)
    scores = model.predict(batch)
    best_index = int(np.argmax(scores, axis=1)[0])
    return class_labels[best_index]
def detect_hands(image):
    """Detect hands in a BGR frame, classify each hand crop with the CNN,
    and draw the predicted label plus a bounding box onto the frame.

    Parameters
    ----------
    image : np.ndarray
        BGR frame as produced by OpenCV; annotated in place.

    Returns
    -------
    np.ndarray
        The same frame with labels and boxes drawn on it.
    """
    mp_hands = mp.solutions.hands
    margin = 15  # pixels of context kept around each detected hand
    # MediaPipe expects RGB input; OpenCV frames are BGR.
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Context manager releases MediaPipe's native resources after each call
    # (the original constructed a Hands instance per frame and never closed it).
    with mp_hands.Hands() as hands:
        results = hands.process(image_rgb)
    if results.multi_hand_landmarks:
        for landmarks in results.multi_hand_landmarks:
            # Landmark coordinates are normalized to [0, 1]; convert to pixels.
            landmarks_xy = [(int(lm.x * image.shape[1]), int(lm.y * image.shape[0]))
                            for lm in landmarks.landmark]
            # Bounding box around the hand, padded by `margin` and clamped
            # to the frame borders.
            x_min = max(0, min(landmarks_xy, key=lambda p: p[0])[0] - margin)
            y_min = max(0, min(landmarks_xy, key=lambda p: p[1])[1] - margin)
            x_max = min(image.shape[1], max(landmarks_xy, key=lambda p: p[0])[0] + margin)
            y_max = min(image.shape[0], max(landmarks_xy, key=lambda p: p[1])[1] + margin)
            roi = image[y_min:y_max, x_min:x_max]
            if roi.size == 0:
                continue  # degenerate box at the frame edge; nothing to classify
            roi = cv2.resize(roi, (200, 200), interpolation=cv2.INTER_AREA)
            # NOTE(review): this is a BGR->RGB conversion (the original
            # misnamed the result "hsv"); the fixed range mask presumably
            # isolates skin tones — confirm thresholds against training data.
            rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
            lower_yellow = np.array([93, 72, 51])
            upper_yellow = np.array([224, 194, 183])
            mask = cv2.inRange(rgb, lower_yellow, upper_yellow)
            roi = cv2.bitwise_and(roi, roi, mask=mask)
            # NOTE(review): unlike preprocess_image, the crop is fed to the
            # model without /255 scaling — confirm this matches training.
            roi = roi.reshape(1, 200, 200, 3)
            predictions = model.predict(roi)
            predicted_class = int(np.argmax(predictions, axis=1)[0])
            result = class_labels[predicted_class]
            # Draw the label just above the box, then the box itself.
            cv2.putText(image, str(result), (x_min, y_min - 10),
                        cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)
            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)
    return image
# ---- Streamlit app ----
st.title('Sign Language Recognition')

# Sidebar with radio button for Upload/Webcam
selected_option = st.sidebar.radio("Select Option", ["Upload", "Webcam"], index=0)

if selected_option == "Upload":
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png"])
    if uploaded_file is not None:
        if st.button('Predict'):
            # Decode the uploaded bytes into a BGR OpenCV image.
            contents = uploaded_file.read()
            nparr = np.frombuffer(contents, np.uint8)
            image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            predicted_letter = predict_letter(image)
            st.write('Predicted Letter:', predicted_letter)
elif selected_option == "Webcam":
    # Placeholders so frame/prediction/status update in place between frames.
    webcam_frame = st.empty()
    predicted_letter_webcam = st.empty()
    webcam_capture_status = st.empty()
    webcam_stop_button = st.empty()
    webcam_status = st.empty()
    webcam_button = st.button("Start Webcam")
    if webcam_button:
        webcam_status.text("Webcam is on.")
        webcam_stop_button = st.button("Stop Webcam")
        # OpenCV video capture from the default camera.
        cap = cv2.VideoCapture(0)
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    # Camera unavailable or stream ended; stop cleanly
                    # instead of crashing on a None frame.
                    webcam_status.text("Webcam is off.")
                    break
                # Annotate first so the displayed frame actually shows the
                # boxes/labels (the original displayed the raw frame before
                # running detection).
                frame = detect_hands(frame)
                webcam_frame.image(frame, channels="BGR")
                # NOTE(review): prediction runs on the full annotated frame,
                # not the hand crop — confirm this is intended.
                predicted_letter = predict_letter(frame)
                predicted_letter_webcam.text(f"Predicted Letter: {predicted_letter}")
                # NOTE(review): Streamlit reruns the script on button clicks,
                # so this flag only reflects the click that started this run.
                if webcam_stop_button:
                    webcam_status.text("Webcam is off.")
                    break
        finally:
            # Release the camera even if an exception escapes the loop.
            cap.release()