# Sign-language recognition Streamlit app (Hugging Face Space, commit 6e5598c).
import streamlit as st
import numpy as np
from keras.models import load_model
import cv2
from io import BytesIO
import mediapipe as mp
# Load the trained CNN once at import time so every request reuses it.
model = load_model('sign_asl_cnn_30_epochs.h5')
# Map the 36 class indices to labels: 0-9 stay digits, 10-35 become 'A'-'Z'.
class_labels = {i: str(i) if i < 10 else chr(65 + i - 10) for i in range(36)}
def preprocess_image(image):
    """Prepare one image for the classifier.

    Resizes to the model's 200x200 input, scales pixel values into
    [0, 1], and adds a leading batch axis.

    Parameters
    ----------
    image : np.ndarray
        3-channel image of arbitrary size (the final reshape requires
        exactly 200*200*3 elements).

    Returns
    -------
    np.ndarray
        Float array of shape (1, 200, 200, 3).
    """
    resized = cv2.resize(image, (200, 200))
    scaled = resized / 255.0
    return np.reshape(scaled, (1, 200, 200, 3))
def predict_letter(image):
    """Classify a single image and return its label string ('0'-'9' or 'A'-'Z')."""
    batch = preprocess_image(image)
    scores = model.predict(batch)
    best_index = int(np.argmax(scores, axis=1)[0])
    return class_labels[best_index]
def detect_hands(image):
    """Detect hands in a BGR frame, classify each hand crop with the CNN,
    and draw the predicted label plus a bounding box onto the frame.

    Parameters
    ----------
    image : np.ndarray
        BGR frame as produced by OpenCV; annotated in place.

    Returns
    -------
    np.ndarray
        The same frame with labels and boxes drawn on it.
    """
    mp_hands = mp.solutions.hands
    margin = 15  # pixels of context kept around each detected hand
    # MediaPipe expects RGB input; OpenCV frames are BGR.
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Context manager releases MediaPipe's native resources after each call
    # (the original constructed a Hands instance per frame and never closed it).
    with mp_hands.Hands() as hands:
        results = hands.process(image_rgb)
    if results.multi_hand_landmarks:
        for landmarks in results.multi_hand_landmarks:
            # Landmark coordinates are normalized to [0, 1]; convert to pixels.
            landmarks_xy = [(int(lm.x * image.shape[1]), int(lm.y * image.shape[0]))
                            for lm in landmarks.landmark]
            # Bounding box around the hand, padded by `margin` and clamped
            # to the frame borders.
            x_min = max(0, min(landmarks_xy, key=lambda p: p[0])[0] - margin)
            y_min = max(0, min(landmarks_xy, key=lambda p: p[1])[1] - margin)
            x_max = min(image.shape[1], max(landmarks_xy, key=lambda p: p[0])[0] + margin)
            y_max = min(image.shape[0], max(landmarks_xy, key=lambda p: p[1])[1] + margin)
            roi = image[y_min:y_max, x_min:x_max]
            if roi.size == 0:
                continue  # degenerate box at the frame edge; nothing to classify
            roi = cv2.resize(roi, (200, 200), interpolation=cv2.INTER_AREA)
            # NOTE(review): this is a BGR->RGB conversion (the original
            # misnamed the result "hsv"); the fixed range mask presumably
            # isolates skin tones — confirm thresholds against training data.
            rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
            lower_yellow = np.array([93, 72, 51])
            upper_yellow = np.array([224, 194, 183])
            mask = cv2.inRange(rgb, lower_yellow, upper_yellow)
            roi = cv2.bitwise_and(roi, roi, mask=mask)
            # NOTE(review): unlike preprocess_image, the crop is fed to the
            # model without /255 scaling — confirm this matches training.
            roi = roi.reshape(1, 200, 200, 3)
            predictions = model.predict(roi)
            predicted_class = int(np.argmax(predictions, axis=1)[0])
            result = class_labels[predicted_class]
            # Draw the label just above the box, then the box itself.
            cv2.putText(image, str(result), (x_min, y_min - 10),
                        cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)
            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)
    return image
# ---- Streamlit app ----
st.title('Sign Language Recognition')

# Sidebar with radio button for Upload/Webcam
selected_option = st.sidebar.radio("Select Option", ["Upload", "Webcam"], index=0)

if selected_option == "Upload":
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png"])
    if uploaded_file is not None:
        if st.button('Predict'):
            # Decode the uploaded bytes into a BGR OpenCV image.
            contents = uploaded_file.read()
            nparr = np.frombuffer(contents, np.uint8)
            image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            predicted_letter = predict_letter(image)
            st.write('Predicted Letter:', predicted_letter)
elif selected_option == "Webcam":
    # Placeholders so frame/prediction/status update in place between frames.
    webcam_frame = st.empty()
    predicted_letter_webcam = st.empty()
    webcam_capture_status = st.empty()
    webcam_stop_button = st.empty()
    webcam_status = st.empty()
    webcam_button = st.button("Start Webcam")
    if webcam_button:
        webcam_status.text("Webcam is on.")
        webcam_stop_button = st.button("Stop Webcam")
        # OpenCV video capture from the default camera.
        cap = cv2.VideoCapture(0)
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    # Camera unavailable or stream ended; stop cleanly
                    # instead of crashing on a None frame.
                    webcam_status.text("Webcam is off.")
                    break
                # Annotate first so the displayed frame actually shows the
                # boxes/labels (the original displayed the raw frame before
                # running detection).
                frame = detect_hands(frame)
                webcam_frame.image(frame, channels="BGR")
                # NOTE(review): prediction runs on the full annotated frame,
                # not the hand crop — confirm this is intended.
                predicted_letter = predict_letter(frame)
                predicted_letter_webcam.text(f"Predicted Letter: {predicted_letter}")
                # NOTE(review): Streamlit reruns the script on button clicks,
                # so this flag only reflects the click that started this run.
                if webcam_stop_button:
                    webcam_status.text("Webcam is off.")
                    break
        finally:
            # Release the camera even if an exception escapes the loop.
            cap.release()