Ajay Karthick Senthil Kumar committed
Commit 9ddd8d9
1 Parent(s): cd2b531
Files changed (5)
  1. .gitignore +1 -0
  2. app.py +115 -0
  3. config.py +31 -0
  4. model/asl_model.h5 +3 -0
  5. requirements.txt +5 -0
.gitignore ADDED
@@ -0,0 +1 @@
+ __pycache__
app.py ADDED
@@ -0,0 +1,118 @@
+ import gradio as gr
+ import cv2
+ import numpy as np
+ from tensorflow.keras.models import load_model
+ import mediapipe as mp
+
+ # Load the label-to-alphabet mapping
+ from config import label_to_alphabet  # Ensure this file has the correct mapping
+
+ # Load the saved ASL model
+ model = load_model("model/asl_model.h5")
+
+ # Initialize MediaPipe for hand detection
+ mp_hands = mp.solutions.hands
+ hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)
+ mp_drawing = mp.solutions.drawing_utils  # For hand landmark drawing
+
+ def detect_and_crop_hand(image):
+     """
+     Detect the hand in the image, crop the region, and return the cropped hand image.
+     Returns None if no hand is detected.
+     """
+     # Gradio delivers webcam frames as RGB arrays, which is the format
+     # MediaPipe expects, so the frame can be passed to the detector directly.
+     results = hands.process(image)
+
+     if results.multi_hand_landmarks:
+         for hand_landmarks in results.multi_hand_landmarks:
+             # Get the image dimensions
+             h, w, _ = image.shape
+             x_min, y_min = w, h
+             x_max = y_max = 0
+
+             # Loop through landmarks to determine the bounding box of the hand
+             for landmark in hand_landmarks.landmark:
+                 x, y = int(landmark.x * w), int(landmark.y * h)
+                 x_min = min(x, x_min)
+                 y_min = min(y, y_min)
+                 x_max = max(x, x_max)
+                 y_max = max(y, y_max)
+
+             # Clamp the box to the frame; landmarks can fall slightly outside it
+             x_min, y_min = max(x_min, 0), max(y_min, 0)
+             x_max, y_max = min(x_max, w), min(y_max, h)
+
+             # Crop the hand region from the image
+             cropped_hand = image[y_min:y_max, x_min:x_max]
+             if cropped_hand.size == 0:
+                 return None
+
+             # Optional: draw the landmarks on the original image for debugging
+             mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
+
+             return cropped_hand
+
+     # No hand detected
+     return None
+
+ def preprocess_hand_image(hand_image):
+     """
+     Preprocess the cropped hand image for the ASL recognition model:
+     resize, normalize, and add a batch dimension.
+     """
+     # Resize to the model's expected input size (150x150)
+     hand_image_resized = cv2.resize(hand_image, (150, 150))
+
+     # Normalize pixel values to [0, 1]
+     hand_image_normalized = hand_image_resized / 255.0
+
+     # Add a batch dimension to match the model's input shape (1, 150, 150, 3)
+     hand_image_reshaped = np.expand_dims(hand_image_normalized, axis=0)
+
+     return hand_image_reshaped
+
+ def predict_asl_alphabet(cropped_hand):
+     """
+     Feed the cropped hand image into the ASL model and return the predicted letter.
+     """
+     # Preprocess the hand image
+     processed_hand = preprocess_hand_image(cropped_hand)
+
+     # Run the ASL model
+     predictions = model.predict(processed_hand)
+
+     # The predicted label is the index of the highest probability
+     predicted_label = np.argmax(predictions[0])
+
+     # Map the label to the corresponding letter
+     return label_to_alphabet[predicted_label]
+
+ # Gradio interface function
+ def process_video_frame(image):
+     """
+     Process a webcam frame: detect and crop the hand, then predict the ASL letter.
+     """
+     if image is None:
+         return "No frame received"
+
+     # Detect and crop the hand from the frame
+     cropped_hand = detect_and_crop_hand(image)
+
+     if cropped_hand is None:
+         return "No hand detected"
+
+     # Predict the ASL letter from the cropped hand image
+     return predict_asl_alphabet(cropped_hand)
+
+ # Gradio interface setup
+ iface = gr.Interface(
+     fn=process_video_frame,
+     inputs=gr.Image(sources=["webcam"], streaming=True),  # Webcam input
+     outputs="text",  # Display the predicted letter
+     live=True,  # Enable live video streaming
+     description="Real-Time ASL Hand Gesture Recognition"
+ )
+
+ # Launch the Gradio app
+ iface.launch()
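
The prediction path can be exercised without the webcam UI by calling process_video_frame on a single frame. A minimal sketch, assuming a local test image named test_hand.jpg (a hypothetical filename) and that the functions above are already defined in the session (importing app directly would also trigger iface.launch()):

    import cv2

    # OpenCV loads images as BGR; convert to RGB to match the
    # format Gradio hands to process_video_frame.
    frame_bgr = cv2.imread("test_hand.jpg")  # hypothetical test file
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)

    print(process_video_frame(frame_rgb))  # e.g. 'A' or 'No hand detected'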
config.py ADDED
@@ -0,0 +1,31 @@
+ label_to_alphabet = {
+     0: 'A',
+     1: 'B',
+     2: 'C',
+     3: 'D',
+     4: 'E',
+     5: 'F',
+     6: 'G',
+     7: 'H',
+     8: 'I',
+     9: 'J',
+     10: 'K',
+     11: 'L',
+     12: 'M',
+     13: 'N',
+     14: 'O',
+     15: 'P',
+     16: 'Q',
+     17: 'R',
+     18: 'S',
+     19: 'T',
+     20: 'U',
+     21: 'V',
+     22: 'W',
+     23: 'X',
+     24: 'Y',
+     25: 'Z',
+     26: 'del',
+     27: 'nothing',
+     28: 'space'
+ }
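
This mapping appears to mirror the alphabetical class order of the widely used ASL Alphabet dataset: A-Z as labels 0-25, followed by the special classes del, nothing, and space. A quick sanity check of how a raw model output resolves through the table, using a dummy 29-way probability vector:

    import numpy as np
    from config import label_to_alphabet

    # Fake prediction with all probability mass on class 2
    probs = np.zeros(29)
    probs[2] = 1.0
    print(label_to_alphabet[int(np.argmax(probs))])  # -> 'C'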
model/asl_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d0f623d3b7f9aaf48f50efc73eb279ca6127b0a7518bb1def5971ea01238bf59
+ size 510065040
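
Note that this is a Git LFS pointer file, not the ~510 MB weights themselves; a clone made without LFS support gets only this stub, and load_model("model/asl_model.h5") will fail on it. Running `git lfs install` followed by `git lfs pull` in the repository fetches the real file.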
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ gradio
+ opencv-python==4.7.0.72
+ numpy==1.23.5
+ tensorflow
+ mediapipe
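
Only opencv-python and numpy are pinned; gradio, tensorflow, and mediapipe will resolve to whatever is current, so API drift is possible on fresh installs (for instance, the sources= argument used in app.py expects Gradio 4.x, which replaced the older source= keyword). A typical setup is `pip install -r requirements.txt` in a clean virtual environment.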