prashant-garg committed on
Commit
6a3ed8c
·
1 Parent(s): ccdfda0

working locally

Browse files
Files changed (3) hide show
  1. app.py +101 -3
  2. packages.txt +1 -0
  3. requirements.txt +1 -1
app.py CHANGED
@@ -1,9 +1,21 @@
 
 
 
1
  import streamlit as st
2
  import numpy as np
3
  import torch
4
  from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
5
- from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
6
- import av
 
 
 
 
 
 
 
 
 
7
 
8
  # Load Model and Feature Extractor
9
  @st.cache_resource
@@ -20,4 +32,90 @@ def load_model():
20
  placeholder = st.empty()
21
  placeholder.text("Loading model...")
22
  feature_extractor, model = load_model()
23
- placeholder.text("Model loaded!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+ warnings.filterwarnings("ignore")
3
+
4
  import streamlit as st
5
  import numpy as np
6
  import torch
7
  from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
8
+ import pyaudio
9
+ import logging
10
+
11
+ # Configure logging
12
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
13
+
14
+ # Define audio stream parameters
15
+ FORMAT = pyaudio.paInt16 # 16-bit resolution
16
+ CHANNELS = 1 # Mono audio
17
+ RATE = 16000 # 16kHz sampling rate
18
+ CHUNK = 1024 # Number of frames per buffer
19
 
20
  # Load Model and Feature Extractor
21
  @st.cache_resource
 
32
  placeholder = st.empty()
33
  placeholder.text("Loading model...")
34
  feature_extractor, model = load_model()
35
# Model is ready: update the loading banner before drawing the page.
placeholder.text("Model loaded!")

# Page header and usage hint; once they are drawn the loading banner is cleared.
st.title("Real-Time Gender Detection from Voice :microphone:")
st.write("Click 'Start' to detect gender in real-time.")
placeholder.empty()

# Seed per-session defaults without overwriting values kept from earlier reruns.
for _key, _default in (('listening', False), ('prediction', "")):
    if _key not in st.session_state:
        st.session_state[_key] = _default
46
+
47
+ # Function to stop listening
48
def stop_listening():
    """Stop the audio stream, release PyAudio, and reset the listening state.

    Any ``'stream'`` / ``'audio'`` handles stored in ``st.session_state`` are
    closed and then removed from the session state, so calling this function
    again (for example a second click on "Stop") does not operate on handles
    that were already closed. Finally the ``'listening'`` flag is cleared, the
    ``'prediction'`` message is updated, and the script is rerun via
    ``st.rerun()``.
    """
    if 'stream' in st.session_state:
        logging.info("Stopping stream")
        stream = st.session_state['stream']
        # Forget the handle before closing it so it is never closed twice.
        del st.session_state['stream']
        try:
            stream.stop_stream()
            stream.close()
        except OSError as exc:
            # Presumably the stream was already closed or the device went away;
            # log it and carry on so the PyAudio instance below is still released.
            logging.warning("Could not stop stream cleanly: %s", exc)
    if 'audio' in st.session_state:
        logging.info("Stopping audio")
        audio = st.session_state['audio']
        del st.session_state['audio']
        audio.terminate()
    st.session_state['listening'] = False
    st.session_state['prediction'] = "Stopped listening, click 'Start Listening' to start again."
    st.rerun()
60
+
61
def _release_previous_audio():
    """Close any stream / PyAudio instance left in session state by an earlier run."""
    if 'stream' in st.session_state:
        stale_stream = st.session_state['stream']
        del st.session_state['stream']
        try:
            stale_stream.stop_stream()
            stale_stream.close()
        except OSError as exc:
            # Presumably the handle was already closed; nothing left to release.
            logging.warning("Could not close previous stream: %s", exc)
    if 'audio' in st.session_state:
        stale_audio = st.session_state['audio']
        del st.session_state['audio']
        stale_audio.terminate()


def start_listening():
    """Start the audio stream and continuously process audio for gender detection.

    Opens a PyAudio input stream using the module-level FORMAT, CHANNELS, RATE
    and CHUNK settings, stores the handles in ``st.session_state`` and then,
    while ``st.session_state['listening']`` is true, repeatedly:

    1. reads ``int(RATE / CHUNK * 1.5)`` buffers (roughly 1.5 seconds of audio),
    2. skips the window if its peak amplitude is at or below 0.05,
    3. otherwise runs the module-level ``feature_extractor`` and ``model`` on it
       and shows the predicted label when it differs from the last one.

    Any ``Exception`` is logged, shown with ``st.error`` and followed by
    ``stop_listening()`` to release the audio resources.
    """
    try:
        placeholder = st.empty()

        # A previous Start may have left handles behind; close them instead of
        # overwriting (and leaking) them below.
        _release_previous_audio()

        audio = pyaudio.PyAudio()
        # Register the PyAudio instance before opening the stream so that
        # stop_listening() can still terminate it if open() raises.
        st.session_state['audio'] = audio
        stream = audio.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK)

        st.session_state['stream'] = stream
        st.session_state['listening'] = True
        st.session_state['prediction'] = "Listening........................"
        placeholder.write("Listening for audio...")

        # Number of CHUNK-sized reads per analysis window; constant, so computed once.
        chunks_per_window = int(RATE / CHUNK * 1.5)

        while st.session_state['listening']:
            # Read the whole window, then convert once instead of growing an
            # array with one np.concatenate per chunk.
            raw_chunks = [
                stream.read(CHUNK, exception_on_overflow=False)
                for _ in range(chunks_per_window)
            ]
            # 16-bit PCM -> float32 in [-1.0, 1.0)
            audio_data = (
                np.frombuffer(b"".join(raw_chunks), dtype=np.int16).astype(np.float32)
                / 32768.0
            )

            # Check if there is significant sound
            if np.max(np.abs(audio_data)) > 0.05:  # Threshold for detecting sound
                inputs = feature_extractor(
                    audio_data, sampling_rate=RATE, return_tensors="pt", padding=True
                )
                with torch.no_grad():
                    logits = model(**inputs).logits
                predicted_ids = torch.argmax(logits, dim=-1)

                # Map predicted IDs to labels
                predicted_label = model.config.id2label[predicted_ids.item()]

                # Only redraw when the label changes.
                if predicted_label != st.session_state['prediction']:
                    st.session_state['prediction'] = predicted_label
                    placeholder.write(f"Detected Gender: {predicted_label}")
            else:
                st.session_state['prediction'] = "---- No significant sound detected, skipping prediction. ----"
                placeholder.empty()

        # Loop ended because listening was switched off: clear the status line.
        placeholder.empty()
    except Exception as e:
        # logging.exception keeps the traceback alongside the message.
        logging.exception("An error occurred: %s", e)
        st.error(f"An error occurred: {e}")
        # NOTE(review): stop_listening() ends with st.rerun(), so the error shown
        # above may be replaced by the next run almost immediately; confirm intended.
        stop_listening()
113
+
114
+
115
# Start / Stop controls, side by side.
col1, col2 = st.columns(2)

# Create both buttons before acting on either: start_listening() blocks in its
# capture loop, so dispatching inside the first column would leave the Stop
# button un-rendered for that run.
with col1:
    start_clicked = st.button("Start")
with col2:
    stop_clicked = st.button("Stop")

if stop_clicked:
    stop_listening()
elif start_clicked:
    # Re-enter the first column so the status placeholder created by
    # start_listening() still appears under the Start button.
    with col1:
        start_listening()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ portaudio19-dev
requirements.txt CHANGED
@@ -34,6 +34,7 @@ pandas==2.2.3
34
  pillow==11.1.0
35
  protobuf==5.29.3
36
  pyarrow==19.0.0
 
37
  pycparser==2.22
38
  pydeck==0.9.1
39
  pyee==12.1.1
@@ -52,7 +53,6 @@ safetensors==0.5.2
52
  six==1.17.0
53
  smmap==5.0.2
54
  streamlit==1.42.0
55
- streamlit-webrtc==0.47.9
56
  sympy==1.13.1
57
  tenacity==9.0.0
58
  tokenizers==0.21.0
 
34
  pillow==11.1.0
35
  protobuf==5.29.3
36
  pyarrow==19.0.0
37
+ PyAudio==0.2.14
38
  pycparser==2.22
39
  pydeck==0.9.1
40
  pyee==12.1.1
 
53
  six==1.17.0
54
  smmap==5.0.2
55
  streamlit==1.42.0
 
56
  sympy==1.13.1
57
  tenacity==9.0.0
58
  tokenizers==0.21.0