Spaces:

prashant-garg
/

gender-detection

Running

App Files Files Community

prashant-garg committed on Feb 16

Commit

3b0b181

1 Parent(s): ab02fe1

gender detection app

Browse files

Files changed (3) hide show

app.py +120 -2
requirements.txt +54 -0
runtime.txt +1 -0

app.py CHANGED Viewed

@@ -1,4 +1,122 @@
 import streamlit as st
-x = st.slider('Select a value')
-st.write(x, 'squared is', x * x)

+"""
+Streamlit application for real-time gender detection from audio input.
+Uses wav2vec2 model to analyze voice and predict speaker gender.
+"""
 import streamlit as st
+import pyaudio
+import numpy as np
+import torch
+from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
+import logging
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# Define audio stream parameters
+FORMAT = pyaudio.paInt16  # 16-bit resolution
+CHANNELS = 1              # Mono audio
+RATE = 16000              # 16kHz sampling rate
+CHUNK = 1024              # Number of frames per buffer
+@st.cache_resource
+def load_model():
+    """
+    Load the wav2vec2 model and feature extractor for gender recognition.
+    Returns:
+        tuple: A tuple containing the feature extractor and the model.
+    """
+    model_path = "alefiury/wav2vec2-large-xlsr-53-gender-recognition-librispeech"
+    # model_path = "./local-model"
+    feature_extractor = AutoFeatureExtractor.from_pretrained(model_path)
+    model = AutoModelForAudioClassification.from_pretrained(model_path)
+    model.eval()
+    logging.info("Model loaded successfully.")
+    return feature_extractor, model
+st.title("Gender Detection")
+# Initialize session state
+if 'listening' not in st.session_state:
+    st.session_state['listening'] = False
+if 'prediction' not in st.session_state:
+    st.session_state['prediction'] = ""
+# Function to stop listening
+def stop_listening():
+    """Stop the audio stream and update session state to stop listening."""
+    if 'stream' in st.session_state:
+        logging.info("Stopping stream")
+        st.session_state['stream'].stop_stream()
+        st.session_state['stream'].close()
+    if 'audio' in st.session_state:
+        logging.info("Stopping audio")
+        st.session_state['audio'].terminate()
+    st.session_state['listening'] = False
+    st.session_state['prediction'] = "Stopped listening, click 'Start Listening' to start again."
+    st.rerun()
+def start_listening():
+    """Start the audio stream and continuously process audio for gender detection."""
+    placeholder = st.empty()
+    try:
+        placeholder.write("Loading model...")
+        feature_extractor, model = load_model()
+        audio = pyaudio.PyAudio()
+        stream = audio.open(format=FORMAT,
+                            channels=CHANNELS,
+                            rate=RATE,
+                            input=True,
+                            frames_per_buffer=CHUNK)
+        st.session_state['stream'] = stream
+        st.session_state['audio'] = audio
+        st.session_state['listening'] = True
+        st.session_state['prediction'] = "Listening........................"
+        placeholder.write("Listening for audio...")
+        while st.session_state['listening']:
+            audio_data = np.array([], dtype=np.float32)
+            for _ in range(int(RATE / CHUNK * 1.5)):
+                # Read audio chunk from the stream
+                data = stream.read(CHUNK, exception_on_overflow=False)
+                # Convert byte data to numpy array and normalize
+                chunk_data = np.frombuffer(data, dtype=np.int16).astype(np.float32) / 32768.0
+                audio_data = np.concatenate((audio_data, chunk_data))
+            # Check if there is significant sound
+            if np.max(np.abs(audio_data)) > 0.05:  # Threshold for detecting sound
+                # Process the audio data
+                inputs = feature_extractor(audio_data, sampling_rate=RATE, return_tensors="pt", padding=True)
+                # Perform inference
+                with torch.no_grad():
+                    logits = model(**inputs).logits
+                    predicted_ids = torch.argmax(logits, dim=-1)
+                    # Map predicted IDs to labels
+                    predicted_label = model.config.id2label[predicted_ids.item()]
+                    if predicted_label != st.session_state['prediction']:
+                        st.session_state['prediction'] = predicted_label
+                        # st.write(f"Detected Gender: {predicted_label}")
+                        placeholder.write(f"Detected Gender: {predicted_label}")
+            else:
+                st.session_state['prediction'] = "---- No significant sound detected, skipping prediction. ----"
+                placeholder.empty()
+        placeholder.empty()
+    except Exception as e:
+        logging.error(f"An error occurred: {e}")
+        st.error(f"An error occurred: {e}")
+        stop_listening()
+# Buttons to start and stop listening
+col1, col2 = st.columns(2)
+with col1:
+    if st.button("Start Listening"):
+        start_listening()
+with col2:
+    if st.button("Stop Listening"):
+        stop_listening()

requirements.txt ADDED Viewed

	@@ -0,0 +1,54 @@

+altair==5.5.0
+attrs==25.1.0
+blinker==1.9.0
+cachetools==5.5.1
+certifi==2025.1.31
+charset-normalizer==3.4.1
+click==8.1.8
+filelock==3.17.0
+fsspec==2025.2.0
+gitdb==4.0.12
+GitPython==3.1.44
+huggingface-hub==0.28.1
+idna==3.10
+Jinja2==3.1.5
+jsonschema==4.23.0
+jsonschema-specifications==2024.10.1
+markdown-it-py==3.0.0
+MarkupSafe==3.0.2
+mdurl==0.1.2
+mpmath==1.3.0
+narwhals==1.26.0
+networkx==3.4.2
+numpy==2.2.3
+packaging==24.2
+pandas==2.2.3
+pillow==11.1.0
+protobuf==5.29.3
+pyarrow==19.0.0
+PyAudio==0.2.14
+pydeck==0.9.1
+Pygments==2.19.1
+python-dateutil==2.9.0.post0
+pytz==2025.1
+PyYAML==6.0.2
+referencing==0.36.2
+regex==2024.11.6
+requests==2.32.3
+rich==13.9.4
+rpds-py==0.22.3
+safetensors==0.5.2
+six==1.17.0
+smmap==5.0.2
+streamlit==1.42.0
+sympy==1.13.1
+tenacity==9.0.0
+tokenizers==0.21.0
+toml==0.10.2
+torch==2.6.0
+tornado==6.4.2
+tqdm==4.67.1
+transformers==4.48.3
+typing_extensions==4.12.2
+tzdata==2025.1
+urllib3==2.3.0

runtime.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ python-3.10