invincible-jha
committed on
Commit 822dda9
1 Parent(s): 78a3bb0
Upload 9 files
- analyzer.py +61 -0
- app.py +62 -0
- audio-processor.py +55 -0
- gpu-optimizer.py +30 -0
- model-cache.py +18 -0
- model-manager.py +79 -0
- readme.md +38 -0
- requirements.txt +9 -0
- visualizer.py +74 -0
analyzer.py
ADDED
@@ -0,0 +1,61 @@
from .model_manager import ModelManager
from .audio_processor import AudioProcessor
from typing import Dict

class Analyzer:
    def __init__(self, model_manager: ModelManager, audio_processor: AudioProcessor):
        self.model_manager = model_manager
        self.audio_processor = audio_processor
        self.model_manager.load_models()

    def analyze(self, audio_path: str) -> Dict:
        # Process audio
        waveform, features = self.audio_processor.process_audio(audio_path)

        # Get transcription
        transcription = self.model_manager.transcribe(waveform)

        # Analyze emotions
        emotions = self.model_manager.analyze_emotions(transcription)

        # Analyze mental health indicators
        mental_health = self.model_manager.analyze_mental_health(transcription)

        # Combine analysis with audio features
        mental_health = self._combine_analysis(mental_health, features)

        return {
            'transcription': transcription,
            'emotions': {
                'scores': emotions,
                'dominant_emotion': max(emotions.items(), key=lambda x: x[1])[0]
            },
            'mental_health_indicators': mental_health,
            'audio_features': features
        }

    def _combine_analysis(self, mental_health: Dict, features: Dict) -> Dict:
        """Combine mental health analysis with audio features"""
        # Adjust risk scores based on audio features
        energy_level = features['energy']['mean']     # mean RMS energy (typically well below 1.0 for speech)
        pitch_variability = features['pitch']['std']  # pitch standard deviation in Hz (not normalized to 0-1)

        # Simple risk score adjustment based on audio features
        mental_health['depression_risk'] = (
            mental_health['depression_risk'] * 0.7 +
            (1 - energy_level) * 0.3  # Lower energy may indicate depression
        )

        mental_health['anxiety_risk'] = (
            mental_health['anxiety_risk'] * 0.7 +
            pitch_variability * 0.3  # Higher pitch variability may indicate anxiety; assumes a 0-1 scale
        )

        # Add confidence scores
        mental_health['confidence'] = {
            'depression': 0.8,  # Example confidence scores
            'anxiety': 0.8,
            'stress': 0.7
        }

        return mental_health
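As a worked example of the weighting in _combine_analysis: with a model-predicted depression_risk of 0.6 and a mean RMS energy of 0.2, the combined score is 0.7 * 0.6 + 0.3 * (1 - 0.2) = 0.66. The 0.6 and 0.2 here are illustrative values, not outputs from the models.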
app.py
ADDED
@@ -0,0 +1,62 @@
import gradio as gr
from src.models import ModelManager, AudioProcessor, Analyzer
from src.utils import visualizer, GPUOptimizer, ModelCache

# Initialize components
optimizer = GPUOptimizer()
optimizer.optimize()

model_manager = ModelManager()
audio_processor = AudioProcessor()
analyzer = Analyzer(model_manager, audio_processor)
cache = ModelCache()

def process_audio(audio_file):
    try:
        # Check cache: hash the raw audio bytes and look up a previous result
        with open(audio_file, 'rb') as f:
            cache_key = cache.get_cache_key(f.read())

        cached_result = cache.cache_result(cache_key, None)  # returns cached outputs, or None on a miss
        if cached_result:
            return cached_result

        # Process audio
        results = analyzer.analyze(audio_file)

        # Format outputs
        outputs = (
            results['transcription'],
            visualizer.create_emotion_plot(results['emotions']['scores']),
            _format_indicators(results['mental_health_indicators'])
        )

        # Cache results
        cache.cache_result(cache_key, outputs)

        return outputs

    except Exception as e:
        return str(e), "Error in analysis", "Error in analysis"

def _format_indicators(indicators):
    return f"""
    ### Mental Health Indicators
    - Depression Risk: {indicators['depression_risk']:.2f}
    - Anxiety Risk: {indicators['anxiety_risk']:.2f}
    - Stress Level: {indicators['stress_level']:.2f}
    """

interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),  # Gradio 4.x uses `sources` (a list), not `source`
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.HTML(label="Emotion Analysis"),
        gr.Markdown(label="Mental Health Indicators")
    ],
    title="Vocal Biomarker Analysis",
    description="Analyze voice for emotional and mental health indicators"
)

interface.launch()
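For a quick local check outside the Gradio UI, process_audio can be called directly; a minimal sketch, assuming the app's imports resolve and where "sample.wav" is a placeholder path:

transcription, emotion_html, indicators_md = process_audio("sample.wav")
print(transcription)
print(indicators_md)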
audio-processor.py
ADDED
@@ -0,0 +1,55 @@
import librosa
import numpy as np
from typing import Dict, Tuple

class AudioProcessor:
    def __init__(self):
        self.sample_rate = 16000
        self.n_mfcc = 13
        self.n_mels = 128

    def process_audio(self, audio_path: str) -> Tuple[np.ndarray, Dict]:
        # Load and preprocess audio
        waveform, sr = librosa.load(audio_path, sr=self.sample_rate)

        # Extract features
        features = {
            'mfcc': self._extract_mfcc(waveform),
            'pitch': self._extract_pitch(waveform),
            'energy': self._extract_energy(waveform)
        }

        return waveform, features

    def _extract_mfcc(self, waveform: np.ndarray) -> np.ndarray:
        mfccs = librosa.feature.mfcc(
            y=waveform,
            sr=self.sample_rate,
            n_mfcc=self.n_mfcc
        )
        return mfccs.mean(axis=1)

    def _extract_pitch(self, waveform: np.ndarray) -> Dict:
        f0, voiced_flag, voiced_probs = librosa.pyin(
            waveform,
            fmin=librosa.note_to_hz('C2'),
            fmax=librosa.note_to_hz('C7'),
            sr=self.sample_rate
        )

        return {
            'mean': float(np.nanmean(f0)),
            'std': float(np.nanstd(f0)),
            'max': float(np.nanmax(f0)),
            'min': float(np.nanmin(f0))
        }

    def _extract_energy(self, waveform: np.ndarray) -> Dict:
        rms = librosa.feature.rms(y=waveform)[0]

        return {
            'mean': float(np.mean(rms)),
            'std': float(np.std(rms)),
            'max': float(np.max(rms)),
            'min': float(np.min(rms))
        }
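A minimal sketch of using AudioProcessor on its own; "sample.wav" is a placeholder path:

ap = AudioProcessor()
waveform, features = ap.process_audio("sample.wav")
print(features['pitch']['mean'], features['pitch']['std'])  # pitch statistics in Hz
print(features['energy']['mean'])                           # mean RMS energy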
gpu-optimizer.py
ADDED
@@ -0,0 +1,30 @@
import torch
import gc

class GPUOptimizer:
    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def optimize(self):
        if torch.cuda.is_available():
            # Clear cache
            torch.cuda.empty_cache()
            gc.collect()

            # Set memory fraction
            torch.cuda.set_per_process_memory_fraction(0.9)

            # Enable TF32 for better performance
            torch.backends.cuda.matmul.allow_tf32 = True
            torch.backends.cudnn.allow_tf32 = True

            # Note: torch.cuda.amp.autocast is a context manager and has no effect as a
            # bare call; mixed precision must be enabled with `with torch.cuda.amp.autocast():`
            # around the model forward passes themselves.

    def get_memory_usage(self):
        if torch.cuda.is_available():
            return {
                'allocated': torch.cuda.memory_allocated() / 1024**2,  # MB
                'reserved': torch.cuda.memory_reserved() / 1024**2     # MB
            }
        return {'allocated': 0, 'reserved': 0}
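Since autocast only takes effect as a context manager, a minimal sketch of how mixed precision would be applied around an inference call; model and inputs are placeholders for any of the loaded models and their tokenized inputs:

import torch

with torch.no_grad(), torch.cuda.amp.autocast(enabled=True):
    outputs = model(**inputs)  # forward pass runs in mixed precision on CUDA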
model-cache.py
ADDED
@@ -0,0 +1,18 @@
import hashlib
from collections import OrderedDict

class ModelCache:
    """Bounded key -> value cache for analysis results.

    cache_result(key, None) acts as a lookup and returns the stored value (or None);
    cache_result(key, value) stores the value. This matches the call pattern in app.py.
    """

    def __init__(self, cache_size=128):
        self.cache_size = cache_size
        self._cache = OrderedDict()

    def cache_result(self, input_key, result):
        # Lookup: return the cached value (or None) when no new result is supplied
        if result is None:
            return self._cache.get(input_key)

        # Store: insert the result and evict the oldest entry if over capacity
        self._cache[input_key] = result
        self._cache.move_to_end(input_key)
        if len(self._cache) > self.cache_size:
            self._cache.popitem(last=False)
        return result

    def get_cache_key(self, audio_data):
        # Create hash of the raw audio bytes for use as a cache key
        return hashlib.md5(audio_data).hexdigest()

    def clear_cache(self):
        self._cache.clear()
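The call pattern used in app.py, shown in isolation; the key and value below are placeholders:

cache = ModelCache(cache_size=128)
key = cache.get_cache_key(b"raw audio bytes")
cache.cache_result(key, ("transcript", "<html>", "### indicators"))  # store
hit = cache.cache_result(key, None)                                  # lookup returns the stored tuple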
model-manager.py
ADDED
@@ -0,0 +1,79 @@
from transformers import (
    WhisperProcessor, WhisperForConditionalGeneration,
    AutoModelForSequenceClassification, AutoTokenizer
)
import torch

class ModelManager:
    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.models = {}
        self.tokenizers = {}
        self.processors = {}

    def load_models(self):
        # Load Whisper for speech recognition
        self.processors['whisper'] = WhisperProcessor.from_pretrained("openai/whisper-base")
        self.models['whisper'] = WhisperForConditionalGeneration.from_pretrained(
            "openai/whisper-base"
        ).to(self.device)

        # Load EmoRoBERTa for emotion detection
        self.tokenizers['emotion'] = AutoTokenizer.from_pretrained("arpanghoshal/EmoRoBERTa")
        self.models['emotion'] = AutoModelForSequenceClassification.from_pretrained(
            "arpanghoshal/EmoRoBERTa"
        ).to(self.device)

        # Load ClinicalBERT for mental health analysis.
        # Bio_ClinicalBERT is a base encoder; the 3-label classification head added here is
        # randomly initialized unless a fine-tuned checkpoint is substituted, so the risk
        # scores below are placeholders until such a checkpoint is available.
        self.tokenizers['clinical'] = AutoTokenizer.from_pretrained(
            "emilyalsentzer/Bio_ClinicalBERT"
        )
        self.models['clinical'] = AutoModelForSequenceClassification.from_pretrained(
            "emilyalsentzer/Bio_ClinicalBERT",
            num_labels=3  # depression, anxiety, stress
        ).to(self.device)

    def transcribe(self, audio_input):
        inputs = self.processors['whisper'](
            audio_input,
            sampling_rate=16000,  # matches AudioProcessor's sample rate
            return_tensors="pt"
        ).input_features.to(self.device)

        generated_ids = self.models['whisper'].generate(inputs)
        transcription = self.processors['whisper'].batch_decode(
            generated_ids,
            skip_special_tokens=True
        )[0]
        return transcription

    def analyze_emotions(self, text):
        inputs = self.tokenizers['emotion'](
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512
        ).to(self.device)

        outputs = self.models['emotion'](**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

        # Take label names from the model config so they always match the classifier head
        id2label = self.models['emotion'].config.id2label
        return {id2label[i]: float(prob) for i, prob in enumerate(probs[0])}

    def analyze_mental_health(self, text):
        inputs = self.tokenizers['clinical'](
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512
        ).to(self.device)

        outputs = self.models['clinical'](**inputs)
        scores = torch.sigmoid(outputs.logits)

        return {
            'depression_risk': float(scores[0][0]),
            'anxiety_risk': float(scores[0][1]),
            'stress_level': float(scores[0][2])
        }
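A minimal sketch of exercising ModelManager directly, assuming a 16 kHz mono waveform; one second of silence is used here purely as a placeholder input:

import numpy as np

mm = ModelManager()
mm.load_models()
waveform = np.zeros(16000, dtype=np.float32)  # 1 s of silence at 16 kHz
text = mm.transcribe(waveform)
emotion_scores = mm.analyze_emotions(text)
risks = mm.analyze_mental_health(text)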
readme.md
ADDED
@@ -0,0 +1,38 @@
---
title: Vocal Biomarker Analysis
emoji: 🎤
colorFrom: blue
colorTo: purple
sdk: gradio
sdk_version: 4.12.0
python_version: 3.10
app_file: app.py
pinned: false
license: mit
---

# Vocal Biomarker Analysis

This application analyzes voice recordings to detect emotional and mental health indicators using AI models.

## Features
- Speech-to-text transcription
- Emotion detection
- Mental health risk assessment
- Real-time visualization

## Models
- Whisper Base (speech recognition)
- EmoRoBERTa (emotion detection)
- Bio_ClinicalBERT (mental health analysis)

## Usage
1. Record audio or upload a file
2. Click analyze
3. View the results:
   - Transcription
   - Emotion analysis
   - Mental health indicators

## License
MIT License
requirements.txt
ADDED
@@ -0,0 +1,9 @@
gradio==4.12.0
torch==2.1.0
transformers==4.36.0
librosa==0.10.1
numpy==1.24.3
plotly==5.18.0
scipy==1.11.3
soundfile==0.12.1
pandas==2.1.1
visualizer.py
ADDED
@@ -0,0 +1,74 @@
import plotly.graph_objects as go
from typing import Dict

def create_emotion_plot(emotions: Dict[str, float]) -> str:
    """Create emotion distribution plot"""
    fig = go.Figure()

    # Add bar plot
    fig.add_trace(go.Bar(
        x=list(emotions.keys()),
        y=list(emotions.values()),
        marker_color='rgb(55, 83, 109)'
    ))

    # Update layout
    fig.update_layout(
        title='Emotion Distribution',
        xaxis_title='Emotion',
        yaxis_title='Score',
        yaxis_range=[0, 1],
        template='plotly_white',
        height=400
    )

    return fig.to_html(include_plotlyjs=True)

def create_pitch_plot(pitch_data: Dict) -> str:
    """Create pitch analysis plot"""
    fig = go.Figure()

    # Add box plot
    fig.add_trace(go.Box(
        y=[pitch_data['min'], pitch_data['mean'], pitch_data['max']],
        name='Pitch Distribution',
        boxpoints='all'
    ))

    # Update layout
    fig.update_layout(
        title='Pitch Analysis',
        yaxis_title='Frequency (Hz)',
        template='plotly_white',
        height=400
    )

    return fig.to_html(include_plotlyjs=True)

def create_energy_plot(energy_data: Dict) -> str:
    """Create energy analysis plot"""
    fig = go.Figure()

    # Add gauge indicator
    fig.add_trace(go.Indicator(
        mode='gauge+number',
        value=energy_data['mean'],
        title={'text': 'Voice Energy Level'},
        gauge={
            'axis': {'range': [0, 1]},
            'bar': {'color': 'darkblue'},
            'steps': [
                {'range': [0, 0.3], 'color': 'lightgray'},
                {'range': [0.3, 0.7], 'color': 'gray'},
                {'range': [0.7, 1], 'color': 'darkgray'}
            ]
        }
    ))

    # Update layout
    fig.update_layout(
        height=300,
        template='plotly_white'
    )

    return fig.to_html(include_plotlyjs=True)
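A minimal sketch of rendering one of these plots standalone; the scores below are illustrative, not model output:

html = create_emotion_plot({'joy': 0.62, 'sadness': 0.14, 'anger': 0.09, 'fear': 0.08, 'surprise': 0.05, 'love': 0.02})
with open("emotion_plot.html", "w") as f:
    f.write(html)  # open in a browser to view the interactive chart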