File size: 3,985 Bytes
00ae0ce
 
f52a928
00ae0ce
f52a928
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a29c8e
f52a928
 
 
 
 
dc5c04c
f52a928
 
 
 
dc5c04c
f52a928
 
dc5c04c
f52a928
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc5c04c
f52a928
 
dc5c04c
f52a928
dc5c04c
f52a928
 
 
 
 
dc5c04c
f52a928
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc5c04c
00ae0ce
f52a928
 
 
 
 
 
 
 
00ae0ce
f52a928
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import functools
import logging

import gradio as gr
import numpy as np
from transformers import pipeline

@functools.lru_cache(maxsize=1)
def create_speech_analyzer():
    """Load the speech-to-text and sentiment-analysis pipelines.

    The result is memoized: the models are loaded on the first successful
    call only, and every later call returns the same pair. A failed load is
    not cached, so the next call tries again.

    Returns:
        A ``(transcriber, sentiment_model)`` tuple of ``transformers``
        pipelines.

    Raises:
        RuntimeError: If either pipeline fails to load. The original
            exception is attached as ``__cause__``.
    """
    try:
        # Whisper "small" checkpoint: the smaller model is used for faster
        # processing.
        transcriber = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-small",
            max_new_tokens=128,
        )

        # RoBERTa-based checkpoint used for sentiment classification.
        sentiment_model = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
        )
    except Exception as e:
        # Keep the original traceback reachable through exception chaining.
        raise RuntimeError(f"Error loading models: {e}") from e

    return transcriber, sentiment_model

# Raw model labels (lower-cased) -> display names. Both the generic
# ``LABEL_n`` form and plain-word labels are accepted, since which one the
# sentiment pipeline emits depends on the checkpoint's configuration.
_SENTIMENT_LABELS = {
    "label_0": "Negative",
    "label_1": "Neutral",
    "label_2": "Positive",
    "negative": "Negative",
    "neutral": "Neutral",
    "positive": "Positive",
}

# Direction of each display label; turns a confidence into a signed score.
_SENTIMENT_POLARITY = {"Negative": -1.0, "Neutral": 0.0, "Positive": 1.0}

# Transcripts with more words than this also get a per-sentence breakdown.
_LONG_TEXT_WORDS = 50


def _readable_label(raw_label):
    """Return the display name for a raw label, or the label itself if unknown."""
    return _SENTIMENT_LABELS.get(str(raw_label).lower(), raw_label)


def _error_result(message):
    """Build the result dict reported when analysis cannot be completed."""
    return {
        "transcription": f"Error in processing: {message}",
        "sentiment": "ERROR",
        "analysis": "Analysis failed",
    }


def _sentiment_stability(chunk_results):
    """Return ``1 - std`` of the signed per-chunk sentiment scores.

    Each chunk's confidence is signed by its label direction (negative -> -,
    neutral -> 0, positive -> +) so that chunks flipping between positive
    and negative lower the stability instead of looking identical.
    """
    signed_scores = [
        _SENTIMENT_POLARITY.get(_readable_label(r["label"]), 0.0) * r["score"]
        for r in chunk_results
    ]
    return 1.0 - float(np.std(signed_scores))


def analyze_speech(audio_file):
    """Transcribe an audio file and summarize the sentiment of the transcript.

    Args:
        audio_file: Path of the audio file to analyze. A missing value
            (``None`` or an empty string) is reported as an error result.

    Returns:
        A dict with three string values:

        * ``"transcription"``: the recognized text, or an error message.
        * ``"sentiment"``: the readable overall label, ``"N/A"`` when no
          speech was recognized, or ``"ERROR"`` on failure.
        * ``"analysis"``: a one-line summary, or a three-line breakdown
          (overall sentiment, stability, chunk count) for transcripts
          longer than 50 words.

        Exceptions raised while loading or running the models are logged and
        reported through the returned dict instead of propagating.
    """
    if not audio_file:
        return _error_result("no audio provided")

    try:
        transcriber, sentiment_model = create_speech_analyzer()

        transcription = transcriber(audio_file)["text"].strip()
        if not transcription:
            # Nothing to classify; avoid reporting a sentiment for empty text.
            return {
                "transcription": transcription,
                "sentiment": "N/A",
                "analysis": "No speech detected",
            }

        overall = sentiment_model(transcription)[0]
        sentiment = _readable_label(overall["label"])
        confidence = overall["score"]

        # Longer transcripts are additionally scored sentence by sentence.
        sentences = []
        if len(transcription.split()) > _LONG_TEXT_WORDS:
            sentences = [s.strip() for s in transcription.split(".") if s.strip()]

        if sentences:
            chunk_results = [sentiment_model(s)[0] for s in sentences]
            stability = _sentiment_stability(chunk_results)
            detailed_analysis = (
                f"Overall Sentiment: {sentiment} ({confidence:.2%})\n"
                f"Sentiment Stability: {stability:.2%}\n"
                f"Text chunks analyzed: {len(chunk_results)}"
            )
        else:
            detailed_analysis = f"Sentiment: {sentiment} ({confidence:.2%})"

        return {
            "transcription": transcription,
            "sentiment": sentiment,
            "analysis": detailed_analysis,
        }

    except Exception as e:
        # UI boundary: report the failure to the user, but keep the traceback
        # in the logs instead of discarding it.
        logging.getLogger(__name__).exception("Speech analysis failed")
        return _error_result(str(e))

def _analyze_speech_for_interface(audio_file):
    """Run ``analyze_speech`` and return its fields in output-component order.

    ``gr.Interface`` assigns returned values to its output components by
    position, so the result dict is unpacked into a
    ``(transcription, sentiment, analysis)`` tuple here.
    """
    result = analyze_speech(audio_file)
    return result["transcription"], result["sentiment"], result["analysis"]


def create_interface():
    """Create and configure the Gradio interface.

    Returns:
        A ``gr.Interface`` with one audio input (microphone or upload,
        passed to the handler as a file path) and three text outputs:
        transcription, sentiment, and detailed analysis.
    """
    return gr.Interface(
        # The adapter converts the analysis dict into one value per output.
        fn=_analyze_speech_for_interface,
        inputs=gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="Upload or Record Audio"
        ),
        outputs=[
            gr.Textbox(label="Transcription", lines=3),
            gr.Textbox(label="Sentiment"),
            gr.Textbox(label="Detailed Analysis", lines=3)
        ],
        title="Advanced Speech Sentiment Analyzer",
        description="""
        This tool performs advanced sentiment analysis on speech using RoBERTa.
        It provides detailed sentiment analysis for longer texts and handles
        both audio uploads and microphone recordings.
        """,
        theme=gr.themes.Soft(),
        examples=[]
    )

def main():
    """Build the Gradio interface and start serving it.

    The app is launched with sharing and debug mode enabled and the server
    bound to ``0.0.0.0``.
    """
    launch_options = {
        "share": True,
        "debug": True,
        "server_name": "0.0.0.0",
    }
    app = create_interface()
    app.launch(**launch_options)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()