import gradio as gr
import whisper
from transformers import pipeline

# Whisper checkpoint used for transcription and language detection.
model = whisper.load_model("base")

# go_emotions classifier: 28 fine-grained emotion labels.
sentiment_analysis = pipeline(
    "sentiment-analysis",
    framework="pt",
    model="SamLowe/roberta-base-go_emotions",
)


def analyze_sentiment(text):
    """Run the emotion classifier and return a {label: score} dict."""
    results = sentiment_analysis(text)
    sentiment_results = {result["label"]: result["score"] for result in results}
    return sentiment_results


def get_sentiment_emoji(sentiment):
    """Map a go_emotions label to an emoji (empty string if unknown)."""
    emoji_mapping = {
        "disappointment": "😞",
        "sadness": "😢",
        "annoyance": "😠",
        "neutral": "😐",
        "disapproval": "👎",
        "realization": "😮",
        "nervousness": "😬",
        "approval": "👍",
        "joy": "😄",
        "anger": "😡",
        "embarrassment": "😳",
        "caring": "🤗",
        "remorse": "😔",
        "disgust": "🤢",
        "grief": "😥",
        "confusion": "😕",
        "relief": "😌",
        "desire": "😍",
        "admiration": "😌",
        "optimism": "😊",
        "fear": "😨",
        "love": "❤️",
        "excitement": "🎉",
        "curiosity": "🤔",
        "amusement": "😄",
        "surprise": "😲",
        "gratitude": "🙏",
        "pride": "🦁",
    }
    return emoji_mapping.get(sentiment, "")


def display_sentiment_results(sentiment_results, option):
    """Format the {label: score} dict as text, with or without scores."""
    sentiment_text = ""
    for sentiment, score in sentiment_results.items():
        emoji = get_sentiment_emoji(sentiment)
        if option == "Sentiment Only":
            sentiment_text += f"{sentiment} {emoji}\n"
        elif option == "Sentiment + Score":
            sentiment_text += f"{sentiment} {emoji}: {score}\n"
    return sentiment_text
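# Illustrative only: the go_emotions pipeline returns the top label per input as
# a list of {'label', 'score'} dicts, so analyze_sentiment() yields a one-entry
# dict. The label and score below are hypothetical, not real model output:
#
#   >>> analyze_sentiment("Thanks so much, this made my day!")
#   {'gratitude': 0.97}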

def inference(audio, sentiment_option):
    """Transcribe the audio clip, detect its language, and analyze sentiment."""
    audio = whisper.load_audio(audio)
    # Pad/trim to 30 seconds, Whisper's fixed context window.
    audio = whisper.pad_or_trim(audio)

    # Log-Mel spectrogram, moved to the same device as the model.
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Most probable language according to Whisper's language detector.
    _, probs = model.detect_language(mel)
    lang = max(probs, key=probs.get)

    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    sentiment_results = analyze_sentiment(result.text)
    sentiment_output = display_sentiment_results(sentiment_results, sentiment_option)

    return lang.upper(), result.text, sentiment_output


title = """
<h1 align="center">Audio Sentiment Analysis</h1>
"""

subtitle = """
<h3 align="center">Automatic Speech Recognition</h3>
"""

image_path = "Arquitecture_W.jpg"

description = """
<p align="justify">
With cross-modal interaction and AI (tools and pre-trained models in NLP), we can
analyze large volumes of audio data in real time, such as recorded conversations,
customer service calls, or voice recordings, in order to identify and categorize
emotions (from positive and neutral to sad and angry).
</p>

<p>Components of the tool:</p>
<ul>
  <li>Input: real-time multilingual video-call speech recognition</li>
  <li>Pre-trained model: Whisper</li>
  <li>Model size: base, with 74M parameters</li>
  <li>Encoder/decoder architecture</li>
  <li>Transcribes, translates, and identifies audio</li>
  <li>Output: sentiment analysis</li>
</ul>
""" custom_css = """ banner-image { margin-left: auto; margin-right: auto; } chat-message { font-size: 300px; min-height: 600px; } img { border-radius: 8px; max-width: 100%; height: auto; } """ block = gr.Blocks(css=custom_css, theme='gradio/default',title="Analytics Projects by Ray Espinoza") #block = gr.Blocks(css=custom_css, title="Analytics Projects by Ray Espinoza") #block = gr.Blocks(css=".gradio-container {background-color: black}", title="Analytics Projects by Ray Espinoza") #block = gr.Blocks(css=".gradio-container {background: url('file=pic4.jpg')}", title="Analytics Projects by Ray Espinoza") with block: gr.HTML(title) gr.HTML(subtitle) with gr.Row(): with gr.Column(scale=2): gr.Image(image_path, elem_id="banner-image", show_label=False, show_download_button=False) #banner-image #gr.Markdown(value=image_path, elem_id="img") #gr.Image(image_path, elem_id="chat-message", show_label=False) with gr.Column(): gr.HTML(description) with gr.Group(): with gr.Box(): audio = gr.Audio( label="Input Audio", show_label=False,#Here#False source="microphone", type="filepath" ) sentiment_option = gr.Radio( choices=["Sentiment Only", "Sentiment + Score"], label="Select an option", default="Sentiment Only" ) btn = gr.Button("Execute: Transcribe",variant="primary") lang_str = gr.Textbox(label="Language:") text = gr.Textbox(label="Transcription:") sentiment_output = gr.Textbox(label="Sentiment Analysis Results:", output=True) btn.click(inference, inputs=[audio, sentiment_option], outputs=[lang_str, text, sentiment_output]) gr.HTML(''' ''') block.launch(share=True)