Spaces:

CVMX-jaca-tonos
/

Sentiment-Analysis-of-Spanish-Transcribed-Audios

Runtime error

Sentiment-Analysis-of-Spanish-Transcribed-Audios

File size: 2,264 Bytes

1d3d4a8
 
 
 
 
 
71e5c37
1d3d4a8
 
 
 
 
 
 
ceeab94
1d3d4a8
 
ceeab94
1d3d4a8
 
 
 
 
 
619b35a
60c30fb
619b35a
3a603a5
 
6e845d7
 
 
 
3a603a5
 
 
 
 
 
1d3d4a8
 
 
 
 
 
 
619b35a
06c7cc8
1d3d4a8
3a603a5
1d3d4a8

import gradio as gr
import librosa
from transformers import AutoFeatureExtractor, pipeline


def load_and_fix_data(input_file, model_sampling_rate):
    """Load an audio file as a mono waveform at the model's sampling rate.

    Args:
        input_file: Path (or other source accepted by ``librosa.load``) of
            the audio to read.
        model_sampling_rate: Sampling rate, in Hz, expected by the ASR model.

    Returns:
        A 1-D NumPy array of floating-point samples at
        ``model_sampling_rate``.
    """
    # Let librosa down-mix and resample in a single step. Previously the file
    # was loaded with librosa's defaults (mono, 22 050 Hz), which made the
    # manual stereo mix-down unreachable and forced a second resampling pass
    # via ``librosa.resample`` with positional rates -- a call form that
    # librosa >= 0.10 rejects because those arguments are keyword-only.
    speech, _ = librosa.load(input_file, sr=model_sampling_rate, mono=True)
    return speech


# Checkpoint shared by the feature extractor and the ASR pipeline.
ASR_MODEL_ID = "jonatasgrosman/wav2vec2-xls-r-1b-spanish"

# The extractor is loaded to read the sampling rate the checkpoint expects.
feature_extractor = AutoFeatureExtractor.from_pretrained(ASR_MODEL_ID)
sampling_rate = feature_extractor.sampling_rate

# Speech-to-text pipeline, created once at import time and reused per request.
asr = pipeline("automatic-speech-recognition", model=ASR_MODEL_ID)


# Sentiment pipeline shared across requests; built on first use so the model
# is loaded once instead of on every prediction.
_sentiment_pipeline = None


def _get_sentiment_pipeline():
    """Return the shared sentiment-analysis pipeline, creating it on first use."""
    global _sentiment_pipeline
    if _sentiment_pipeline is None:
        _sentiment_pipeline = pipeline(
            "sentiment-analysis",
            model="finiteautomata/beto-sentiment-analysis",
        )
    return _sentiment_pipeline


def predict_and_ctc_lm_decode(input_file):
    """Transcribe an audio file and classify the sentiment of the transcript.

    Args:
        input_file: Path of the audio file to analyse (the Gradio audio input
            is configured with ``type="filepath"``).

    Returns:
        A string of the form ``"Detected Sentiment: <label>"``, where
        ``<label>`` is the top label returned by the sentiment pipeline.
    """
    speech = load_and_fix_data(input_file, sampling_rate)
    # Chunked inference (5 s windows with 1 s stride) for the ASR pipeline.
    transcribed_text = asr(speech, chunk_length_s=5, stride_length_s=1)["text"]
    sentiment = _get_sentiment_pipeline()(transcribed_text)[0]["label"]
    return f"Detected Sentiment: {sentiment}"

# Markdown blurb passed as the ``description`` argument of the Gradio
# interface below: explains the two-stage flow (speech-to-text, then
# sentiment analysis), the label abbreviations, and links to both models.
description = """ This is a Gradio demo for Sentiment Analysis of Transcribed Spanish Audio. First, we do Speech to Text, and then we perform sentiment analysis on the obtained transcription of the input audio. 


**NOTE regarding predicted labels : NEG --> NEGATIVE, NEU--> NEUTRAL, POS--> POSITIVE**


Pre-trained model used for Spanish ASR: [jonatasgrosman/wav2vec2-xls-r-1b-spanish](https://huggingface.co/jonatasgrosman/wav2vec2-xls-r-1b-spanish)


Pre-trained model used for Sentiment Analysis of transcribed audio: [finiteautomata/beto-sentiment-analysis](https://huggingface.co/finiteautomata/beto-sentiment-analysis)
"""


# Build the demo UI: one microphone recording in, one text prediction out.
# NOTE(review): ``gr.inputs`` / ``gr.outputs``, ``layout``, ``theme`` and
# ``launch(cache_examples=...)`` look like an older Gradio API surface --
# confirm against the Gradio version pinned for this Space.
audio_input = gr.inputs.Audio(
    source="microphone",
    type="filepath",
    label="Record your audio",
)
prediction_output = gr.outputs.Textbox(label="Predicción")

demo = gr.Interface(
    fn=predict_and_ctc_lm_decode,
    inputs=[audio_input],
    outputs=[prediction_output],
    examples=[["audio_test.wav"], ["sample_audio.wav"], ["test2.wav"]],
    title="Sentiment Analysis of Spanish Transcribed Audio",
    description=description,
    layout="horizontal",
    theme="huggingface",
)

# Start the web server with request queuing and example caching enabled.
demo.launch(enable_queue=True, cache_examples=True)