File size: 3,012 Bytes
d9e3953
afc5c99
ee55457
 
 
d9e3953
ee55457
 
 
 
 
 
 
 
d9e3953
 
 
ee55457
d9e3953
ee55457
d9e3953
 
 
ee55457
d9e3953
 
ee55457
d9e3953
 
 
 
826ab06
d9e3953
 
34b8794
d9e3953
f9697a4
d9e3953
08ca6e7
826ab06
81b2874
f1e7215
 
1e4b89a
 
 
 
 
f1e7215
 
 
c6bdae3
f1e7215
 
48b85f9
ee55457
 
d9e3953
1e4b89a
 
 
 
378d519
ee55457
 
8f07d61
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#importing all the required libraries
import gradio as gr 
import librosa
from transformers import AutoFeatureExtractor, pipeline

#Loading and fixing the audio input
def load_and_fix_data(input_file, model_sampling_rate):
    """Load an audio file as a single-channel waveform at the model's rate.

    Args:
        input_file: Path of the audio file, passed straight to ``librosa.load``.
        model_sampling_rate: Sampling rate (Hz) the returned waveform must have.

    Returns:
        The waveform loaded by librosa, down-mixed to one channel if it has
        more than one dimension and resampled to ``model_sampling_rate`` when
        the loaded rate differs.
    """
    speech, sample_rate = librosa.load(input_file)
    if speech.ndim > 1:
        # librosa lays multi-channel audio out as (channels, samples), so the
        # down-mix must run across the channel axis; indexing [:, 0] / [:, 1]
        # would add two sample columns instead of two channels.
        speech = librosa.to_mono(speech)
    if sample_rate != model_sampling_rate:
        # orig_sr / target_sr are keyword-only in librosa >= 0.10; passing
        # them by keyword also works on older releases.
        speech = librosa.resample(
            speech, orig_sr=sample_rate, target_sr=model_sampling_rate
        )
    return speech

# Load the feature extractor of the Spanish ASR checkpoint to read the sampling
# rate it is configured with, then instantiate the speech-recognition pipeline.
model_name1 = "jonatasgrosman/wav2vec2-xls-r-1b-spanish"
feature_extractor = AutoFeatureExtractor.from_pretrained(model_name1)
# Sampling rate handed to load_and_fix_data so audio matches the ASR model.
sampling_rate = feature_extractor.sampling_rate
asr = pipeline("automatic-speech-recognition", model=model_name1)

# Instantiate a text-classification pipeline for harassment detection; it is
# applied to the text produced by the ASR pipeline.
model_name2 = "hackathon-pln-es/Detect-Acoso-Twitter-Es"
classifier = pipeline("text-classification", model = model_name2)

#Defining a function for speech-to-text-conversion
def speech_to_text(input_file):
    """Transcribe the audio file at ``input_file`` and return the text.

    The audio is loaded at the ASR model's sampling rate and fed to the ASR
    pipeline with a chunk length of 15 s and a stride length of 1 s.
    """
    waveform = load_and_fix_data(input_file, sampling_rate)
    asr_output = asr(waveform, chunk_length_s=15, stride_length_s=1)
    return asr_output["text"]
    
#Defining a function for Harassment detection (text classification)
def harassment_detector(transcribed_text):
    """Classify ``transcribed_text`` and return the first prediction's label."""
    predictions = classifier(transcribed_text)
    top_prediction = predictions[0]
    return top_prediction["label"]

#Defining a function which outputs audio transcription and the output of harassment detection module
# Separator placed between the transcription and the classification result.
new_line = "\n\n\n"


def asr_and_harassment_detection(input_file):
    """Transcribe an audio file and report its harassment classification.

    Args:
        input_file: Path of the audio file to process.

    Returns:
        One string holding the transcription, the ``new_line`` separator and
        the label produced by the harassment classifier.
    """
    transcript = speech_to_text(input_file)
    verdict = harassment_detector(transcript)
    return f"Audio Transcription :{transcript} {new_line} Audio content is: {verdict}"


# Gradio interface configuration: one microphone audio input (type="filepath"),
# one text box for the result, example audio file names, and the page title.
# NOTE(review): the example files are presumably shipped next to this script —
# confirm they exist in the deployment.
inputs=[gr.inputs.Audio(source="microphone", type="filepath", label="Record your audio")]
outputs=[gr.outputs.Textbox(label="Predicción")]
examples=[["audio2.wav"], ["sample_audio.wav"], ["test1.wav"], ["test2.wav"]]
title="Spanish-Audio-Transcription-based-Harassment-Detection"

# Description text passed to the Gradio interface; it links the two pre-trained
# models used for transcription and for harassment detection.
description = """ This is a Gradio demo for Spanish audio transcription-based harassment detection. To use this, simply provide an audio input (audio recording or via microphone), which will subsequently be transcribed and classified as Harassment/non-harassment pertaining to audio (transcription) with the help of pre-trained models.


Pre-trained model used for Spanish ASR: [jonatasgrosman/wav2vec2-xls-r-1b-spanish](https://huggingface.co/jonatasgrosman/wav2vec2-xls-r-1b-spanish)


Pre-trained model used for Harassment Detection: [hackathon-pln-es/Detect-Acoso-Twitter-Es](https://huggingface.co/hackathon-pln-es/Detect-Acoso-Twitter-Es)"""

# Build the Gradio interface around asr_and_harassment_detection with the
# configuration defined above, then launch it.
# NOTE(review): `layout`, `theme="huggingface"` and `enable_queue` look like
# options of an older Gradio API — confirm against the pinned Gradio version
# before upgrading.
gr.Interface(
    asr_and_harassment_detection,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    title=title,
    description=description,
    layout="horizontal",
    theme="huggingface",
).launch(enable_queue=True)