gngpostalsrvc committed on
Commit
e2e3b5f
1 Parent(s): 729ba94

added application file

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import crepe
4
+ import spacy
5
+ import librosa
6
+ import gradio as gr
7
+ import pandas as pd
8
+ from transformers import pipeline
9
+
10
# Models are loaded once at import time so every request reuses them.

# Speech-to-text pipeline (wav2vec2 checkpoint).
asr = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-large-960h-lv60-self",
)

# Text classification pipeline used to label the emotion of a transcript.
emo = pipeline(
    task="sentiment-analysis",
    model="arpanghoshal/EmoRoBERTa",
)

# spaCy English pipeline; its part-of-speech tags are used to find interjections.
lang_model = spacy.load(name="en_core_web_sm")
13
+
14
def _mean_and_std(values):
    """Return the (mean, standard deviation) of a feature array via pandas describe()."""
    stats = pd.DataFrame(values.T).describe().T
    return stats.loc[0, 'mean'], stats.loc[0, 'std']


def transcribe_and_describe(audio):
    """Transcribe a recording and compute simple delivery statistics for it.

    Args:
        audio: Path to an audio file (anything ``librosa.load`` accepts).

    Returns:
        A 10-tuple, in this order:
        ``(text, filler_word_pr, words_per_minute, mean_pitch, pitch_std,
        mean_volume, volume_std, mean_spectral_flatness,
        spectral_flatness_std, emotion)``.

        ``filler_word_pr`` is the fraction (0-1) of spaCy tokens tagged as
        interjections; it is 0.0 when the transcript has no tokens.
        ``words_per_minute`` is 0.0 when the clip has zero duration.
    """
    # Load the recording resampled to 16 kHz.
    samples, sr = librosa.load(audio, sr=16000)

    text = asr(samples)['text']

    # Interjections (POS tag INTJ) are treated as filler words.
    doc = lang_model(text)
    token_count = len(doc)
    filler_count = sum(1 for token in doc if token.pos_ == 'INTJ')
    # An empty transcript yields an empty doc; avoid dividing by zero.
    filler_word_pr = filler_count / token_count if token_count else 0.0

    # librosa >= 0.10 only accepts the signal as a keyword argument (y=...).
    mean_spectral_flatness, spectral_flatness_std = _mean_and_std(
        librosa.feature.spectral_flatness(y=samples)
    )
    mean_volume, volume_std = _mean_and_std(librosa.feature.rms(y=samples))

    # Only the per-frame frequency estimate from CREPE is used.
    _, frequency, _, _ = crepe.predict(samples, sr)
    mean_pitch, pitch_std = _mean_and_std(frequency)

    # split() without an argument ignores repeated spaces and returns an
    # empty list for an empty transcript (split(" ") would report one word).
    word_count = len(text.split())
    duration_minutes = librosa.get_duration(y=samples, sr=sr) / 60
    words_per_minute = word_count / duration_minutes if duration_minutes else 0.0

    emotion = emo(text)[0]['label']

    return (
        text,
        filler_word_pr,
        words_per_minute,
        mean_pitch,
        pitch_std,
        mean_volume,
        volume_std,
        mean_spectral_flatness,
        spectral_flatness_std,
        emotion,
    )
41
+
42
# Gradio maps the values returned by `fn` onto `outputs` by position, so the
# components below are listed in the same order as the tuple returned by
# transcribe_and_describe: transcription, filler-word ratio, words per minute,
# pitch mean/std, volume mean/std, spectral flatness mean/std, emotion.
# NOTE(review): the filler-word value is a 0-1 fraction rather than a percent,
# and nothing in this file converts volume to watts or flatness to dB --
# confirm whether the unit suffixes in these labels are intended.
gr.Interface(
    fn=transcribe_and_describe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=[
        gr.Text(label="Transcription"),
        gr.Text(label="Filler Word Percent"),
        gr.Text(label="Rate of Speech (WPM)"),
        gr.Text(label="Mean Pitch (Hz)"),
        gr.Text(label="Pitch Variation (Hz)"),
        gr.Text(label="Mean Volume (W)"),
        gr.Text(label="Volume Variation (W)"),
        gr.Text(label="Mean Spectral Flatness (dB)"),
        gr.Text(label="Spectral Flatness Variation (dB)"),
        gr.Text(label="Emotion"),
    ],
).launch()