import numpy as np
import librosa
import joblib
import gradio as gr
from scipy.interpolate import interp1d
from pyAudioAnalysis import ShortTermFeatures
from pydub import AudioSegment
from pydub.silence import detect_nonsilent
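
# Pipeline: trim leading/trailing silence from the recording, extract
# short-term audio features, resize them to the model's 20 inputs, and
# classify the utterance as 'Si' or 'No'.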

def smart_resize(arr, target_size):
    """Linearly interpolate a (1, N) feature row to shape (1, target_size)."""
    current_size = arr.shape[1]
    current_idx = np.linspace(0, current_size - 1, current_size)
    target_idx = np.linspace(0, current_size - 1, target_size)
    # Interpolate/extrapolate onto the new index grid
    interp_func = interp1d(current_idx, arr.squeeze(), kind='linear',
                           fill_value="extrapolate")
    resized_arr = interp_func(target_idx)
    return resized_arr.reshape(1, target_size)
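
# Quick sanity check (sketch): a (1, 50) row comes back at the target width.
#   smart_resize(np.arange(50.0).reshape(1, -1), 20).shape  # -> (1, 20)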

def remove_silence(wav_file):
    """Trim leading/trailing silence and write the result to 'audio.wav'."""
    audSeg = AudioSegment.from_wav(wav_file)
    non_silence_ranges = detect_nonsilent(audSeg, min_silence_len=5,
                                          silence_thresh=-30)
    if not non_silence_ranges:
        # Nothing above the threshold: keep the clip as-is
        sound = audSeg
    else:
        start = non_silence_ranges[0][0]
        end = non_silence_ranges[-1][1]
        sound = audSeg[start:end]
    sound.export('audio.wav', format="wav")
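
# detect_nonsilent returns [start_ms, end_ms] pairs; trimming from the first
# onset to the last offset removes only edge silence and keeps internal pauses.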

def transform_data(audio):
    """Build the (1, 20) feature vector the classifier expects."""
    remove_silence(audio)
    x, sr = librosa.load('audio.wav')
    # 50 ms analysis windows with a 25 ms step (arguments are in samples)
    result, f_names = ShortTermFeatures.feature_extraction(x, sr, 0.050 * sr,
                                                           0.025 * sr)
    resized_features = smart_resize(result.reshape(1, -1), 20)
    return resized_features
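
# feature_extraction returns an (n_features, n_frames) matrix (34 short-term
# features per frame, or 68 with deltas, depending on the pyAudioAnalysis
# version); flattening and resizing pins the width to the 20 inputs above.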

def predict(newdf, loaded_model):
    """Return the predicted class and its per-class probabilities."""
    prediction = loaded_model.predict(newdf)
    proba = loaded_model.predict_proba(newdf)
    return prediction, proba[0]
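
# Caveat: SGDClassifier exposes predict_proba only when trained with a
# probabilistic loss ('log_loss' or 'modified_huber'); the pickled model is
# assumed to meet that requirement.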

def get_label(newpred):
    # predict() returns an array; inspect its single element
    if newpred[0] == 0:
        return 'No'
    else:
        return 'Si'

def load_model():
    loaded_model = joblib.load('models/sgd_90.pkl')
    return loaded_model
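
# Assumption: joblib pickles are tied to the scikit-learn version that saved
# them, so loading 'models/sgd_90.pkl' needs a compatible version installed.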

def main(audio):
    newdf = transform_data(audio)
    loaded_model = load_model()
    newpred, proba = predict(newdf, loaded_model)
    final = get_label(newpred)
    return final, {'Probability - Si': proba[1],
                   'Probability - No': proba[0]}
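
# The returned dict maps class names to confidences, the format the "label"
# output component (gr.Label) renders as per-class probability bars.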

demo = gr.Interface(
    title="Autoagent | YES or NO Classification - Layer7",
    description="""<h3>This model classifies whether the user says 'Si' or 'No'. 🎙️ </h3>
    <img src="https://huggingface.co/spaces/Adrian8as/imagen/resolve/main/output.png" width="350" height="350"/> <br>
    <b>Record your voice:</b>""",
    allow_flagging="never",
    fn=main,
    inputs=gr.Audio(
        sources=["microphone"],
        type="filepath",
    ),
    outputs=[gr.Textbox(label="Classification"), "label"],
)

if __name__ == "__main__":
    demo.launch(show_api=False)