import logging
import os
import re
import subprocess

import gradio as gr
import matplotlib
import numpy as np
import scipy.io
import scipy.io.wavfile

# Headless backend: figures are returned to Gradio, never shown in a window.
matplotlib.use('Agg')
import matplotlib.pyplot as plt

logging.basicConfig(level=logging.INFO)

# Compiled once at module load. Raw strings, and the dot is escaped —
# the original '\d+.\d+' let '.' match any character.
SILENCE_DURATION_RE = re.compile(r'silence_duration: (\d+\.\d+)')
SILENCE_END_RE = re.compile(r'silence_end: (\d+\.\d+)\s')


def convert_to_wav(filename):
    """Convert an audio file to mono 16-bit PCM WAV at 22050 Hz via ffmpeg.

    Spaces in the path are replaced with underscores (the file is renamed
    on disk first) so the name is unambiguous on the ffmpeg command line.

    Returns the path of the converted file ('<stem>_converted.wav').
    """
    safe_name = filename.replace(" ", "_")
    os.rename(filename, safe_name)
    new_name = f"{os.path.splitext(safe_name)[0]}_converted.wav"
    # BUG FIX: the original command string was missing the input path
    # after '-i', so ffmpeg had no input file at all.
    command = [
        "ffmpeg", "-i", safe_name,
        "-f", "wav", "-bitexact",
        "-acodec", "pcm_s16le",
        "-ar", "22050", "-ac", "1",
        new_name, "-y",
    ]
    subprocess.run(command)
    return new_name


def get_chunk_times(in_filename, silence_threshold, silence_duration=1):
    """Detect silent intervals using ffmpeg's silencedetect filter.

    Args:
        in_filename: path to the audio file to analyze.
        silence_threshold: sensitivity in dB (used as -<threshold>dB noise floor).
        silence_duration: minimum silence length in seconds to report.

    Returns:
        (total_silence_seconds, chunks) where each chunk is
        [start_time, end_time] in seconds.
    """
    command = [
        "ffmpeg", "-i", in_filename,
        "-af", f"silencedetect=n=-{silence_threshold}dB:d={silence_duration}",
        "-f", "null", "-",
    ]
    # silencedetect reports on stderr; fold it into stdout for parsing.
    out = subprocess.run(command, stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT, text=True)
    total_silence = 0.0
    chunk_time = 0.0  # defensively initialized; original could raise NameError
    chunks = []
    for line in out.stdout.splitlines():
        match = SILENCE_DURATION_RE.search(line)
        if match:
            chunk_time = float(match.group(1))
            total_silence += chunk_time
        end = SILENCE_END_RE.search(line)
        if end:
            t_end = float(end.group(1))
            # BUG FIX: the original appended `chunks` itself as a third
            # element, creating a self-referential list. Only the
            # [start, end] interval is ever consumed downstream.
            chunks.append([t_end - chunk_time, t_end])
    logging.info("TS audio %s = %s", os.path.basename(in_filename), total_silence)
    return total_silence, chunks


def get_audio_plot(filename, chunks):
    """Plot the waveform of a WAV file, shading silent intervals in gray.

    Args:
        filename: path to a WAV file readable by scipy.io.wavfile.
        chunks: iterable of [start, end] silence intervals in seconds.

    Returns:
        The matplotlib Figure with the annotated waveform.
    """
    fig, ax = plt.subplots()
    fig.set_size_inches(18.5, 10.5)
    sample_rate, audio_buffer = scipy.io.wavfile.read(filename)
    duration = len(audio_buffer) / sample_rate
    time = np.arange(0, duration, 1 / sample_rate)
    ax.plot(time, audio_buffer)
    y_min = min(audio_buffer)
    y_max = max(audio_buffer)
    for start, end in (c[0:2] for c in chunks):
        ax.fill_between([start, end], y_min, y_max, color='gray', alpha=0.5)
    plt.xlabel('Time [s]')
    plt.ylabel('Amplitude')
    plt.title("Audio with silence marks")
    return plt.gcf()


def get_audio_info(audio, treshold):
    """Gradio handler: report total silence time and an annotated waveform.

    Args:
        audio: filepath of the uploaded audio (any ffmpeg-readable format).
        treshold: silence sensitivity in dB from the slider.
            (Name kept as-is — spelled this way in the original interface.)

    Returns:
        (total_silence_as_string, matplotlib_figure)
    """
    new_audio = convert_to_wav(audio)
    ts, chunks = get_chunk_times(new_audio, treshold, 1)
    p = get_audio_plot(new_audio, chunks)
    return str(ts), p


# NOTE(review): gr.inputs/gr.outputs is the legacy Gradio (<4.0) API;
# kept unchanged so behavior matches the installed version this was
# written against.
otext = gr.outputs.Textbox(type="auto", label="Silence time:")
oplot = gr.outputs.Image(type="plot", label="Audio with silence in gray areas")
iaudio = gr.inputs.Audio(source="upload", type="filepath", label=None)
isensitivity = gr.inputs.Slider(
    minimum=0, maximum=50, step=1, default=25, label="Silence sensitivity"
)

iface = gr.Interface(
    get_audio_info,
    [iaudio, isensitivity],
    [otext, oplot],
    description="Enter audio to view silence areas",
)

# Guarded so importing this module (e.g. for testing the helpers above)
# does not start the web server.
if __name__ == "__main__":
    iface.launch()