|
|
|
|
|
''' |
|
MIT License |
|
|
|
Copyright (c) 2018 Mauricio |
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy |
|
of this software and associated documentation files (the "Software"), to deal |
|
in the Software without restriction, including without limitation the rights |
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
copies of the Software, and to permit persons to whom the Software is |
|
furnished to do so, subject to the following conditions: |
|
|
|
The above copyright notice and this permission notice shall be included in all |
|
copies or substantial portions of the Software. |
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|
SOFTWARE. |
|
|
|
Adapted from https://github.com/mauriciovander/silence-removal/blob/master/vad.py |
|
''' |
|
import numpy |
|
|
|
class VoiceActivityDetection:
    """Energy-based voice activity detector that drops long runs of silence.

    Samples are cut into fixed-size frames. Each frame's mean energy (mean of
    the squared samples) is compared with an adaptive threshold, which is the
    running average of a per-frame threshold over every frame seen so far.
    Once more than ``sc_threshold`` consecutive frames fall at or below that
    threshold, frames are reported as silence until a louder frame arrives.

    The adaptive threshold and the silence counter live on the instance and
    are not reset by :meth:`process`, so they carry over between calls.
    """

    def __init__(self):
        # Hop between the starts of consecutive frames, in samples.
        self.__step = 160
        # Frame length, in samples. Equal to the hop, so frames do not overlap.
        self.__buffer_size = 160
        # Pending input samples that have not been framed yet.
        self.__buffer = numpy.array([], dtype=numpy.int16)
        # Frames accepted as voice by the most recent process() call.
        self.__out_buffer = numpy.array([], dtype=numpy.int16)
        # Not read by any method of this class; kept so the set of instance
        # attributes stays as it was.
        self.__n = 0
        # Running average of the per-frame thresholds, and how many frames
        # have contributed to it.
        self.__VADthd = 0.
        self.__VADn = 0.
        # Number of consecutive frames classified as silent.
        self.__silence_counter = 0

    def vad(self, _frame, sc_threshold=20):
        """Classify one frame and update the adaptive threshold state.

        Args:
            _frame: Sequence of samples making up the frame. Must be
                non-empty (``numpy.min`` raises on an empty array).
            sc_threshold: Number of consecutive silent frames tolerated
                before frames start being reported as silence.

        Returns:
            ``False`` once more than ``sc_threshold`` consecutive frames have
            been silent, ``True`` otherwise.
        """
        energy = numpy.array(_frame) ** 2.
        threshold = 0.2

        # Per-frame threshold: 20% of the way from the smallest to the
        # largest squared sample of this frame.
        thd = numpy.min(energy) + numpy.ptp(energy) * threshold

        # Fold it into the running average over all frames seen so far.
        self.__VADthd = (self.__VADn * self.__VADthd + thd) / (self.__VADn + 1.)
        self.__VADn += 1.

        if numpy.mean(energy) <= self.__VADthd:
            self.__silence_counter += 1
        else:
            self.__silence_counter = 0

        return not (self.__silence_counter > sc_threshold)

    def add_samples(self, data):
        """Append ``data`` to the pending-sample buffer.

        Returns:
            ``True`` if the buffer now holds at least one full frame.
        """
        self.__buffer = numpy.append(self.__buffer, data)
        return len(self.__buffer) >= self.__buffer_size

    def get_frame(self):
        """Return the next frame and advance the buffer by one hop.

        The returned frame is shorter than the frame length when fewer
        samples than that are pending.
        """
        window = self.__buffer[:self.__buffer_size]
        self.__buffer = self.__buffer[self.__step:]
        return window

    def process(self, data, sc_threshold=20):
        """Return the samples of ``data`` that belong to non-silent frames.

        Both internal buffers are cleared first, so every call frames only
        ``data``. Trailing samples that do not fill a whole frame are ignored.

        Args:
            data: Input samples.
            sc_threshold: Forwarded to :meth:`vad`; defaults to the same
                value ``vad`` uses.

        Returns:
            1-D array with the kept frames concatenated in order. It is an
            empty ``int16`` array when no frame is kept.
        """
        self.__buffer = numpy.array([], dtype=numpy.int16)
        self.__out_buffer = numpy.array([], dtype=numpy.int16)

        if self.add_samples(data):
            # Collect kept frames and join them once; appending to the output
            # array per frame would re-copy everything accumulated so far.
            # The empty int16 seed keeps the result dtype the same as
            # repeated numpy.append calls would give.
            kept = [self.__out_buffer]
            while len(self.__buffer) >= self.__buffer_size:
                window = self.get_frame()
                if self.vad(window, sc_threshold):
                    kept.append(window)
            self.__out_buffer = numpy.concatenate(kept)

        return self.__out_buffer
|
|