aadnk committed on
Commit
31ba778
1 Parent(s): 0cb931d

Do not add speech detected in gaps to the prompt window

Browse files
Files changed (1) hide show
  1. src/vad.py +8 -6
src/vad.py CHANGED
@@ -151,6 +151,7 @@ class AbstractTranscription(ABC):
151
  segment_start = segment['start']
152
  segment_end = segment['end']
153
  segment_expand_amount = segment.get('expand_amount', 0)
 
154
 
155
  segment_duration = segment_end - segment_start
156
 
@@ -187,19 +188,20 @@ class AbstractTranscription(ABC):
187
  languageCounter[segment_result['language']] += 1
188
 
189
  # Update prompt window
190
- self.__update_prompt_window(prompt_window, adjusted_segments, segment_end)
191
 
192
  if len(languageCounter) > 0:
193
  result['language'] = languageCounter.most_common(1)[0][0]
194
 
195
  return result
196
 
197
- def __update_prompt_window(self, prompt_window: Deque, adjusted_segments: List, segment_end: float):
198
  if (self.max_prompt_window is not None and self.max_prompt_window > 0):
199
- # Add segments to the current prompt window
200
- for segment in adjusted_segments:
201
- if segment.get('no_speech_prob', 0) <= PROMPT_NO_SPEECH_PROB:
202
- prompt_window.append(segment)
 
203
 
204
  while (len(prompt_window) > 0):
205
  first_end_time = prompt_window[0].get('end', 0)
 
151
  segment_start = segment['start']
152
  segment_end = segment['end']
153
  segment_expand_amount = segment.get('expand_amount', 0)
154
+ segment_gap = segment.get('gap', False)
155
 
156
  segment_duration = segment_end - segment_start
157
 
 
188
  languageCounter[segment_result['language']] += 1
189
 
190
  # Update prompt window
191
+ self.__update_prompt_window(prompt_window, adjusted_segments, segment_end, segment_gap)
192
 
193
  if len(languageCounter) > 0:
194
  result['language'] = languageCounter.most_common(1)[0][0]
195
 
196
  return result
197
 
198
+ def __update_prompt_window(self, prompt_window: Deque, adjusted_segments: List, segment_end: float, segment_gap: bool = False):
199
  if (self.max_prompt_window is not None and self.max_prompt_window > 0):
200
+ # Add segments to the current prompt window (unless it is a speech gap)
201
+ if not segment_gap:
202
+ for segment in adjusted_segments:
203
+ if segment.get('no_speech_prob', 0) <= PROMPT_NO_SPEECH_PROB:
204
+ prompt_window.append(segment)
205
 
206
  while (len(prompt_window) > 0):
207
  first_end_time = prompt_window[0].get('end', 0)