kevinwang676 committed on
Commit
6829dab
1 Parent(s): 1d505ec

Update app_share.py

Browse files
Files changed (1) hide show
  1. app_share.py +42 -7
app_share.py CHANGED
@@ -217,7 +217,41 @@ def read_srt(uploaded_file):
217
  subtitle_list.append(st)
218
  return subtitle_list
219
 
 
220
  from pydub import AudioSegment
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
 
222
  def trim_audio(intervals, input_file_path, output_file_path):
223
  # load the audio file
@@ -227,19 +261,20 @@ def trim_audio(intervals, input_file_path, output_file_path):
227
  for i, (start_time, end_time) in enumerate(intervals):
228
  # extract the segment of the audio
229
  segment = audio[start_time*1000:end_time*1000]
230
- output_file_path_i = f"{output_file_path}_{i}.wav"
231
 
232
- if len(segment) < 3000:
233
- # Calculate how many times to repeat the audio to make it at least 2 seconds long
234
- repeat_count = (3000 // len(segment)) + 2
235
  # Repeat the audio
236
  longer_audio = segment * repeat_count
237
  # Save the extended audio
238
- print(f"Audio was less than 3 seconds. Extended to {len(longer_audio)} milliseconds.")
239
  longer_audio.export(output_file_path_i, format='wav')
 
240
  else:
241
- print("Audio is already 3 seconds or longer.")
242
- segment.export(output_file_path_i, format='wav')
243
 
244
  import re
245
 
 
217
  subtitle_list.append(st)
218
  return subtitle_list
219
 
220
+ import webrtcvad
221
  from pydub import AudioSegment
222
+ from pydub.utils import make_chunks
223
+
224
def vad(audio_name, out_path_name):
    """Keep only the voiced frames of a WAV file and write them to a new WAV.

    The input is converted to 48 kHz, mono, 16-bit PCM, cut into 30 ms
    frames, and each frame is classified with WebRTC VAD at its most
    aggressive setting. Frames classified as speech are concatenated in
    their original order and exported.

    Args:
        audio_name: Path of the input file; it is read as WAV.
        out_path_name: Output path WITHOUT extension; ".wav" is appended.

    Returns:
        None. The result is written to f"{out_path_name}.wav". If no frame
        is classified as speech, the exported file contains no audio.
    """
    audio = AudioSegment.from_file(audio_name, format="wav")
    # WebRTC VAD supports only 8000, 16000, 32000, or 48000 Hz.
    audio = audio.set_frame_rate(48000)
    # WebRTC VAD expects single-channel (mono) audio.
    audio = audio.set_channels(1)
    # WebRTC VAD expects 16-bit samples; without this, 8/24/32-bit input
    # produces frames whose byte length does not match what the VAD expects.
    audio = audio.set_sample_width(2)

    # Named `detector` so the local does not shadow this function's name.
    detector = webrtcvad.Vad()
    # Aggressiveness mode is an integer between 0 and 3; 3 is the most aggressive.
    detector.set_mode(3)

    # Split the audio into fixed-duration frames for classification.
    frame_duration = 30  # duration of a frame in ms (VAD accepts 10, 20 or 30)
    samples_per_frame = int(audio.frame_rate * frame_duration / 1000)
    bytes_per_frame = samples_per_frame * audio.sample_width
    frames = make_chunks(audio, frame_duration)

    # Perform voice activity detection frame by frame.
    voiced_frames = []
    for frame in frames:
        # Only the trailing chunk can be short; the VAD rejects partial frames.
        if len(frame.raw_data) < bytes_per_frame:
            break
        if detector.is_speech(frame.raw_data, audio.frame_rate):
            voiced_frames.append(frame)

    # Combine voiced frames back into one segment. The empty seed uses the
    # working frame rate so the output format does not depend on whether
    # any speech was found.
    empty = AudioSegment.silent(duration=0, frame_rate=audio.frame_rate)
    voiced_audio = sum(voiced_frames, empty)

    voiced_audio.export(f"{out_path_name}.wav", format="wav")
254
+
255
 
256
  def trim_audio(intervals, input_file_path, output_file_path):
257
  # load the audio file
 
261
  for i, (start_time, end_time) in enumerate(intervals):
262
  # extract the segment of the audio
263
  segment = audio[start_time*1000:end_time*1000]
264
+ output_file_path_i = f"increased_{i}.wav"
265
 
266
+ if len(segment) < 5000:
267
+ # Calculate how many times to repeat the audio to make it at least 5 seconds long
268
+ repeat_count = (5000 // len(segment)) + 3
269
  # Repeat the audio
270
  longer_audio = segment * repeat_count
271
  # Save the extended audio
272
+ print(f"Audio was less than 5 seconds. Extended to {len(longer_audio)} milliseconds.")
273
  longer_audio.export(output_file_path_i, format='wav')
274
+ vad(f"{output_file_path_i}", f"{output_file_path}_{i}")
275
  else:
276
+ print("Audio is already 5 seconds or longer.")
277
+ segment.export(f"{output_file_path}_{i}.wav", format='wav')
278
 
279
  import re
280