kevinwang676 committed on
Commit
53bd66a
·
verified ·
1 Parent(s): ada57ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -25
app.py CHANGED
@@ -157,16 +157,16 @@ class subtitle:
157
  def normalize(self,ntype:str,fps=30):
158
  if ntype=="prcsv":
159
  h,m,s,fs=(self.start_time.replace(';',':')).split(":")#seconds
160
- self.start_time=int(h)*3600+int(m)*60+int(s)+round(int(fs)/fps,2)
161
  h,m,s,fs=(self.end_time.replace(';',':')).split(":")
162
- self.end_time=int(h)*3600+int(m)*60+int(s)+round(int(fs)/fps,2)
163
  elif ntype=="srt":
164
  h,m,s=self.start_time.split(":")
165
  s=s.replace(",",".")
166
- self.start_time=int(h)*3600+int(m)*60+round(float(s),2)
167
  h,m,s=self.end_time.split(":")
168
  s=s.replace(",",".")
169
- self.end_time=int(h)*3600+int(m)*60+round(float(s),2)
170
  else:
171
  raise ValueError
172
  def add_offset(self,offset=0):
@@ -217,7 +217,41 @@ def read_srt(uploaded_file):
217
  subtitle_list.append(st)
218
  return subtitle_list
219
 
 
220
  from pydub import AudioSegment
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
 
222
  def trim_audio(intervals, input_file_path, output_file_path):
223
  # load the audio file
@@ -227,21 +261,20 @@ def trim_audio(intervals, input_file_path, output_file_path):
227
  for i, (start_time, end_time) in enumerate(intervals):
228
  # extract the segment of the audio
229
  segment = audio[start_time*1000:end_time*1000]
230
- output_file_path_i = f"{output_file_path}_{i}.wav"
231
 
232
- if len(segment) < 3000:
233
- # Calculate how many times to repeat the audio to make it at least 2 seconds long
234
- repeat_count = (3000 // len(segment)) + 2
235
  # Repeat the audio
236
  longer_audio = segment * repeat_count
237
  # Save the extended audio
238
- print(f"Audio was less than 3 seconds. Extended to {len(longer_audio)} milliseconds.")
239
  longer_audio.export(output_file_path_i, format='wav')
 
240
  else:
241
- print("Audio is already 3 seconds or longer.")
242
- segment.export(output_file_path_i, format='wav')
243
-
244
-
245
 
246
  import re
247
 
@@ -286,19 +319,24 @@ def convert_from_srt(apikey, filename, audio_full, voice, multilingual):
286
  shutil.rmtree("output")
287
  if multilingual==False:
288
  for i in subtitle_list:
289
- os.makedirs("output", exist_ok=True)
290
- trim_audio([[i.start_time, i.end_time]], audio_full, f"sliced_audio_{i.index}")
291
- print(f"正在合成第{i.index}条语音")
292
- print(f"语音内容:{i.text}")
293
- convert(apikey, i.text, f"sliced_audio_{i.index}_0.wav", voice, i.text + " " + str(i.index))
 
 
 
294
  else:
295
  for i in subtitle_list:
296
- os.makedirs("output", exist_ok=True)
297
- trim_audio([[i.start_time, i.end_time]], audio_full, f"sliced_audio_{i.index}")
298
- print(f"正在合成第{i.index}条语音")
299
- print(f"语音内容:{i.text.splitlines()[1]}")
300
- convert(apikey, i.text.splitlines()[1], f"sliced_audio_{i.index}_0.wav", voice, i.text.splitlines()[1] + " " + str(i.index))
301
-
 
 
302
  merge_audios("output")
303
 
304
  return "AI配音版.wav"
@@ -334,4 +372,4 @@ with gr.Blocks() as app:
334
  </div>
335
  ''')
336
 
337
- app.launch(show_error=True)
 
157
def normalize(self, ntype: str, fps=30):
    """Convert ``self.start_time``/``self.end_time`` from strings to seconds.

    Supported formats:
      * ``"prcsv"`` — Premiere-style ``H:M:S;F`` (or ``H:M:S:F``) timestamps,
        where the last field is a frame count converted via ``fps``.
      * ``"srt"``   — SubRip ``H:M:S,mmm`` timestamps (comma decimal mark).

    Args:
        ntype: timestamp format, ``"prcsv"`` or ``"srt"``.
        fps: frames per second; used only for the ``"prcsv"`` frame field.

    Raises:
        ValueError: if ``ntype`` is not a supported format.
    """
    if ntype == "prcsv":
        # Premiere separates the frame count with ';' — unify to ':' first.
        h, m, s, fs = (self.start_time.replace(';', ':')).split(":")
        self.start_time = int(h) * 3600 + int(m) * 60 + int(s) + round(int(fs) / fps, 5)
        h, m, s, fs = (self.end_time.replace(';', ':')).split(":")
        self.end_time = int(h) * 3600 + int(m) * 60 + int(s) + round(int(fs) / fps, 5)
    elif ntype == "srt":
        h, m, s = self.start_time.split(":")
        # SRT uses a comma as the decimal separator for milliseconds.
        s = s.replace(",", ".")
        self.start_time = int(h) * 3600 + int(m) * 60 + round(float(s), 5)
        h, m, s = self.end_time.split(":")
        s = s.replace(",", ".")
        self.end_time = int(h) * 3600 + int(m) * 60 + round(float(s), 5)
    else:
        # Was a bare `raise ValueError`; a message makes failures diagnosable.
        raise ValueError(f"unsupported subtitle timestamp type: {ntype!r}")
172
  def add_offset(self,offset=0):
 
217
  subtitle_list.append(st)
218
  return subtitle_list
219
 
220
+ import webrtcvad
221
  from pydub import AudioSegment
222
+ from pydub.utils import make_chunks
223
+
224
def vad(audio_name, out_path_name):
    """Strip non-speech audio from a WAV file using WebRTC VAD.

    Loads ``audio_name``, converts it to 48 kHz mono 16-bit PCM (the only
    formats WebRTC VAD accepts), keeps the 30 ms frames classified as speech,
    and writes the concatenated voiced audio to ``<out_path_name>.wav``.

    Args:
        audio_name: path to the input WAV file.
        out_path_name: output path WITHOUT the ".wav" extension.
    """
    audio = AudioSegment.from_file(audio_name, format="wav")
    # WebRTC VAD supports only 8000/16000/32000/48000 Hz sample rates.
    audio = audio.set_frame_rate(48000)
    # Single channel (mono) is required by the VAD.
    audio = audio.set_channels(1)
    # Fix: webrtcvad requires 16-bit PCM; input WAVs may be 8/24/32-bit.
    audio = audio.set_sample_width(2)

    # Aggressiveness mode: integer 0..3, 3 filters non-speech most aggressively.
    vad = webrtcvad.Vad()
    vad.set_mode(3)

    frame_duration = 30  # ms; webrtcvad accepts only 10, 20 or 30 ms frames
    frame_width = int(audio.frame_rate * frame_duration / 1000)  # samples/frame
    frames = make_chunks(audio, frame_duration)

    # Perform voice activity detection frame by frame.
    voiced_frames = []
    for frame in frames:
        # The trailing chunk may be shorter than a full frame; the VAD would
        # reject it, so stop there (2 bytes per 16-bit sample).
        if len(frame.raw_data) < frame_width * 2:
            break
        if vad.is_speech(frame.raw_data, audio.frame_rate):
            voiced_frames.append(frame)

    # Concatenate the voiced frames back into a single segment.
    voiced_audio = sum(voiced_frames, AudioSegment.silent(duration=0))
    voiced_audio.export(f"{out_path_name}.wav", format="wav")
254
+
255
 
256
  def trim_audio(intervals, input_file_path, output_file_path):
257
  # load the audio file
 
261
  for i, (start_time, end_time) in enumerate(intervals):
262
  # extract the segment of the audio
263
  segment = audio[start_time*1000:end_time*1000]
264
+ output_file_path_i = f"increased_{i}.wav"
265
 
266
+ if len(segment) < 5000:
267
+ # Calculate how many times to repeat the audio to make it at least 5 seconds long
268
+ repeat_count = (5000 // len(segment)) + 3
269
  # Repeat the audio
270
  longer_audio = segment * repeat_count
271
  # Save the extended audio
272
+ print(f"Audio was less than 5 seconds. Extended to {len(longer_audio)} milliseconds.")
273
  longer_audio.export(output_file_path_i, format='wav')
274
+ vad(f"{output_file_path_i}", f"{output_file_path}_{i}")
275
  else:
276
+ print("Audio is already 5 seconds or longer.")
277
+ segment.export(f"{output_file_path}_{i}.wav", format='wav')
 
 
278
 
279
  import re
280
 
 
319
  shutil.rmtree("output")
320
  if multilingual==False:
321
  for i in subtitle_list:
322
+ try:
323
+ os.makedirs("output", exist_ok=True)
324
+ trim_audio([[i.start_time, i.end_time]], audio_full, f"sliced_audio_{i.index}")
325
+ print(f"正在合成第{i.index}条语音")
326
+ print(f"语音内容:{i.text}")
327
+ convert(apikey, i.text, f"sliced_audio_{i.index}_0.wav", voice, i.text + " " + str(i.index))
328
+ except Exception:
329
+ pass
330
  else:
331
  for i in subtitle_list:
332
+ try:
333
+ os.makedirs("output", exist_ok=True)
334
+ trim_audio([[i.start_time, i.end_time]], audio_full, f"sliced_audio_{i.index}")
335
+ print(f"正在合成第{i.index}条语音")
336
+ print(f"语音内容:{i.text.splitlines()[1]}")
337
+ convert(apikey, i.text.splitlines()[1], f"sliced_audio_{i.index}_0.wav", voice, i.text.splitlines()[1] + " " + str(i.index))
338
+ except Exception:
339
+ pass
340
  merge_audios("output")
341
 
342
  return "AI配音版.wav"
 
372
  </div>
373
  ''')
374
 
375
+ app.launch(share=False, show_error=True)