avans06 committed on
Commit
9428712
·
1 Parent(s): 90ee2b3

Add an argument: merge_subtitle_with_sources.

Browse files

When passed as a command-line argument to app.py, it merges the subtitle (srt) with each source file and moves the source files to the output directory.
This argument takes effect only when output_dir is set.

Files changed (1) hide show
  1. app.py +29 -12
app.py CHANGED
@@ -102,15 +102,14 @@ class WhisperTranscriber:
102
  vad, vadMergeWindow, vadMaxMergeSize,
103
  word_timestamps: bool = False, highlight_words: bool = False,
104
  progress=gr.Progress()):
 
105
  if languageName == "Chinese":
106
- initial_prompt = "繁體: "
107
  self.app_config.vad_initial_prompt_mode = "prepend_all_segments"
108
 
109
  vadOptions = VadOptions(vad, vadMergeWindow, vadMaxMergeSize, self.app_config.vad_padding, self.app_config.vad_prompt_window, self.app_config.vad_initial_prompt_mode)
110
 
111
- return self.transcribe_webui(modelName, languageName, urlData, multipleFiles, microphoneData, task, vadOptions,
112
- initial_prompt=initial_prompt,
113
- word_timestamps=word_timestamps, highlight_words=highlight_words, progress=progress)
114
 
115
  # Entry function for the full tab
116
  def transcribe_webui_full(self, modelName, languageName, urlData, multipleFiles, microphoneData, task,
@@ -260,21 +259,37 @@ class WhisperTranscriber:
260
  # Cleanup source
261
  if self.deleteUploadedFiles:
262
  for source in sources:
263
- if self.app_config.save_downloaded_files and self.app_config.output_dir is not None and urlData:
264
- print("Saving downloaded file [" + os.path.basename(source.source_path) + "]")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  try:
266
  shutil.copy(source.source_path, self.app_config.output_dir)
267
  except Exception as e:
268
  # Ignore error - it's just a cleanup
269
- print("Error saving downloaded file " + source.source_path + ": " + str(e))
270
 
271
- print("Deleting source file " + source.source_path)
272
-
273
  try:
274
  os.remove(source.source_path)
275
  except Exception as e:
276
  # Ignore error - it's just a cleanup
277
- print("Error deleting source file " + source.source_path + ": " + str(e))
278
 
279
  except ExceededMaximumDuration as e:
280
  return [], ("[ERROR]: Maximum remote video length is " + str(e.maxDuration) + "s, file was " + str(e.videoDuration) + "s"), "[ERROR]"
@@ -481,7 +496,7 @@ class WhisperTranscriber:
481
  def create_ui(app_config: ApplicationConfig):
482
  ui = WhisperTranscriber(app_config.input_audio_max_duration, app_config.vad_process_timeout, app_config.vad_cpu_cores,
483
  app_config.delete_uploaded_files, app_config.output_dir, app_config)
484
-
485
  # Specify a list of devices to use for parallel processing
486
  ui.set_parallel_devices(app_config.vad_parallel_devices)
487
  ui.set_auto_parallel(app_config.auto_parallel)
@@ -666,7 +681,9 @@ if __name__ == '__main__':
666
  parser.add_argument("--language", type=str, default=None, choices=sorted(get_language_names()) + sorted([k.title() for k in _TO_LANGUAGE_CODE.keys()]),
667
  help="language spoken in the audio, specify None to perform language detection")
668
  parser.add_argument("--save_downloaded_files", action='store_true', \
669
- help="True to move downloaded files to outputs.")
 
 
670
  parser.add_argument("--autolaunch", action='store_true', \
671
  help="open the webui URL in the system's default browser upon launch")
672
 
 
102
  vad, vadMergeWindow, vadMaxMergeSize,
103
  word_timestamps: bool = False, highlight_words: bool = False,
104
  progress=gr.Progress()):
105
+ decodeOptions = dict(word_timestamps=word_timestamps)
106
  if languageName == "Chinese":
107
+ decodeOptions.update(initial_prompt="繁體: ")
108
  self.app_config.vad_initial_prompt_mode = "prepend_all_segments"
109
 
110
  vadOptions = VadOptions(vad, vadMergeWindow, vadMaxMergeSize, self.app_config.vad_padding, self.app_config.vad_prompt_window, self.app_config.vad_initial_prompt_mode)
111
 
112
+ return self.transcribe_webui(modelName, languageName, urlData, multipleFiles, microphoneData, task, vadOptions, highlight_words=highlight_words, progress=progress, **decodeOptions)
 
 
113
 
114
  # Entry function for the full tab
115
  def transcribe_webui_full(self, modelName, languageName, urlData, multipleFiles, microphoneData, task,
 
259
  # Cleanup source
260
  if self.deleteUploadedFiles:
261
  for source in sources:
262
+ if self.app_config.merge_subtitle_with_sources and self.app_config.output_dir is not None and len(source_download) > 0:
263
+ print("merge subtitle(srt) with source file [" + source.source_name + "]")
264
+ outRsult = ""
265
+ try:
266
+ srt_path = source_download[0]
267
+ save_path = os.path.join(self.app_config.output_dir, source.source_name)
268
+ save_without_ext, ext = os.path.splitext(save_path)
269
+ output_with_srt = save_without_ext + ".srt" + ext
270
+
271
+ #ffmpeg -i "input.mp4" -i "input.srt" -c copy -c:s mov_text output.mp4
272
+ input_file = ffmpeg.input(source.source_path)
273
+ input_srt = ffmpeg.input(srt_path)
274
+ out = ffmpeg.output(input_file, input_srt, output_with_srt, vcodec='copy', acodec='copy', scodec='mov_text')
275
+ outRsult = out.run()
276
+ except Exception as e:
277
+ # Ignore error - it's just a cleanup
278
+ print("Error merge subtitle with source file: \n" + source.source_path + ", \n" + str(e), outRsult)
279
+ elif self.app_config.save_downloaded_files and self.app_config.output_dir is not None and urlData:
280
+ print("Saving downloaded file [" + source.source_name + "]")
281
  try:
282
  shutil.copy(source.source_path, self.app_config.output_dir)
283
  except Exception as e:
284
  # Ignore error - it's just a cleanup
285
+ print("Error saving downloaded file: \n" + source.source_path + ", \n" + str(e))
286
 
287
+ print("Deleting temporary source file: " + source.source_path)
 
288
  try:
289
  os.remove(source.source_path)
290
  except Exception as e:
291
  # Ignore error - it's just a cleanup
292
+ print("Error deleting temporary source file: \n" + source.source_path + ", \n" + str(e))
293
 
294
  except ExceededMaximumDuration as e:
295
  return [], ("[ERROR]: Maximum remote video length is " + str(e.maxDuration) + "s, file was " + str(e.videoDuration) + "s"), "[ERROR]"
 
496
  def create_ui(app_config: ApplicationConfig):
497
  ui = WhisperTranscriber(app_config.input_audio_max_duration, app_config.vad_process_timeout, app_config.vad_cpu_cores,
498
  app_config.delete_uploaded_files, app_config.output_dir, app_config)
499
+
500
  # Specify a list of devices to use for parallel processing
501
  ui.set_parallel_devices(app_config.vad_parallel_devices)
502
  ui.set_auto_parallel(app_config.auto_parallel)
 
681
  parser.add_argument("--language", type=str, default=None, choices=sorted(get_language_names()) + sorted([k.title() for k in _TO_LANGUAGE_CODE.keys()]),
682
  help="language spoken in the audio, specify None to perform language detection")
683
  parser.add_argument("--save_downloaded_files", action='store_true', \
684
+ help="True to move downloaded files to outputs directory. This argument will take effect only after output_dir is set.")
685
+ parser.add_argument("--merge_subtitle_with_sources", action='store_true', \
686
+ help="True to merge subtitle(srt) with sources and move the sources files to the outputs directory. This argument will take effect only after output_dir is set.")
687
  parser.add_argument("--autolaunch", action='store_true', \
688
  help="open the webui URL in the system's default browser upon launch")
689