Spaces:
Running
Running
Add output_dir to the WebUI
Browse files
app.py
CHANGED
@@ -60,7 +60,8 @@ LANGUAGES = [
|
|
60 |
WHISPER_MODELS = ["tiny", "base", "small", "medium", "large", "large-v1", "large-v2"]
|
61 |
|
62 |
class WhisperTranscriber:
|
63 |
-
def __init__(self, input_audio_max_duration: float = DEFAULT_INPUT_AUDIO_MAX_DURATION, vad_process_timeout: float = None,
|
|
|
64 |
self.model_cache = ModelCache()
|
65 |
self.parallel_device_list = None
|
66 |
self.gpu_parallel_context = None
|
@@ -71,6 +72,7 @@ class WhisperTranscriber:
|
|
71 |
self.vad_model = None
|
72 |
self.inputAudioMaxDuration = input_audio_max_duration
|
73 |
self.deleteUploadedFiles = delete_uploaded_files
|
|
|
74 |
|
75 |
def set_parallel_devices(self, vad_parallel_devices: str):
|
76 |
self.parallel_device_list = [ device.strip() for device in vad_parallel_devices.split(",") ] if vad_parallel_devices else None
|
@@ -103,6 +105,8 @@ class WhisperTranscriber:
|
|
103 |
downloadDirectory = tempfile.mkdtemp()
|
104 |
source_index = 0
|
105 |
|
|
|
|
|
106 |
# Execute whisper
|
107 |
for source in sources:
|
108 |
source_prefix = ""
|
@@ -117,7 +121,7 @@ class WhisperTranscriber:
|
|
117 |
result = self.transcribe_file(model, source.source_path, selectedLanguage, task, vad, vadMergeWindow, vadMaxMergeSize, vadPadding, vadPromptWindow)
|
118 |
filePrefix = slugify(source_prefix + source.get_short_name(), allow_unicode=True)
|
119 |
|
120 |
-
source_download, source_text, source_vtt = self.write_result(result, filePrefix,
|
121 |
|
122 |
if len(sources) > 1:
|
123 |
# Add new line separators
|
@@ -332,8 +336,10 @@ class WhisperTranscriber:
|
|
332 |
|
333 |
|
334 |
def create_ui(input_audio_max_duration, share=False, server_name: str = None, server_port: int = 7860,
|
335 |
-
default_model_name: str = "medium", default_vad: str = None, vad_parallel_devices: str = None,
|
336 |
-
|
|
|
|
|
337 |
|
338 |
# Specify a list of devices to use for parallel processing
|
339 |
ui.set_parallel_devices(vad_parallel_devices)
|
@@ -385,6 +391,7 @@ if __name__ == '__main__':
|
|
385 |
parser.add_argument("--vad_cpu_cores", type=int, default=1, help="The number of CPU cores to use for VAD pre-processing.")
|
386 |
parser.add_argument("--vad_process_timeout", type=float, default="1800", help="The number of seconds before inactivate processes are terminated. Use 0 to close processes immediately, or None for no timeout.")
|
387 |
parser.add_argument("--auto_parallel", type=bool, default=False, help="True to use all available GPUs and CPU cores for processing. Use vad_cpu_cores/vad_parallel_devices to specify the number of CPU cores/GPUs to use.")
|
|
|
388 |
|
389 |
args = parser.parse_args().__dict__
|
390 |
create_ui(**args)
|
|
|
60 |
WHISPER_MODELS = ["tiny", "base", "small", "medium", "large", "large-v1", "large-v2"]
|
61 |
|
62 |
class WhisperTranscriber:
|
63 |
+
def __init__(self, input_audio_max_duration: float = DEFAULT_INPUT_AUDIO_MAX_DURATION, vad_process_timeout: float = None,
|
64 |
+
vad_cpu_cores: int = 1, delete_uploaded_files: bool = DELETE_UPLOADED_FILES, output_dir: str = None):
|
65 |
self.model_cache = ModelCache()
|
66 |
self.parallel_device_list = None
|
67 |
self.gpu_parallel_context = None
|
|
|
72 |
self.vad_model = None
|
73 |
self.inputAudioMaxDuration = input_audio_max_duration
|
74 |
self.deleteUploadedFiles = delete_uploaded_files
|
75 |
+
self.output_dir = output_dir
|
76 |
|
77 |
def set_parallel_devices(self, vad_parallel_devices: str):
|
78 |
self.parallel_device_list = [ device.strip() for device in vad_parallel_devices.split(",") ] if vad_parallel_devices else None
|
|
|
105 |
downloadDirectory = tempfile.mkdtemp()
|
106 |
source_index = 0
|
107 |
|
108 |
+
outputDirectory = self.output_dir if self.output_dir is not None else downloadDirectory
|
109 |
+
|
110 |
# Execute whisper
|
111 |
for source in sources:
|
112 |
source_prefix = ""
|
|
|
121 |
result = self.transcribe_file(model, source.source_path, selectedLanguage, task, vad, vadMergeWindow, vadMaxMergeSize, vadPadding, vadPromptWindow)
|
122 |
filePrefix = slugify(source_prefix + source.get_short_name(), allow_unicode=True)
|
123 |
|
124 |
+
source_download, source_text, source_vtt = self.write_result(result, filePrefix, outputDirectory)
|
125 |
|
126 |
if len(sources) > 1:
|
127 |
# Add new line separators
|
|
|
336 |
|
337 |
|
338 |
def create_ui(input_audio_max_duration, share=False, server_name: str = None, server_port: int = 7860,
|
339 |
+
default_model_name: str = "medium", default_vad: str = None, vad_parallel_devices: str = None,
|
340 |
+
vad_process_timeout: float = None, vad_cpu_cores: int = 1, auto_parallel: bool = False,
|
341 |
+
output_dir: str = None):
|
342 |
+
ui = WhisperTranscriber(input_audio_max_duration, vad_process_timeout, vad_cpu_cores, DELETE_UPLOADED_FILES, output_dir)
|
343 |
|
344 |
# Specify a list of devices to use for parallel processing
|
345 |
ui.set_parallel_devices(vad_parallel_devices)
|
|
|
391 |
parser.add_argument("--vad_cpu_cores", type=int, default=1, help="The number of CPU cores to use for VAD pre-processing.")
|
392 |
parser.add_argument("--vad_process_timeout", type=float, default="1800", help="The number of seconds before inactivate processes are terminated. Use 0 to close processes immediately, or None for no timeout.")
|
393 |
parser.add_argument("--auto_parallel", type=bool, default=False, help="True to use all available GPUs and CPU cores for processing. Use vad_cpu_cores/vad_parallel_devices to specify the number of CPU cores/GPUs to use.")
|
394 |
+
parser.add_argument("--output_dir", "-o", type=str, default=None, help="directory to save the outputs")
|
395 |
|
396 |
args = parser.parse_args().__dict__
|
397 |
create_ui(**args)
|