faster-whisper-webui

Sleeping

App Files Files Community

aadnk committed on Nov 27, 2022

Commit

20f75ae

•

1 Parent(s): c4e4d19

Add an option for automatically configuring parallel execution

Browse files

Files changed (3) hide show

app.py +16 -1
cli.py +3 -0
src/vadParallel.py +4 -0

app.py CHANGED Viewed

@@ -6,6 +6,8 @@ from io import StringIO
 import os
 import pathlib
 import tempfile
 from src.modelCache import ModelCache
 from src.vadParallel import ParallelContext, ParallelTranscription
@@ -29,6 +31,9 @@ DELETE_UPLOADED_FILES = True
 # Gradio seems to truncate files without keeping the extension, so we need to truncate the file prefix ourselves
 MAX_FILE_PREFIX_LENGTH = 17
 LANGUAGES = [
  "English", "Chinese", "German", "Spanish", "Russian", "Korean",
  "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan",
@@ -65,6 +70,14 @@ class WhisperTranscriber:
     def set_parallel_devices(self, vad_parallel_devices: str):
         """Store the comma-delimited device string as a list of whitespace-trimmed entries, or None when it is falsy."""
         if not vad_parallel_devices:
             # None or an empty string: no device list is recorded
             self.parallel_device_list = None
             return
         raw_entries = vad_parallel_devices.split(",")
         self.parallel_device_list = [ entry.strip() for entry in raw_entries ]
     def transcribe_webui(self, modelName, languageName, urlData, uploadFile, microphoneData, task, vad, vadMergeWindow, vadMaxMergeSize, vadPadding, vadPromptWindow):
         try:
             source, sourceName = self.__get_source(urlData, uploadFile, microphoneData)
@@ -268,11 +281,12 @@ class WhisperTranscriber:
 def create_ui(input_audio_max_duration, share=False, server_name: str = None, server_port: int = 7860,
-              default_model_name: str = "medium", default_vad: str = None, vad_parallel_devices: str = None, vad_process_timeout: float = None, vad_cpu_cores: int = 1):
     ui = WhisperTranscriber(input_audio_max_duration, vad_process_timeout, vad_cpu_cores)
     # Specify a list of devices to use for parallel processing
     ui.set_parallel_devices(vad_parallel_devices)
     ui_description = "Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse "
     ui_description += " audio and is also a multi-task model that can perform multilingual speech recognition "
@@ -319,6 +333,7 @@ if __name__ == '__main__':
     parser.add_argument("--vad_parallel_devices", type=str, default="", help="A commma delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.")
     parser.add_argument("--vad_cpu_cores", type=int, default=1, help="The number of CPU cores to use for VAD pre-processing.")
     parser.add_argument("--vad_process_timeout", type=float, default="1800", help="The number of seconds before inactivate processes are terminated. Use 0 to close processes immediately, or None for no timeout.")
     args = parser.parse_args().__dict__
     create_ui(**args)

 import os
 import pathlib
 import tempfile
+import torch
 from src.modelCache import ModelCache
 from src.vadParallel import ParallelContext, ParallelTranscription
 # Gradio seems to truncate files without keeping the extension, so we need to truncate the file prefix ourselves
 MAX_FILE_PREFIX_LENGTH = 17
+# Limit auto_parallel to a certain number of CPUs (specify vad_cpu_cores to get a higher number)
+MAX_AUTO_CPU_CORES = 8
 LANGUAGES = [
  "English", "Chinese", "German", "Spanish", "Russian", "Korean",
  "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan",
     def set_parallel_devices(self, vad_parallel_devices: str):
         """Store the comma-delimited device string as a list of whitespace-trimmed entries, or None when it is falsy."""
         if not vad_parallel_devices:
             # None or an empty string: no device list is recorded
             self.parallel_device_list = None
             return
         raw_entries = vad_parallel_devices.split(",")
         self.parallel_device_list = [ entry.strip() for entry in raw_entries ]
+    def set_auto_parallel(self, auto_parallel: bool):
+        if auto_parallel:
+            if torch.cuda.is_available():
+                self.parallel_device_list = [ str(gpu_id) for gpu_id in range(torch.cuda.device_count())]
+            self.vad_cpu_cores = min(os.cpu_count(), MAX_AUTO_CPU_CORES)
+            print("[Auto parallel] Using GPU devices " + str(self.parallel_device_list) + " and " + str(self.vad_cpu_cores) + " CPU cores for VAD/transcription.")
     def transcribe_webui(self, modelName, languageName, urlData, uploadFile, microphoneData, task, vad, vadMergeWindow, vadMaxMergeSize, vadPadding, vadPromptWindow):
         try:
             source, sourceName = self.__get_source(urlData, uploadFile, microphoneData)
 def create_ui(input_audio_max_duration, share=False, server_name: str = None, server_port: int = 7860,
+              default_model_name: str = "medium", default_vad: str = None, vad_parallel_devices: str = None, vad_process_timeout: float = None, vad_cpu_cores: int = 1, auto_parallel: bool = False):
     ui = WhisperTranscriber(input_audio_max_duration, vad_process_timeout, vad_cpu_cores)
     # Specify a list of devices to use for parallel processing
     ui.set_parallel_devices(vad_parallel_devices)
+    ui.set_auto_parallel(auto_parallel)
     ui_description = "Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse "
     ui_description += " audio and is also a multi-task model that can perform multilingual speech recognition "
     parser.add_argument("--vad_parallel_devices", type=str, default="", help="A commma delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.")
     parser.add_argument("--vad_cpu_cores", type=int, default=1, help="The number of CPU cores to use for VAD pre-processing.")
     parser.add_argument("--vad_process_timeout", type=float, default="1800", help="The number of seconds before inactivate processes are terminated. Use 0 to close processes immediately, or None for no timeout.")
+    parser.add_argument("--auto_parallel", type=bool, default=False, help="True to use all available GPUs and CPU cores for processing. Use vad_cpu_cores/vad_parallel_devices to specify the number of CPU cores/GPUs to use.")
     args = parser.parse_args().__dict__
     create_ui(**args)

cli.py CHANGED Viewed

@@ -34,6 +34,7 @@ def cli():
     parser.add_argument("--vad_prompt_window", type=optional_float, default=3, help="The window size of the prompt to pass to Whisper")
     parser.add_argument("--vad_cpu_cores", type=int, default=1, help="The number of CPU cores to use for VAD pre-processing.")
     parser.add_argument("--vad_parallel_devices", type=str, default="", help="A commma delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.")
     parser.add_argument("--temperature", type=float, default=0, help="temperature to use for sampling")
     parser.add_argument("--best_of", type=optional_int, default=5, help="number of candidates when sampling with non-zero temperature")
@@ -75,10 +76,12 @@ def cli():
     vad_padding = args.pop("vad_padding")
     vad_prompt_window = args.pop("vad_prompt_window")
     vad_cpu_cores = args.pop("vad_cpu_cores")
     model = WhisperContainer(model_name, device=device, download_root=model_dir)
     transcriber = WhisperTranscriber(delete_uploaded_files=False, vad_cpu_cores=vad_cpu_cores)
     transcriber.set_parallel_devices(args.pop("vad_parallel_devices"))
     if (transcriber._has_parallel_devices()):
         print("Using parallel devices:", transcriber.parallel_device_list)

     parser.add_argument("--vad_prompt_window", type=optional_float, default=3, help="The window size of the prompt to pass to Whisper")
     parser.add_argument("--vad_cpu_cores", type=int, default=1, help="The number of CPU cores to use for VAD pre-processing.")
     parser.add_argument("--vad_parallel_devices", type=str, default="", help="A commma delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.")
+    parser.add_argument("--auto_parallel", type=bool, default=False, help="True to use all available GPUs and CPU cores for processing. Use vad_cpu_cores/vad_parallel_devices to specify the number of CPU cores/GPUs to use.")
     parser.add_argument("--temperature", type=float, default=0, help="temperature to use for sampling")
     parser.add_argument("--best_of", type=optional_int, default=5, help="number of candidates when sampling with non-zero temperature")
     vad_padding = args.pop("vad_padding")
     vad_prompt_window = args.pop("vad_prompt_window")
     vad_cpu_cores = args.pop("vad_cpu_cores")
+    auto_parallel = args.pop("auto_parallel")
     model = WhisperContainer(model_name, device=device, download_root=model_dir)
     transcriber = WhisperTranscriber(delete_uploaded_files=False, vad_cpu_cores=vad_cpu_cores)
     transcriber.set_parallel_devices(args.pop("vad_parallel_devices"))
+    transcriber.set_auto_parallel(auto_parallel)
     if (transcriber._has_parallel_devices()):
         print("Using parallel devices:", transcriber.parallel_device_list)

src/vadParallel.py CHANGED Viewed

@@ -176,6 +176,10 @@ class ParallelTranscription(AbstractTranscription):
         while (chunk_start < total_duration):
             chunk_end = min(chunk_start + chunk_size, total_duration)
             print("Parallel VAD: Executing chunk from " + str(chunk_start) + " to " +
                     str(chunk_end) + " on CPU device " + str(cpu_device_id))
             parameters.append([audio, config, chunk_start, chunk_end]);

         while (chunk_start < total_duration):
             chunk_end = min(chunk_start + chunk_size, total_duration)
+            if (chunk_end - chunk_start < 1):
+                # No need to process chunks that are less than 1 second
+                break
             print("Parallel VAD: Executing chunk from " + str(chunk_start) + " to " +
                     str(chunk_end) + " on CPU device " + str(cpu_device_id))
             parameters.append([audio, config, chunk_start, chunk_end]);