Whisper-WebUIFr

Sleeping

App Files Files Community

jhj0517 committed on Oct 28, 2024

Commit

2a2f7c6

1 Parent(s): 19e342a

Use constant for gradio none validation values

Browse files

Files changed (3) hide show

modules/utils/constants.py +3 -0
modules/whisper/base_transcription_pipeline.py +9 -9
modules/whisper/data_classes.py +7 -7

modules/utils/constants.py CHANGED Viewed

@@ -1,3 +1,6 @@
 from gradio_i18n import Translate, gettext as _
 AUTOMATIC_DETECTION = _("Automatic Detection")

 from gradio_i18n import Translate, gettext as _
 AUTOMATIC_DETECTION = _("Automatic Detection")
+GRADIO_NONE_STR = ""
+GRADIO_NONE_NUMBER_MAX = 9999
+GRADIO_NONE_NUMBER_MIN = 0

modules/whisper/base_transcription_pipeline.py CHANGED Viewed

@@ -15,7 +15,7 @@ from dataclasses import astuple
 from modules.uvr.music_separator import MusicSeparator
 from modules.utils.paths import (WHISPER_MODELS_DIR, DIARIZATION_MODELS_DIR, OUTPUT_DIR, DEFAULT_PARAMETERS_CONFIG_PATH,
                                  UVR_MODELS_DIR)
-from modules.utils.constants import AUTOMATIC_DETECTION
 from modules.utils.subtitle_manager import get_srt, get_vtt, get_txt, write_file, safe_filename
 from modules.utils.youtube_manager import get_ytdata, get_ytaudio
 from modules.utils.files_manager import get_media_files, format_gradio_files, load_yaml, save_yaml
@@ -519,19 +519,19 @@ class BaseTranscriptionPipeline(ABC):
             language_code_dict = {value: key for key, value in whisper.tokenizer.LANGUAGES.items()}
             params.whisper.lang = language_code_dict[params.lang]
-        if not params.whisper.initial_prompt:
             params.whisper.initial_prompt = None
-        if not params.whisper.prefix:
             params.whisper.prefix = None
-        if not params.whisper.hotwords:
             params.whisper.hotwords = None
-        if params.whisper.max_new_tokens == 0:
             params.whisper.max_new_tokens = None
-        if params.whisper.hallucination_silence_threshold == 0:
             params.whisper.hallucination_silence_threshold = None
-        if params.whisper.language_detection_threshold == 0:
             params.whisper.language_detection_threshold = None
-        if params.vad.max_speech_duration_s >= 9999:
             params.vad.max_speech_duration_s = float('inf')
         return params
@@ -555,7 +555,7 @@ class BaseTranscriptionPipeline(ABC):
             cached_yaml["whisper"]["lang"] = AUTOMATIC_DETECTION.unwrap()
         if cached_yaml["vad"].get("max_speech_duration_s", float('inf')) == float('inf'):
-            cached_yaml["vad"]["max_speech_duration_s"] = 9999
         if cached_yaml is not None and cached_yaml:
             save_yaml(cached_yaml, DEFAULT_PARAMETERS_CONFIG_PATH)

 from modules.uvr.music_separator import MusicSeparator
 from modules.utils.paths import (WHISPER_MODELS_DIR, DIARIZATION_MODELS_DIR, OUTPUT_DIR, DEFAULT_PARAMETERS_CONFIG_PATH,
                                  UVR_MODELS_DIR)
+from modules.utils.constants import *
 from modules.utils.subtitle_manager import get_srt, get_vtt, get_txt, write_file, safe_filename
 from modules.utils.youtube_manager import get_ytdata, get_ytaudio
 from modules.utils.files_manager import get_media_files, format_gradio_files, load_yaml, save_yaml
             language_code_dict = {value: key for key, value in whisper.tokenizer.LANGUAGES.items()}
             params.whisper.lang = language_code_dict[params.lang]
+        if params.whisper.initial_prompt == GRADIO_NONE_STR:
             params.whisper.initial_prompt = None
+        if params.whisper.prefix == GRADIO_NONE_STR:
             params.whisper.prefix = None
+        if params.whisper.hotwords == GRADIO_NONE_STR:
             params.whisper.hotwords = None
+        if params.whisper.max_new_tokens == GRADIO_NONE_NUMBER_MIN:
             params.whisper.max_new_tokens = None
+        if params.whisper.hallucination_silence_threshold == GRADIO_NONE_NUMBER_MIN:
             params.whisper.hallucination_silence_threshold = None
+        if params.whisper.language_detection_threshold == GRADIO_NONE_NUMBER_MIN:
             params.whisper.language_detection_threshold = None
+        if params.vad.max_speech_duration_s == GRADIO_NONE_NUMBER_MAX:
             params.vad.max_speech_duration_s = float('inf')
         return params
             cached_yaml["whisper"]["lang"] = AUTOMATIC_DETECTION.unwrap()
         if cached_yaml["vad"].get("max_speech_duration_s", float('inf')) == float('inf'):
+            cached_yaml["vad"]["max_speech_duration_s"] = GRADIO_NONE_NUMBER_MAX
         if cached_yaml is not None and cached_yaml:
             save_yaml(cached_yaml, DEFAULT_PARAMETERS_CONFIG_PATH)

modules/whisper/data_classes.py CHANGED Viewed

@@ -7,7 +7,7 @@ from enum import Enum
 from copy import deepcopy
 import yaml
-from modules.utils.constants import AUTOMATIC_DETECTION
 class WhisperImpl(Enum):
@@ -82,7 +82,7 @@ class VadParams(BaseParams):
             ),
             gr.Number(
                 label="Maximum Speech Duration (s)",
-                value=defaults.get("max_speech_duration_s", cls.__fields__["max_speech_duration_s"].default),
                 info="Maximum duration of speech chunks in \"seconds\"."
             ),
             gr.Number(
@@ -373,7 +373,7 @@ class WhisperParams(BaseParams):
             ),
             gr.Textbox(
                 label="Initial Prompt",
-                value=defaults.get("initial_prompt", cls.__fields__["initial_prompt"].default),
                 info="Initial prompt for first window"
             ),
             gr.Slider(
@@ -411,7 +411,7 @@ class WhisperParams(BaseParams):
             ),
             gr.Textbox(
                 label="Prefix",
-                value=defaults.get("prefix", cls.__fields__["prefix"].default),
                 info="Prefix text for first window"
             ),
             gr.Checkbox(
@@ -446,7 +446,7 @@ class WhisperParams(BaseParams):
             ),
             gr.Number(
                 label="Max New Tokens",
-                value=defaults.get("max_new_tokens", cls.__fields__["max_new_tokens"].default),
                 precision=0,
                 info="Maximum number of new tokens per chunk"
             ),
@@ -459,7 +459,7 @@ class WhisperParams(BaseParams):
             gr.Number(
                 label="Hallucination Silence Threshold (sec)",
                 value=defaults.get("hallucination_silence_threshold",
-                                   cls.__fields__["hallucination_silence_threshold"].default),
                 info="Threshold for skipping silent periods in hallucination detection"
             ),
             gr.Textbox(
@@ -470,7 +470,7 @@ class WhisperParams(BaseParams):
             gr.Number(
                 label="Language Detection Threshold",
                 value=defaults.get("language_detection_threshold",
-                                   cls.__fields__["language_detection_threshold"].default),
                 info="Threshold for language detection probability"
             ),
             gr.Number(

 from copy import deepcopy
 import yaml
+from modules.utils.constants import *
 class WhisperImpl(Enum):
             ),
             gr.Number(
                 label="Maximum Speech Duration (s)",
+                value=defaults.get("max_speech_duration_s", GRADIO_NONE_NUMBER_MAX),
                 info="Maximum duration of speech chunks in \"seconds\"."
             ),
             gr.Number(
             ),
             gr.Textbox(
                 label="Initial Prompt",
+                value=defaults.get("initial_prompt", GRADIO_NONE_STR),
                 info="Initial prompt for first window"
             ),
             gr.Slider(
             ),
             gr.Textbox(
                 label="Prefix",
+                value=defaults.get("prefix", GRADIO_NONE_STR),
                 info="Prefix text for first window"
             ),
             gr.Checkbox(
             ),
             gr.Number(
                 label="Max New Tokens",
+                value=defaults.get("max_new_tokens", GRADIO_NONE_NUMBER_MIN),
                 precision=0,
                 info="Maximum number of new tokens per chunk"
             ),
             gr.Number(
                 label="Hallucination Silence Threshold (sec)",
                 value=defaults.get("hallucination_silence_threshold",
+                                   GRADIO_NONE_NUMBER_MIN),
                 info="Threshold for skipping silent periods in hallucination detection"
             ),
             gr.Textbox(
             gr.Number(
                 label="Language Detection Threshold",
                 value=defaults.get("language_detection_threshold",
+                                   GRADIO_NONE_NUMBER_MIN),
                 info="Threshold for language detection probability"
             ),
             gr.Number(