jhj0517 committed on
Commit
2a2f7c6
·
1 Parent(s): 19e342a

Use constants for Gradio "none" validation values

Browse files
modules/utils/constants.py CHANGED
@@ -1,3 +1,6 @@
1
  from gradio_i18n import Translate, gettext as _
2
 
3
  AUTOMATIC_DETECTION = _("Automatic Detection")
 
 
 
 
1
  from gradio_i18n import Translate, gettext as _
2
 
3
  AUTOMATIC_DETECTION = _("Automatic Detection")
4
+ GRADIO_NONE_STR = ""
5
+ GRADIO_NONE_NUMBER_MAX = 9999
6
+ GRADIO_NONE_NUMBER_MIN = 0
modules/whisper/base_transcription_pipeline.py CHANGED
@@ -15,7 +15,7 @@ from dataclasses import astuple
15
  from modules.uvr.music_separator import MusicSeparator
16
  from modules.utils.paths import (WHISPER_MODELS_DIR, DIARIZATION_MODELS_DIR, OUTPUT_DIR, DEFAULT_PARAMETERS_CONFIG_PATH,
17
  UVR_MODELS_DIR)
18
- from modules.utils.constants import AUTOMATIC_DETECTION
19
  from modules.utils.subtitle_manager import get_srt, get_vtt, get_txt, write_file, safe_filename
20
  from modules.utils.youtube_manager import get_ytdata, get_ytaudio
21
  from modules.utils.files_manager import get_media_files, format_gradio_files, load_yaml, save_yaml
@@ -519,19 +519,19 @@ class BaseTranscriptionPipeline(ABC):
519
  language_code_dict = {value: key for key, value in whisper.tokenizer.LANGUAGES.items()}
520
  params.whisper.lang = language_code_dict[params.lang]
521
 
522
- if not params.whisper.initial_prompt:
523
  params.whisper.initial_prompt = None
524
- if not params.whisper.prefix:
525
  params.whisper.prefix = None
526
- if not params.whisper.hotwords:
527
  params.whisper.hotwords = None
528
- if params.whisper.max_new_tokens == 0:
529
  params.whisper.max_new_tokens = None
530
- if params.whisper.hallucination_silence_threshold == 0:
531
  params.whisper.hallucination_silence_threshold = None
532
- if params.whisper.language_detection_threshold == 0:
533
  params.whisper.language_detection_threshold = None
534
- if params.vad.max_speech_duration_s >= 9999:
535
  params.vad.max_speech_duration_s = float('inf')
536
  return params
537
 
@@ -555,7 +555,7 @@ class BaseTranscriptionPipeline(ABC):
555
  cached_yaml["whisper"]["lang"] = AUTOMATIC_DETECTION.unwrap()
556
 
557
  if cached_yaml["vad"].get("max_speech_duration_s", float('inf')) == float('inf'):
558
- cached_yaml["vad"]["max_speech_duration_s"] = 9999
559
 
560
  if cached_yaml is not None and cached_yaml:
561
  save_yaml(cached_yaml, DEFAULT_PARAMETERS_CONFIG_PATH)
 
15
  from modules.uvr.music_separator import MusicSeparator
16
  from modules.utils.paths import (WHISPER_MODELS_DIR, DIARIZATION_MODELS_DIR, OUTPUT_DIR, DEFAULT_PARAMETERS_CONFIG_PATH,
17
  UVR_MODELS_DIR)
18
+ from modules.utils.constants import *
19
  from modules.utils.subtitle_manager import get_srt, get_vtt, get_txt, write_file, safe_filename
20
  from modules.utils.youtube_manager import get_ytdata, get_ytaudio
21
  from modules.utils.files_manager import get_media_files, format_gradio_files, load_yaml, save_yaml
 
519
  language_code_dict = {value: key for key, value in whisper.tokenizer.LANGUAGES.items()}
520
  params.whisper.lang = language_code_dict[params.lang]
521
 
522
+ if params.whisper.initial_prompt == GRADIO_NONE_STR:
523
  params.whisper.initial_prompt = None
524
+ if params.whisper.prefix == GRADIO_NONE_STR:
525
  params.whisper.prefix = None
526
+ if params.whisper.hotwords == GRADIO_NONE_STR:
527
  params.whisper.hotwords = None
528
+ if params.whisper.max_new_tokens == GRADIO_NONE_NUMBER_MIN:
529
  params.whisper.max_new_tokens = None
530
+ if params.whisper.hallucination_silence_threshold == GRADIO_NONE_NUMBER_MIN:
531
  params.whisper.hallucination_silence_threshold = None
532
+ if params.whisper.language_detection_threshold == GRADIO_NONE_NUMBER_MIN:
533
  params.whisper.language_detection_threshold = None
534
+ if params.vad.max_speech_duration_s == GRADIO_NONE_NUMBER_MAX:
535
  params.vad.max_speech_duration_s = float('inf')
536
  return params
537
 
 
555
  cached_yaml["whisper"]["lang"] = AUTOMATIC_DETECTION.unwrap()
556
 
557
  if cached_yaml["vad"].get("max_speech_duration_s", float('inf')) == float('inf'):
558
+ cached_yaml["vad"]["max_speech_duration_s"] = GRADIO_NONE_NUMBER_MAX
559
 
560
  if cached_yaml is not None and cached_yaml:
561
  save_yaml(cached_yaml, DEFAULT_PARAMETERS_CONFIG_PATH)
modules/whisper/data_classes.py CHANGED
@@ -7,7 +7,7 @@ from enum import Enum
7
  from copy import deepcopy
8
  import yaml
9
 
10
- from modules.utils.constants import AUTOMATIC_DETECTION
11
 
12
 
13
  class WhisperImpl(Enum):
@@ -82,7 +82,7 @@ class VadParams(BaseParams):
82
  ),
83
  gr.Number(
84
  label="Maximum Speech Duration (s)",
85
- value=defaults.get("max_speech_duration_s", cls.__fields__["max_speech_duration_s"].default),
86
  info="Maximum duration of speech chunks in \"seconds\"."
87
  ),
88
  gr.Number(
@@ -373,7 +373,7 @@ class WhisperParams(BaseParams):
373
  ),
374
  gr.Textbox(
375
  label="Initial Prompt",
376
- value=defaults.get("initial_prompt", cls.__fields__["initial_prompt"].default),
377
  info="Initial prompt for first window"
378
  ),
379
  gr.Slider(
@@ -411,7 +411,7 @@ class WhisperParams(BaseParams):
411
  ),
412
  gr.Textbox(
413
  label="Prefix",
414
- value=defaults.get("prefix", cls.__fields__["prefix"].default),
415
  info="Prefix text for first window"
416
  ),
417
  gr.Checkbox(
@@ -446,7 +446,7 @@ class WhisperParams(BaseParams):
446
  ),
447
  gr.Number(
448
  label="Max New Tokens",
449
- value=defaults.get("max_new_tokens", cls.__fields__["max_new_tokens"].default),
450
  precision=0,
451
  info="Maximum number of new tokens per chunk"
452
  ),
@@ -459,7 +459,7 @@ class WhisperParams(BaseParams):
459
  gr.Number(
460
  label="Hallucination Silence Threshold (sec)",
461
  value=defaults.get("hallucination_silence_threshold",
462
- cls.__fields__["hallucination_silence_threshold"].default),
463
  info="Threshold for skipping silent periods in hallucination detection"
464
  ),
465
  gr.Textbox(
@@ -470,7 +470,7 @@ class WhisperParams(BaseParams):
470
  gr.Number(
471
  label="Language Detection Threshold",
472
  value=defaults.get("language_detection_threshold",
473
- cls.__fields__["language_detection_threshold"].default),
474
  info="Threshold for language detection probability"
475
  ),
476
  gr.Number(
 
7
  from copy import deepcopy
8
  import yaml
9
 
10
+ from modules.utils.constants import *
11
 
12
 
13
  class WhisperImpl(Enum):
 
82
  ),
83
  gr.Number(
84
  label="Maximum Speech Duration (s)",
85
+ value=defaults.get("max_speech_duration_s", GRADIO_NONE_NUMBER_MAX),
86
  info="Maximum duration of speech chunks in \"seconds\"."
87
  ),
88
  gr.Number(
 
373
  ),
374
  gr.Textbox(
375
  label="Initial Prompt",
376
+ value=defaults.get("initial_prompt", GRADIO_NONE_STR),
377
  info="Initial prompt for first window"
378
  ),
379
  gr.Slider(
 
411
  ),
412
  gr.Textbox(
413
  label="Prefix",
414
+ value=defaults.get("prefix", GRADIO_NONE_STR),
415
  info="Prefix text for first window"
416
  ),
417
  gr.Checkbox(
 
446
  ),
447
  gr.Number(
448
  label="Max New Tokens",
449
+ value=defaults.get("max_new_tokens", GRADIO_NONE_NUMBER_MIN),
450
  precision=0,
451
  info="Maximum number of new tokens per chunk"
452
  ),
 
459
  gr.Number(
460
  label="Hallucination Silence Threshold (sec)",
461
  value=defaults.get("hallucination_silence_threshold",
462
+ GRADIO_NONE_NUMBER_MIN),
463
  info="Threshold for skipping silent periods in hallucination detection"
464
  ),
465
  gr.Textbox(
 
470
  gr.Number(
471
  label="Language Detection Threshold",
472
  value=defaults.get("language_detection_threshold",
473
+ GRADIO_NONE_NUMBER_MIN),
474
  info="Threshold for language detection probability"
475
  ),
476
  gr.Number(