jhj0517 committed on
Commit
d11cbce
·
1 Parent(s): 3ce4c46

Add UVR parameters

Browse files
Files changed (1) hide show
  1. modules/whisper/whisper_parameter.py +48 -17
modules/whisper/whisper_parameter.py CHANGED
@@ -47,6 +47,11 @@ class WhisperParameters:
47
  hotwords: gr.Textbox
48
  language_detection_threshold: gr.Number
49
  language_detection_segments: gr.Number
 
 
 
 
 
50
  """
51
  A data class for Gradio components of the Whisper Parameters. Use "before" Gradio pre-processing.
52
  This data class is used to mitigate the key-value problem between Gradio components and function parameters.
@@ -148,61 +153,76 @@ class WhisperParameters:
148
  diarization_device: gr.Dropdown
149
  This parameter is related to whisperx. Device to run the diarization model.
150
 
151
- length_penalty:
152
  This parameter is related to faster-whisper. Exponential length penalty constant.
153
 
154
- repetition_penalty:
155
  This parameter is related to faster-whisper. Penalty applied to the score of previously generated tokens
156
  (set > 1 to penalize).
157
 
158
- no_repeat_ngram_size:
159
  This parameter is related to faster-whisper. Prevent repetitions of n-grams with this size (set 0 to disable).
160
 
161
- prefix:
162
  This parameter is related to faster-whisper. Optional text to provide as a prefix for the first window.
163
 
164
- suppress_blank:
165
  This parameter is related to faster-whisper. Suppress blank outputs at the beginning of the sampling.
166
 
167
- suppress_tokens:
168
  This parameter is related to faster-whisper. List of token IDs to suppress. -1 will suppress a default set
169
  of symbols as defined in the model config.json file.
170
 
171
- max_initial_timestamp:
172
  This parameter is related to faster-whisper. The initial timestamp cannot be later than this.
173
 
174
- word_timestamps:
175
  This parameter is related to faster-whisper. Extract word-level timestamps using the cross-attention pattern
176
  and dynamic time warping, and include the timestamps for each word in each segment.
177
 
178
- prepend_punctuations:
179
  This parameter is related to faster-whisper. If word_timestamps is True, merge these punctuation symbols
180
  with the next word.
181
 
182
- append_punctuations:
183
  This parameter is related to faster-whisper. If word_timestamps is True, merge these punctuation symbols
184
  with the previous word.
185
 
186
- max_new_tokens:
187
  This parameter is related to faster-whisper. Maximum number of new tokens to generate per-chunk. If not set,
188
  the maximum will be set by the default max_length.
189
 
190
- chunk_length:
191
  This parameter is related to faster-whisper. The length of audio segments. If it is not None, it will overwrite the
192
  default chunk_length of the FeatureExtractor.
193
 
194
- hallucination_silence_threshold:
195
  This parameter is related to faster-whisper. When word_timestamps is True, skip silent periods longer than this threshold
196
  (in seconds) when a possible hallucination is detected.
197
 
198
- hotwords:
199
  This parameter is related to faster-whisper. Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.
200
 
201
- language_detection_threshold:
202
  This parameter is related to faster-whisper. If the maximum probability of the language tokens is higher than this value, the language is detected.
203
 
204
- language_detection_segments:
205
  This parameter is related to faster-whisper. Number of segments to consider for the language detection.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  """
207
 
208
  def as_list(self) -> list:
@@ -273,6 +293,11 @@ class WhisperValues:
273
  hotwords: Optional[str]
274
  language_detection_threshold: Optional[float]
275
  language_detection_segments: int
 
 
 
 
 
276
  """
277
  A data class to use Whisper parameters.
278
  """
@@ -323,6 +348,12 @@ class WhisperValues:
323
  "diarization": {
324
  "is_diarize": self.is_diarize,
325
  "hf_token": self.hf_token
326
- }
 
 
 
 
 
 
327
  }
328
  return data
 
47
  hotwords: gr.Textbox
48
  language_detection_threshold: gr.Number
49
  language_detection_segments: gr.Number
50
+ is_bgm_separate: gr.Checkbox
51
+ uvr_model_size: gr.Dropdown
52
+ uvr_device: gr.Dropdown
53
+ uvr_segment_size: gr.Number
54
+ uvr_save_file: gr.Checkbox
55
  """
56
  A data class for Gradio components of the Whisper Parameters. Use "before" Gradio pre-processing.
57
  This data class is used to mitigate the key-value problem between Gradio components and function parameters.
 
153
  diarization_device: gr.Dropdown
154
  This parameter is related to whisperx. Device to run the diarization model.
155
 
156
+ length_penalty: gr.Number
157
  This parameter is related to faster-whisper. Exponential length penalty constant.
158
 
159
+ repetition_penalty: gr.Number
160
  This parameter is related to faster-whisper. Penalty applied to the score of previously generated tokens
161
  (set > 1 to penalize).
162
 
163
+ no_repeat_ngram_size: gr.Number
164
  This parameter is related to faster-whisper. Prevent repetitions of n-grams with this size (set 0 to disable).
165
 
166
+ prefix: gr.Textbox
167
  This parameter is related to faster-whisper. Optional text to provide as a prefix for the first window.
168
 
169
+ suppress_blank: gr.Checkbox
170
  This parameter is related to faster-whisper. Suppress blank outputs at the beginning of the sampling.
171
 
172
+ suppress_tokens: gr.Textbox
173
  This parameter is related to faster-whisper. List of token IDs to suppress. -1 will suppress a default set
174
  of symbols as defined in the model config.json file.
175
 
176
+ max_initial_timestamp: gr.Number
177
  This parameter is related to faster-whisper. The initial timestamp cannot be later than this.
178
 
179
+ word_timestamps: gr.Checkbox
180
  This parameter is related to faster-whisper. Extract word-level timestamps using the cross-attention pattern
181
  and dynamic time warping, and include the timestamps for each word in each segment.
182
 
183
+ prepend_punctuations: gr.Textbox
184
  This parameter is related to faster-whisper. If word_timestamps is True, merge these punctuation symbols
185
  with the next word.
186
 
187
+ append_punctuations: gr.Textbox
188
  This parameter is related to faster-whisper. If word_timestamps is True, merge these punctuation symbols
189
  with the previous word.
190
 
191
+ max_new_tokens: gr.Number
192
  This parameter is related to faster-whisper. Maximum number of new tokens to generate per-chunk. If not set,
193
  the maximum will be set by the default max_length.
194
 
195
+ chunk_length: gr.Number
196
  This parameter is related to faster-whisper. The length of audio segments. If it is not None, it will overwrite the
197
  default chunk_length of the FeatureExtractor.
198
 
199
+ hallucination_silence_threshold: gr.Number
200
  This parameter is related to faster-whisper. When word_timestamps is True, skip silent periods longer than this threshold
201
  (in seconds) when a possible hallucination is detected.
202
 
203
+ hotwords: gr.Textbox
204
  This parameter is related to faster-whisper. Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.
205
 
206
+ language_detection_threshold: gr.Number
207
  This parameter is related to faster-whisper. If the maximum probability of the language tokens is higher than this value, the language is detected.
208
 
209
+ language_detection_segments: gr.Number
210
  This parameter is related to faster-whisper. Number of segments to consider for the language detection.
211
+
212
+ is_bgm_separate: gr.Checkbox
213
+ This parameter is related to UVR. Boolean value that determines whether to separate bgm or not.
214
+
215
+ uvr_model_size: gr.Dropdown
216
+ This parameter is related to UVR. UVR model size.
217
+
218
+ uvr_device: gr.Dropdown
219
+ This parameter is related to UVR. Device to run UVR model.
220
+
221
+ uvr_segment_size: gr.Number
222
+ This parameter is related to UVR. Segment size for UVR model.
223
+
224
+ uvr_save_file: gr.Checkbox
225
+ This parameter is related to UVR. Boolean value that determines whether to save the separated file or not.
226
  """
227
 
228
  def as_list(self) -> list:
 
293
  hotwords: Optional[str]
294
  language_detection_threshold: Optional[float]
295
  language_detection_segments: int
296
+ is_bgm_separate: bool
297
+ uvr_model_size: str
298
+ uvr_device: str
299
+ uvr_segment_size: int
300
+ uvr_save_file: bool
301
  """
302
  A data class to use Whisper parameters.
303
  """
 
348
  "diarization": {
349
  "is_diarize": self.is_diarize,
350
  "hf_token": self.hf_token
351
+ },
352
+ "bgm_separation": {
353
+ "is_separate_bgm": self.is_bgm_separate,
354
+ "model_size": self.uvr_model_size,
355
+ "segment_size": self.uvr_segment_size,
356
+ "save_file": self.uvr_save_file
357
+ },
358
  }
359
  return data