Fixed the Whisper "unexpected keyword argument" error and the original.srt format:
Browse files
1. Fixed the error "WhisperModel.transcribe() got an unexpected keyword argument 'whisperSegmentsFilter'".
2. Fixed the incorrect format of the generated original-language subtitle file (original.srt) when the translation model is enabled.
- app.py +8 -6
- src/utils.py +5 -6
app.py
CHANGED
@@ -255,7 +255,7 @@ class WhisperTranscriber:
|
|
255 |
self.whisperSegmentsFilters: List[List] = []
|
256 |
inputFilter: bool = decodeOptions.pop("whisperSegmentsFilter", None)
|
257 |
inputFilters = []
|
258 |
-
for idx in range(
|
259 |
inputFilters.append(decodeOptions.pop(f"whisperSegmentsFilter{idx}", None))
|
260 |
inputFilters = filter(None, inputFilters)
|
261 |
if inputFilter:
|
@@ -1064,12 +1064,12 @@ def create_ui(app_config: ApplicationConfig):
|
|
1064 |
with gr.Column():
|
1065 |
simpleOutput = common_output()
|
1066 |
gr.Markdown(uiArticle)
|
1067 |
-
if translateModelMd is not None:
|
1068 |
-
with gr.Accordion("docs/translateModel.md", open=False):
|
1069 |
-
gr.Markdown(translateModelMd)
|
1070 |
if optionsMd is not None:
|
1071 |
with gr.Accordion("docs/options.md", open=False):
|
1072 |
gr.Markdown(optionsMd)
|
|
|
|
|
|
|
1073 |
if readmeMd is not None:
|
1074 |
with gr.Accordion("README.md", open=False):
|
1075 |
gr.Markdown(readmeMd)
|
@@ -1158,11 +1158,13 @@ def create_ui(app_config: ApplicationConfig):
|
|
1158 |
fullInputDict.update(common_translation_inputs())
|
1159 |
with gr.Column():
|
1160 |
fullOutput = common_output()
|
1161 |
-
|
1162 |
-
gr.Markdown(uiArticle)
|
1163 |
if optionsMd is not None:
|
1164 |
with gr.Accordion("docs/options.md", open=False):
|
1165 |
gr.Markdown(optionsMd)
|
|
|
|
|
|
|
1166 |
if readmeMd is not None:
|
1167 |
with gr.Accordion("README.md", open=False):
|
1168 |
gr.Markdown(readmeMd)
|
|
|
255 |
self.whisperSegmentsFilters: List[List] = []
|
256 |
inputFilter: bool = decodeOptions.pop("whisperSegmentsFilter", None)
|
257 |
inputFilters = []
|
258 |
+
for idx in range(1,len(self.app_config.whisper_segments_filters) + 1,1):
|
259 |
inputFilters.append(decodeOptions.pop(f"whisperSegmentsFilter{idx}", None))
|
260 |
inputFilters = filter(None, inputFilters)
|
261 |
if inputFilter:
|
|
|
1064 |
with gr.Column():
|
1065 |
simpleOutput = common_output()
|
1066 |
gr.Markdown(uiArticle)
|
|
|
|
|
|
|
1067 |
if optionsMd is not None:
|
1068 |
with gr.Accordion("docs/options.md", open=False):
|
1069 |
gr.Markdown(optionsMd)
|
1070 |
+
if translateModelMd is not None:
|
1071 |
+
with gr.Accordion("docs/translateModel.md", open=False):
|
1072 |
+
gr.Markdown(translateModelMd)
|
1073 |
if readmeMd is not None:
|
1074 |
with gr.Accordion("README.md", open=False):
|
1075 |
gr.Markdown(readmeMd)
|
|
|
1158 |
fullInputDict.update(common_translation_inputs())
|
1159 |
with gr.Column():
|
1160 |
fullOutput = common_output()
|
1161 |
+
gr.Markdown(uiArticle)
|
|
|
1162 |
if optionsMd is not None:
|
1163 |
with gr.Accordion("docs/options.md", open=False):
|
1164 |
gr.Markdown(optionsMd)
|
1165 |
+
if translateModelMd is not None:
|
1166 |
+
with gr.Accordion("docs/translateModel.md", open=False):
|
1167 |
+
gr.Markdown(translateModelMd)
|
1168 |
if readmeMd is not None:
|
1169 |
with gr.Accordion("README.md", open=False):
|
1170 |
gr.Markdown(readmeMd)
|
src/utils.py
CHANGED
@@ -133,12 +133,11 @@ def write_srt_original(transcript: Iterator[dict], file: TextIO,
|
|
133 |
if original is not None: print(f"{original}",
|
134 |
file=file,
|
135 |
flush=True)
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
flush=True)
|
142 |
|
143 |
def __subtitle_preprocessor_iterator(transcript: Iterator[dict], maxLineWidth: int = None, highlight_words: bool = False):
|
144 |
for segment in transcript:
|
|
|
133 |
if original is not None: print(f"{original}",
|
134 |
file=file,
|
135 |
flush=True)
|
136 |
+
|
137 |
+
text = segment['text'].replace('-->', '->')
|
138 |
+
print(f"{text}\n" if bilingual else "",
|
139 |
+
file=file,
|
140 |
+
flush=True)
|
|
|
141 |
|
142 |
def __subtitle_preprocessor_iterator(transcript: Iterator[dict], maxLineWidth: int = None, highlight_words: bool = False):
|
143 |
for segment in transcript:
|