Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,5 @@
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
import argparse
|
@@ -16,16 +18,18 @@ class App:
|
|
16 |
self.args = args
|
17 |
self.app = gr.Blocks(css=CSS, theme=self.args.theme)
|
18 |
self.whisper_inf = self.init_whisper()
|
|
|
|
|
19 |
self.nllb_inf = NLLBInference()
|
20 |
self.deepl_api = DeepLAPI()
|
21 |
-
self.log_initialization()
|
22 |
|
23 |
def init_whisper(self):
|
24 |
whisper_type = self.args.whisper_type.lower().strip()
|
|
|
25 |
if whisper_type in ["faster_whisper", "faster-whisper"]:
|
26 |
whisper_inf = FasterWhisperInference()
|
27 |
whisper_inf.model_dir = self.args.faster_whisper_model_dir
|
28 |
-
|
29 |
whisper_inf = WhisperInference()
|
30 |
whisper_inf.model_dir = self.args.whisper_model_dir
|
31 |
else:
|
@@ -33,30 +37,20 @@ class App:
|
|
33 |
whisper_inf.model_dir = self.args.faster_whisper_model_dir
|
34 |
return whisper_inf
|
35 |
|
36 |
-
def log_initialization(self):
|
37 |
-
print(f'Use "{self.args.whisper_type}" implementation')
|
38 |
-
print(f'Device "{self.whisper_inf.device}" is detected')
|
39 |
-
|
40 |
@staticmethod
|
41 |
def open_folder(folder_path: str):
|
42 |
if os.path.exists(folder_path):
|
43 |
-
os.system(f
|
44 |
else:
|
45 |
-
print(f
|
46 |
|
47 |
@staticmethod
|
48 |
def on_change_models(model_size: str):
|
49 |
translatable_model = ["large", "large-v1", "large-v2", "large-v3"]
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
result, output_file = self.whisper_inf.transcribe_file(file, file_format, timestamp, *whisper_params)
|
55 |
-
if not os.path.exists(output_file):
|
56 |
-
raise FileNotFoundError(f'Output file {output_file} does not exist.')
|
57 |
-
return result, output_file
|
58 |
-
except Exception as e:
|
59 |
-
return str(e), None
|
60 |
|
61 |
def launch(self):
|
62 |
with self.app:
|
@@ -64,279 +58,286 @@ class App:
|
|
64 |
with gr.Column():
|
65 |
gr.Markdown(MARKDOWN, elem_id="md_project")
|
66 |
with gr.Tabs():
|
67 |
-
with gr.TabItem("File"):
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
-
|
|
|
|
|
|
|
|
|
89 |
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
nb_beam_size = gr.Number(label="Beam Size", value=1, precision=0, interactive=True)
|
182 |
-
nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
|
183 |
-
nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
|
184 |
-
dd_compute_type = gr.Dropdown(choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, label="Compute Type", interactive=True)
|
185 |
-
nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
|
186 |
-
nb_patience = gr.Number(label="Patience", value=1, interactive=True)
|
187 |
-
cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
|
188 |
-
tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
|
189 |
-
sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
|
190 |
-
nb_compression_ratio_threshold = gr.Number(label="Compression Ratio Threshold", value=2.4, interactive=True)
|
191 |
-
with gr.Row():
|
192 |
-
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
193 |
-
with gr.Row():
|
194 |
-
tb_indicator = gr.Textbox(label="Output", scale=5)
|
195 |
-
files_subtitles = gr.Files(label="Downloadable output file", scale=3)
|
196 |
-
btn_openfolder = gr.Button('π', scale=1)
|
197 |
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
beam_size=nb_beam_size,
|
203 |
-
log_prob_threshold=nb_log_prob_threshold,
|
204 |
-
no_speech_threshold=nb_no_speech_threshold,
|
205 |
-
compute_type=dd_compute_type,
|
206 |
-
best_of=nb_best_of,
|
207 |
-
patience=nb_patience,
|
208 |
-
condition_on_previous_text=cb_condition_on_previous_text,
|
209 |
-
initial_prompt=tb_initial_prompt,
|
210 |
-
temperature=sd_temperature,
|
211 |
-
compression_ratio_threshold=nb_compression_ratio_threshold,
|
212 |
-
vad_filter=cb_vad_filter,
|
213 |
-
threshold=sd_threshold,
|
214 |
-
min_speech_duration_ms=nb_min_speech_duration_ms,
|
215 |
-
max_speech_duration_s=nb_max_speech_duration_s,
|
216 |
-
min_silence_duration_ms=nb_min_silence_duration_ms,
|
217 |
-
window_size_sample=nb_window_size_sample,
|
218 |
-
speech_pad_ms=nb_speech_pad_ms)
|
219 |
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
226 |
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
dd_model = gr.Dropdown(choices=self.whisper_inf.available_models, value="large-v2", label="Model")
|
232 |
-
dd_lang = gr.Dropdown(choices=["Automatic Detection"] + self.whisper_inf.available_langs, value="Automatic Detection", label="Language")
|
233 |
-
dd_file_format = gr.Dropdown(choices=["SRT", "WebVTT", "txt"], value="SRT", label="File Format")
|
234 |
-
with gr.Row():
|
235 |
-
cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
|
236 |
-
with gr.Accordion("VAD Options", open=False, visible=isinstance(self.whisper_inf, FasterWhisperInference)):
|
237 |
-
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
|
238 |
-
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold", value=0.5)
|
239 |
-
nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0, value=250)
|
240 |
-
nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)", value=9999)
|
241 |
-
nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0, value=2000)
|
242 |
-
nb_window_size_sample = gr.Number(label="Window Size (samples)", precision=0, value=1024)
|
243 |
-
nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=400)
|
244 |
-
with gr.Accordion("Advanced_Parameters", open=False):
|
245 |
-
nb_beam_size = gr.Number(label="Beam Size", value=1, precision=0, interactive=True)
|
246 |
-
nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
|
247 |
-
nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
|
248 |
-
dd_compute_type = gr.Dropdown(choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, label="Compute Type", interactive=True)
|
249 |
-
nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
|
250 |
-
nb_patience = gr.Number(label="Patience", value=1, interactive=True)
|
251 |
-
cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
|
252 |
-
tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
|
253 |
-
sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
|
254 |
-
with gr.Row():
|
255 |
-
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
256 |
-
with gr.Row():
|
257 |
-
tb_indicator = gr.Textbox(label="Output", scale=5)
|
258 |
-
files_subtitles = gr.Files(label="Downloadable output file", scale=3)
|
259 |
-
btn_openfolder = gr.Button('π', scale=1)
|
260 |
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
is_translate=cb_translate,
|
265 |
-
beam_size=nb_beam_size,
|
266 |
-
log_prob_threshold=nb_log_prob_threshold,
|
267 |
-
no_speech_threshold=nb_no_speech_threshold,
|
268 |
-
compute_type=dd_compute_type,
|
269 |
-
best_of=nb_best_of,
|
270 |
-
patience=nb_patience,
|
271 |
-
condition_on_previous_text=cb_condition_on_previous_text,
|
272 |
-
initial_prompt=tb_initial_prompt,
|
273 |
-
temperature=sd_temperature,
|
274 |
-
compression_ratio_threshold=nb_compression_ratio_threshold,
|
275 |
-
vad_filter=cb_vad_filter,
|
276 |
-
threshold=sd_threshold,
|
277 |
-
min_speech_duration_ms=nb_min_speech_duration_ms,
|
278 |
-
max_speech_duration_s=nb_max_speech_duration_s,
|
279 |
-
min_silence_duration_ms=nb_min_silence_duration_ms,
|
280 |
-
window_size_sample=nb_window_size_sample,
|
281 |
-
speech_pad_ms=nb_speech_pad_ms)
|
282 |
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
with gr.Row():
|
297 |
-
dd_deepl_sourcelang = gr.Dropdown(label="Source Language", value="Automatic Detection", choices=list(self.deepl_api.available_source_langs.keys()))
|
298 |
-
dd_deepl_targetlang = gr.Dropdown(label="Target Language", value="English", choices=list(self.deepl_api.available_target_langs.keys()))
|
299 |
-
with gr.Row():
|
300 |
-
cb_deepl_ispro = gr.Checkbox(label="Pro User?", value=False)
|
301 |
-
with gr.Row():
|
302 |
-
btn_run = gr.Button("TRANSLATE SUBTITLE FILE", variant="primary")
|
303 |
-
with gr.Row():
|
304 |
-
tb_indicator = gr.Textbox(label
|
305 |
-
="Output", scale=5)
|
306 |
-
files_subtitles = gr.Files(label="Downloadable output file", scale=3)
|
307 |
-
btn_openfolder = gr.Button('π', scale=1)
|
308 |
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
dd_nllb_sourcelang = gr.Dropdown(label="Source Language", choices=self.nllb_inf.available_source_langs)
|
321 |
-
dd_nllb_targetlang = gr.Dropdown(label="Target Language", choices=self.nllb_inf.available_target_langs)
|
322 |
-
with gr.Row():
|
323 |
-
cb_timestamp = gr.Checkbox(value=True, label="Add a timestamp to the end of the filename", interactive=True)
|
324 |
-
with gr.Row():
|
325 |
-
btn_run = gr.Button("TRANSLATE SUBTITLE FILE", variant="primary")
|
326 |
-
with gr.Row():
|
327 |
-
tb_indicator = gr.Textbox(label="Output", scale=5)
|
328 |
-
files_subtitles = gr.Files(label="Downloadable output file", scale=3)
|
329 |
-
btn_openfolder = gr.Button('π', scale=1)
|
330 |
-
with gr.Column():
|
331 |
-
md_vram_table = gr.HTML(NLLB_VRAM_TABLE, elem_id="md_nllb_vram_table")
|
332 |
-
|
333 |
-
btn_run.click(fn=self.nllb_inf.translate_file,
|
334 |
-
inputs=[file_subs, dd_nllb_model, dd_nllb_sourcelang, dd_nllb_targetlang, cb_timestamp],
|
335 |
-
outputs=[tb_indicator, files_subtitles])
|
336 |
|
337 |
-
|
338 |
-
inputs=None,
|
339 |
-
outputs=None)
|
340 |
|
341 |
|
342 |
# Create the parser for command-line arguments
|
@@ -349,11 +350,13 @@ parser.add_argument('--username', type=str, default=None, help='Gradio authentic
|
|
349 |
parser.add_argument('--password', type=str, default=None, help='Gradio authentication password')
|
350 |
parser.add_argument('--theme', type=str, default=None, help='Gradio Blocks theme')
|
351 |
parser.add_argument('--colab', type=bool, default=False, nargs='?', const=True, help='Is colab user or not')
|
352 |
-
parser.add_argument('--api_open', type=bool, default=False, nargs='?', const=True, help='enable
|
353 |
parser.add_argument('--whisper_model_dir', type=str, default=os.path.join("models", "Whisper"), help='Directory path of the whisper model')
|
354 |
parser.add_argument('--faster_whisper_model_dir', type=str, default=os.path.join("models", "Whisper", "faster-whisper"), help='Directory path of the faster-whisper model')
|
355 |
_args = parser.parse_args()
|
356 |
|
357 |
if __name__ == "__main__":
|
358 |
app = App(args=_args)
|
359 |
-
app.launch()
|
|
|
|
|
|
1 |
+
I am creating a Hugging Face Space for my Python application, and here is the code. Can you analyze it to see whether it runs correctly?
|
2 |
+
|
3 |
import gradio as gr
|
4 |
import os
|
5 |
import argparse
|
|
|
18 |
self.args = args
|
19 |
self.app = gr.Blocks(css=CSS, theme=self.args.theme)
|
20 |
self.whisper_inf = self.init_whisper()
|
21 |
+
print(f"Use \"{self.args.whisper_type}\" implementation")
|
22 |
+
print(f"Device \"{self.whisper_inf.device}\" is detected")
|
23 |
self.nllb_inf = NLLBInference()
|
24 |
self.deepl_api = DeepLAPI()
|
|
|
25 |
|
26 |
def init_whisper(self):
|
27 |
whisper_type = self.args.whisper_type.lower().strip()
|
28 |
+
|
29 |
if whisper_type in ["faster_whisper", "faster-whisper"]:
|
30 |
whisper_inf = FasterWhisperInference()
|
31 |
whisper_inf.model_dir = self.args.faster_whisper_model_dir
|
32 |
+
if whisper_type in ["whisper"]:
|
33 |
whisper_inf = WhisperInference()
|
34 |
whisper_inf.model_dir = self.args.whisper_model_dir
|
35 |
else:
|
|
|
37 |
whisper_inf.model_dir = self.args.faster_whisper_model_dir
|
38 |
return whisper_inf
|
39 |
|
|
|
|
|
|
|
|
|
40 |
@staticmethod
def open_folder(folder_path: str):
    """Open ``folder_path`` in the host machine's file manager.

    Prints a notice instead of raising when the folder does not exist or
    when no opener program can be started.

    Args:
        folder_path: Path of the folder to reveal.
    """
    # Function-scope imports: only this helper needs them.
    import subprocess
    import sys

    if not os.path.exists(folder_path):
        print(f"The folder {folder_path} does not exist.")
        return

    try:
        if sys.platform.startswith("win"):
            # os.startfile passes the path to the Windows shell directly, so
            # no command string is built and spaces/metacharacters are safe.
            os.startfile(folder_path)
        else:
            # "start" exists only on Windows; use the platform's own opener
            # and pass the path as a separate argument (no shell involved).
            opener = "open" if sys.platform == "darwin" else "xdg-open"
            subprocess.run([opener, folder_path], check=False)
    except OSError as err:
        # Raised e.g. when the opener binary is missing; keep this helper
        # best-effort so a UI click never crashes the app.
        print(f"Could not open the folder {folder_path}: {err}")
|
46 |
|
47 |
@staticmethod
def on_change_models(model_size: str):
    """Return the "Translate to English?" checkbox state for a model size.

    The checkbox is visible and interactive only for the "large" family of
    model sizes; for any other size it is hidden and disabled. In both
    cases its value is reset to False.

    Args:
        model_size: Name of the selected model size.
    """
    supports_translation = model_size in ("large", "large-v1", "large-v2", "large-v3")
    if supports_translation:
        return gr.Checkbox(visible=True, value=False, label="Translate to English?", interactive=True)
    return gr.Checkbox(visible=False, value=False, interactive=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
def launch(self):
|
56 |
with self.app:
|
|
|
58 |
with gr.Column():
|
59 |
gr.Markdown(MARKDOWN, elem_id="md_project")
|
60 |
with gr.Tabs():
|
61 |
+
with gr.TabItem("File"): # tab1
|
62 |
+
with gr.Row():
|
63 |
+
input_file = gr.Files(type="filepath", label="Upload File here")
|
64 |
+
with gr.Row():
|
65 |
+
dd_model = gr.Dropdown(choices=self.whisper_inf.available_models, value="large-v2",
|
66 |
+
label="Model")
|
67 |
+
dd_lang = gr.Dropdown(choices=["Automatic Detection"] + self.whisper_inf.available_langs,
|
68 |
+
value="Automatic Detection", label="Language")
|
69 |
+
dd_file_format = gr.Dropdown(["SRT", "WebVTT", "txt"], value="SRT", label="File Format")
|
70 |
+
with gr.Row():
|
71 |
+
cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
|
72 |
+
with gr.Row():
|
73 |
+
cb_timestamp = gr.Checkbox(value=True, label="Add a timestamp to the end of the filename", interactive=True)
|
74 |
+
with gr.Accordion("VAD Options", open=False, visible=isinstance(self.whisper_inf, FasterWhisperInference)):
|
75 |
+
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
|
76 |
+
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold", value=0.5)
|
77 |
+
nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0, value=250)
|
78 |
+
nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)", value=9999)
|
79 |
+
nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0, value=2000)
|
80 |
+
nb_window_size_sample = gr.Number(label="Window Size (samples)", precision=0, value=1024)
|
81 |
+
nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=400)
|
82 |
+
with gr.Accordion("Advanced_Parameters", open=False):
|
83 |
+
nb_beam_size = gr.Number(label="Beam Size", value=1, precision=0, interactive=True)
|
84 |
+
nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
|
85 |
+
nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
|
86 |
+
dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
|
87 |
+
nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
|
88 |
+
nb_patience = gr.Number(label="Patience", value=1, interactive=True)
|
89 |
+
cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
|
90 |
+
tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
|
91 |
+
sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
|
92 |
+
nb_compression_ratio_threshold = gr.Number(label="Compression Ratio Threshold", value=2.4, interactive=True)
|
93 |
+
with gr.Row():
|
94 |
+
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
95 |
+
with gr.Row():
|
96 |
+
tb_indicator = gr.Textbox(label="Output", scale=5)
|
97 |
+
files_subtitles = gr.Files(label="Downloadable output file", scale=3, interactive=False)
|
98 |
+
btn_openfolder = gr.Button('π', scale=1)
|
99 |
|
100 |
+
params = [input_file, dd_file_format, cb_timestamp]
|
101 |
+
whisper_params = WhisperGradioComponents(model_size=dd_model,
|
102 |
+
lang=dd_lang,
|
103 |
+
is_translate=cb_translate,
|
104 |
+
beam_size=nb_beam_size,
|
105 |
+
log_prob_threshold=nb_log_prob_threshold,
|
106 |
+
no_speech_threshold=nb_no_speech_threshold,
|
107 |
+
compute_type=dd_compute_type,
|
108 |
+
best_of=nb_best_of,
|
109 |
+
patience=nb_patience,
|
110 |
+
condition_on_previous_text=cb_condition_on_previous_text,
|
111 |
+
initial_prompt=tb_initial_prompt,
|
112 |
+
temperature=sd_temperature,
|
113 |
+
compression_ratio_threshold=nb_compression_ratio_threshold,
|
114 |
+
vad_filter=cb_vad_filter,
|
115 |
+
threshold=sd_threshold,
|
116 |
+
min_speech_duration_ms=nb_min_speech_duration_ms,
|
117 |
+
max_speech_duration_s=nb_max_speech_duration_s,
|
118 |
+
min_silence_duration_ms=nb_min_silence_duration_ms,
|
119 |
+
window_size_sample=nb_window_size_sample,
|
120 |
+
speech_pad_ms=nb_speech_pad_ms)
|
121 |
|
122 |
+
btn_run.click(fn=self.whisper_inf.transcribe_file,
|
123 |
+
inputs=params + whisper_params.to_list(),
|
124 |
+
outputs=[tb_indicator, files_subtitles])
|
125 |
+
btn_openfolder.click(fn=lambda: self.open_folder("outputs"), inputs=None, outputs=None)
|
126 |
+
dd_model.change(fn=self.on_change_models, inputs=[dd_model], outputs=[cb_translate])
|
127 |
|
128 |
+
with gr.TabItem("Youtube"): # tab2
|
129 |
+
with gr.Row():
|
130 |
+
tb_youtubelink = gr.Textbox(label="Youtube Link")
|
131 |
+
with gr.Row(equal_height=True):
|
132 |
+
with gr.Column():
|
133 |
+
img_thumbnail = gr.Image(label="Youtube Thumbnail")
|
134 |
+
with gr.Column():
|
135 |
+
tb_title = gr.Label(label="Youtube Title")
|
136 |
+
tb_description = gr.Textbox(label="Youtube Description", max_lines=15)
|
137 |
+
with gr.Row():
|
138 |
+
dd_model = gr.Dropdown(choices=self.whisper_inf.available_models, value="large-v2",
|
139 |
+
label="Model")
|
140 |
+
dd_lang = gr.Dropdown(choices=["Automatic Detection"] + self.whisper_inf.available_langs,
|
141 |
+
value="Automatic Detection", label="Language")
|
142 |
+
dd_file_format = gr.Dropdown(choices=["SRT", "WebVTT", "txt"], value="SRT", label="File Format")
|
143 |
+
with gr.Row():
|
144 |
+
cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
|
145 |
+
with gr.Row():
|
146 |
+
cb_timestamp = gr.Checkbox(value=True, label="Add a timestamp to the end of the filename",
|
147 |
+
interactive=True)
|
148 |
+
with gr.Accordion("VAD Options", open=False, visible=isinstance(self.whisper_inf, FasterWhisperInference)):
|
149 |
+
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
|
150 |
+
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold", value=0.5)
|
151 |
+
nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0, value=250)
|
152 |
+
nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)", value=9999)
|
153 |
+
nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0, value=2000)
|
154 |
+
nb_window_size_sample = gr.Number(label="Window Size (samples)", precision=0, value=1024)
|
155 |
+
nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=400)
|
156 |
+
with gr.Accordion("Advanced_Parameters", open=False):
|
157 |
+
nb_beam_size = gr.Number(label="Beam Size", value=1, precision=0, interactive=True)
|
158 |
+
nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
|
159 |
+
nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
|
160 |
+
dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
|
161 |
+
nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
|
162 |
+
nb_patience = gr.Number(label="Patience", value=1, interactive=True)
|
163 |
+
cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
|
164 |
+
tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
|
165 |
+
sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
|
166 |
+
nb_compression_ratio_threshold = gr.Number(label="Compression Ratio Threshold", value=2.4, interactive=True)
|
167 |
+
with gr.Row():
|
168 |
+
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
169 |
+
with gr.Row():
|
170 |
+
tb_indicator = gr.Textbox(label="Output", scale=5)
|
171 |
+
files_subtitles = gr.Files(label="Downloadable output file", scale=3)
|
172 |
+
btn_openfolder = gr.Button('π', scale=1)
|
173 |
|
174 |
+
params = [tb_youtubelink, dd_file_format, cb_timestamp]
|
175 |
+
whisper_params = WhisperGradioComponents(model_size=dd_model,
|
176 |
+
lang=dd_lang,
|
177 |
+
is_translate=cb_translate,
|
178 |
+
beam_size=nb_beam_size,
|
179 |
+
log_prob_threshold=nb_log_prob_threshold,
|
180 |
+
no_speech_threshold=nb_no_speech_threshold,
|
181 |
+
compute_type=dd_compute_type,
|
182 |
+
best_of=nb_best_of,
|
183 |
+
patience=nb_patience,
|
184 |
+
condition_on_previous_text=cb_condition_on_previous_text,
|
185 |
+
initial_prompt=tb_initial_prompt,
|
186 |
+
temperature=sd_temperature,
|
187 |
+
compression_ratio_threshold=nb_compression_ratio_threshold,
|
188 |
+
vad_filter=cb_vad_filter,
|
189 |
+
threshold=sd_threshold,
|
190 |
+
min_speech_duration_ms=nb_min_speech_duration_ms,
|
191 |
+
max_speech_duration_s=nb_max_speech_duration_s,
|
192 |
+
min_silence_duration_ms=nb_min_silence_duration_ms,
|
193 |
+
window_size_sample=nb_window_size_sample,
|
194 |
+
speech_pad_ms=nb_speech_pad_ms)
|
195 |
+
btn_run.click(fn=self.whisper_inf.transcribe_youtube,
|
196 |
+
inputs=params + whisper_params.to_list(),
|
197 |
+
outputs=[tb_indicator, files_subtitles])
|
198 |
+
tb_youtubelink.change(get_ytmetas, inputs=[tb_youtubelink],
|
199 |
+
outputs=[img_thumbnail, tb_title, tb_description])
|
200 |
+
btn_openfolder.click(fn=lambda: self.open_folder("outputs"), inputs=None, outputs=None)
|
201 |
+
dd_model.change(fn=self.on_change_models, inputs=[dd_model], outputs=[cb_translate])
|
202 |
|
203 |
+
with gr.TabItem("Mic"): # tab3
|
204 |
+
with gr.Row():
|
205 |
+
mic_input = gr.Microphone(label="Record with Mic", type="filepath", interactive=True)
|
206 |
+
with gr.Row():
|
207 |
+
dd_model = gr.Dropdown(choices=self.whisper_inf.available_models, value="large-v2",
|
208 |
+
label="Model")
|
209 |
+
dd_lang = gr.Dropdown(choices=["Automatic Detection"] + self.whisper_inf.available_langs,
|
210 |
+
value="Automatic Detection", label="Language")
|
211 |
+
dd_file_format = gr.Dropdown(["SRT", "WebVTT", "txt"], value="SRT", label="File Format")
|
212 |
+
with gr.Row():
|
213 |
+
cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
|
214 |
+
with gr.Accordion("VAD Options", open=False, visible=isinstance(self.whisper_inf, FasterWhisperInference)):
|
215 |
+
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
|
216 |
+
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold", value=0.5)
|
217 |
+
nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0, value=250)
|
218 |
+
nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)", value=9999)
|
219 |
+
nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0, value=2000)
|
220 |
+
nb_window_size_sample = gr.Number(label="Window Size (samples)", precision=0, value=1024)
|
221 |
+
nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=400)
|
222 |
+
with gr.Accordion("Advanced_Parameters", open=False):
|
223 |
+
nb_beam_size = gr.Number(label="Beam Size", value=1, precision=0, interactive=True)
|
224 |
+
nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
|
225 |
+
nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
|
226 |
+
dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
|
227 |
+
nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
|
228 |
+
nb_patience = gr.Number(label="Patience", value=1, interactive=True)
|
229 |
+
cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
|
230 |
+
tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
|
231 |
+
sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
|
232 |
+
with gr.Row():
|
233 |
+
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
234 |
+
with gr.Row():
|
235 |
+
tb_indicator = gr.Textbox(label="Output", scale=5)
|
236 |
+
files_subtitles = gr.Files(label="Downloadable output file", scale=3)
|
237 |
+
btn_openfolder = gr.Button('π', scale=1)
|
238 |
|
239 |
+
params = [mic_input, dd_file_format]
|
240 |
+
whisper_params = WhisperGradioComponents(model_size=dd_model,
|
241 |
+
lang=dd_lang,
|
242 |
+
is_translate=cb_translate,
|
243 |
+
beam_size=nb_beam_size,
|
244 |
+
log_prob_threshold=nb_log_prob_threshold,
|
245 |
+
no_speech_threshold=nb_no_speech_threshold,
|
246 |
+
compute_type=dd_compute_type,
|
247 |
+
best_of=nb_best_of,
|
248 |
+
patience=nb_patience,
|
249 |
+
condition_on_previous_text=cb_condition_on_previous_text,
|
250 |
+
initial_prompt=tb_initial_prompt,
|
251 |
+
temperature=sd_temperature,
|
252 |
+
compression_ratio_threshold=nb_compression_ratio_threshold,
|
253 |
+
vad_filter=cb_vad_filter,
|
254 |
+
threshold=sd_threshold,
|
255 |
+
min_speech_duration_ms=nb_min_speech_duration_ms,
|
256 |
+
max_speech_duration_s=nb_max_speech_duration_s,
|
257 |
+
min_silence_duration_ms=nb_min_silence_duration_ms,
|
258 |
+
window_size_sample=nb_window_size_sample,
|
259 |
+
speech_pad_ms=nb_speech_pad_ms)
|
260 |
+
btn_run.click(fn=self.whisper_inf.transcribe_mic,
|
261 |
+
inputs=params + whisper_params.to_list(),
|
262 |
+
outputs=[tb_indicator, files_subtitles])
|
263 |
+
btn_openfolder.click(fn=lambda: self.open_folder("outputs"), inputs=None, outputs=None)
|
264 |
+
dd_model.change(fn=self.on_change_models, inputs=[dd_model], outputs=[cb_translate])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
265 |
|
266 |
+
with gr.TabItem("T2T Translation"): # tab 4
|
267 |
+
with gr.Row():
|
268 |
+
file_subs = gr.Files(type="filepath", label="Upload Subtitle Files to translate here",
|
269 |
+
file_types=['.vtt', '.srt'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
270 |
|
271 |
+
with gr.TabItem("DeepL API"): # sub tab1
|
272 |
+
with gr.Row():
|
273 |
+
tb_authkey = gr.Textbox(label="Your Auth Key (API KEY)",
|
274 |
+
value="")
|
275 |
+
with gr.Row():
|
276 |
+
dd_deepl_sourcelang = gr.Dropdown(label="Source Language", value="Automatic Detection",
|
277 |
+
choices=list(
|
278 |
+
self.deepl_api.available_source_langs.keys()))
|
279 |
+
dd_deepl_targetlang = gr.Dropdown(label="Target Language", value="English",
|
280 |
+
choices=list(
|
281 |
+
self.deepl_api.available_target_langs.keys()))
|
282 |
+
with gr.Row():
|
283 |
+
cb_deepl_ispro = gr.Checkbox(label="Pro User?", value=False)
|
284 |
+
with gr.Row():
|
285 |
+
btn_run = gr.Button("TRANSLATE SUBTITLE FILE", variant="primary")
|
286 |
+
with gr.Row():
|
287 |
+
tb_indicator = gr.Textbox(label="Output", scale=5)
|
288 |
+
files_subtitles = gr.Files(label="Downloadable output file", scale=3)
|
289 |
+
btn_openfolder = gr.Button('π', scale=1)
|
290 |
|
291 |
+
btn_run.click(fn=self.deepl_api.translate_deepl,
|
292 |
+
inputs=[tb_authkey, file_subs, dd_deepl_sourcelang, dd_deepl_targetlang,
|
293 |
+
cb_deepl_ispro],
|
294 |
+
outputs=[tb_indicator, files_subtitles])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
295 |
|
296 |
+
btn_openfolder.click(fn=lambda: self.open_folder(os.path.join("outputs", "translations")),
|
297 |
+
inputs=None,
|
298 |
+
outputs=None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
299 |
|
300 |
+
with gr.TabItem("NLLB"): # sub tab2
|
301 |
+
with gr.Row():
|
302 |
+
dd_nllb_model = gr.Dropdown(label="Model", value="facebook/nllb-200-1.3B",
|
303 |
+
choices=self.nllb_inf.available_models)
|
304 |
+
dd_nllb_sourcelang = gr.Dropdown(label="Source Language",
|
305 |
+
choices=self.nllb_inf.available_source_langs)
|
306 |
+
dd_nllb_targetlang = gr.Dropdown(label="Target Language",
|
307 |
+
choices=self.nllb_inf.available_target_langs)
|
308 |
+
with gr.Row():
|
309 |
+
cb_timestamp = gr.Checkbox(value=True, label="Add a timestamp to the end of the filename",
|
310 |
+
interactive=True)
|
311 |
+
with gr.Row():
|
312 |
+
btn_run = gr.Button("TRANSLATE SUBTITLE FILE", variant="primary")
|
313 |
+
with gr.Row():
|
314 |
+
tb_indicator = gr.Textbox(label="Output", scale=5)
|
315 |
+
files_subtitles = gr.Files(label="Downloadable output file", scale=3)
|
316 |
+
btn_openfolder = gr.Button('π', scale=1)
|
317 |
+
with gr.Column():
|
318 |
+
md_vram_table = gr.HTML(NLLB_VRAM_TABLE, elem_id="md_nllb_vram_table")
|
319 |
|
320 |
+
btn_run.click(fn=self.nllb_inf.translate_file,
|
321 |
+
inputs=[file_subs, dd_nllb_model, dd_nllb_sourcelang, dd_nllb_targetlang, cb_timestamp],
|
322 |
+
outputs=[tb_indicator, files_subtitles])
|
323 |
|
324 |
+
btn_openfolder.click(fn=lambda: self.open_folder(os.path.join("outputs", "translations")),
|
325 |
+
inputs=None,
|
326 |
+
outputs=None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
327 |
|
328 |
+
# Launch the app with optional gradio settings
|
329 |
+
launch_args = {}
|
330 |
+
if self.args.share:
|
331 |
+
launch_args['share'] = self.args.share
|
332 |
+
if self.args.server_name:
|
333 |
+
launch_args['server_name'] = self.args.server_name
|
334 |
+
if self.args.server_port:
|
335 |
+
launch_args['server_port'] = self.args.server_port
|
336 |
+
if self.args.username and self.args.password:
|
337 |
+
launch_args['auth'] = (self.args.username, self.args.password)
|
338 |
+
launch_args['inbrowser'] = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
339 |
|
340 |
+
self.app.queue(api_open=False).launch(**launch_args)
|
|
|
|
|
341 |
|
342 |
|
343 |
# Create the parser for command-line arguments
|
|
|
350 |
parser.add_argument('--password', type=str, default=None, help='Gradio authentication password')
|
351 |
parser.add_argument('--theme', type=str, default=None, help='Gradio Blocks theme')
|
352 |
parser.add_argument('--colab', type=bool, default=False, nargs='?', const=True, help='Is colab user or not')
|
353 |
+
parser.add_argument('--api_open', type=bool, default=False, nargs='?', const=True, help='enable api or not')
|
354 |
parser.add_argument('--whisper_model_dir', type=str, default=os.path.join("models", "Whisper"), help='Directory path of the whisper model')
|
355 |
parser.add_argument('--faster_whisper_model_dir', type=str, default=os.path.join("models", "Whisper", "faster-whisper"), help='Directory path of the faster-whisper model')
|
356 |
_args = parser.parse_args()
|
357 |
|
358 |
if __name__ == "__main__":
|
359 |
app = App(args=_args)
|
360 |
+
app.launch()
|
361 |
+
|
362 |
+
|