Spaces:

IRISLAB
/

VideoTranscription

Running

App Files Files Community

IRISLAB commited on Jun 26

Commit

875b30b

•

1 Parent(s): 533b363

Update app.py

Browse files

Files changed (1) hide show

app.py +281 -278

app.py CHANGED Viewed

@@ -1,3 +1,5 @@
 import gradio as gr
 import os
 import argparse
@@ -16,16 +18,18 @@ class App:
         self.args = args
         self.app = gr.Blocks(css=CSS, theme=self.args.theme)
         self.whisper_inf = self.init_whisper()
         self.nllb_inf = NLLBInference()
         self.deepl_api = DeepLAPI()
-        self.log_initialization()
     def init_whisper(self):
         whisper_type = self.args.whisper_type.lower().strip()
         if whisper_type in ["faster_whisper", "faster-whisper"]:
             whisper_inf = FasterWhisperInference()
             whisper_inf.model_dir = self.args.faster_whisper_model_dir
-        elif whisper_type in ["whisper"]:
             whisper_inf = WhisperInference()
             whisper_inf.model_dir = self.args.whisper_model_dir
         else:
@@ -33,30 +37,20 @@ class App:
             whisper_inf.model_dir = self.args.faster_whisper_model_dir
         return whisper_inf
-    def log_initialization(self):
-        print(f'Use "{self.args.whisper_type}" implementation')
-        print(f'Device "{self.whisper_inf.device}" is detected')
     @staticmethod
     def open_folder(folder_path: str):
         if os.path.exists(folder_path):
-            os.system(f'start {folder_path}')
         else:
-            print(f'The folder {folder_path} does not exist.')
     @staticmethod
     def on_change_models(model_size: str):
         translatable_model = ["large", "large-v1", "large-v2", "large-v3"]
-        return gr.Checkbox(visible=model_size in translatable_model, value=False, label="Translate to English?", interactive=True)
-    def transcribe_file_wrapper(self, file, file_format, timestamp, *whisper_params):
-        try:
-            result, output_file = self.whisper_inf.transcribe_file(file, file_format, timestamp, *whisper_params)
-            if not os.path.exists(output_file):
-                raise FileNotFoundError(f'Output file {output_file} does not exist.')
-            return result, output_file
-        except Exception as e:
-            return str(e), None
     def launch(self):
         with self.app:
@@ -64,279 +58,286 @@ class App:
                 with gr.Column():
                     gr.Markdown(MARKDOWN, elem_id="md_project")
             with gr.Tabs():
-                with gr.TabItem("File"):
-                    self.build_file_tab()
-                with gr.TabItem("Youtube"):
-                    self.build_youtube_tab()
-                with gr.TabItem("Mic"):
-                    self.build_mic_tab()
-                with gr.TabItem("T2T Translation"):
-                    self.build_t2t_translation_tab()
-        # Launch the app with optional Gradio settings
-        launch_args = {}
-        if self.args.share:
-            launch_args['share'] = self.args.share
-        if self.args.server_name:
-            launch_args['server_name'] = self.args.server_name
-        if self.args.server_port:
-            launch_args['server_port'] = self.args.server_port
-        if self.args.username and self.args.password:
-            launch_args['auth'] = (self.args.username, self.args.password)
-        launch_args['inbrowser'] = True
-        self.app.queue(api_open=self.args.api_open).launch(**launch_args)
-    def build_file_tab(self):
-        with gr.Row():
-            input_file = gr.Files(type="filepath", label="Upload File here")
-        with gr.Row():
-            dd_model = gr.Dropdown(choices=self.whisper_inf.available_models, value="large-v2", label="Model")
-            dd_lang = gr.Dropdown(choices=["Automatic Detection"] + self.whisper_inf.available_langs, value="Automatic Detection", label="Language")
-            dd_file_format = gr.Dropdown(choices=["SRT", "WebVTT", "txt"], value="SRT", label="File Format")
-        with gr.Row():
-            cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
-        with gr.Row():
-            cb_timestamp = gr.Checkbox(value=True, label="Add a timestamp to the end of the filename", interactive=True)
-        with gr.Accordion("VAD Options", open=False, visible=isinstance(self.whisper_inf, FasterWhisperInference)):
-            cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
-            sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold", value=0.5)
-            nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0, value=250)
-            nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)", value=9999)
-            nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0, value=2000)
-            nb_window_size_sample = gr.Number(label="Window Size (samples)", precision=0, value=1024)
-            nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=400)
-        with gr.Accordion("Advanced_Parameters", open=False):
-            nb_beam_size = gr.Number(label="Beam Size", value=1, precision=0, interactive=True)
-            nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
-            nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
-            dd_compute_type = gr.Dropdown(choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, label="Compute Type", interactive=True)
-            nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
-            nb_patience = gr.Number(label="Patience", value=1, interactive=True)
-            cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
-            tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
-            sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
-            nb_compression_ratio_threshold = gr.Number(label="Compression Ratio Threshold", value=2.4, interactive=True)
-        with gr.Row():
-            btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
-        with gr.Row():
-            tb_indicator = gr.Textbox(label="Output", scale=5)
-            files_subtitles = gr.Files(label="Downloadable output file", scale=3, interactive=False)
-            btn_openfolder = gr.Button('📂', scale=1)
-        params = [input_file, dd_file_format, cb_timestamp]
-        whisper_params = WhisperGradioComponents(model_size=dd_model,
-                                                 lang=dd_lang,
-                                                 is_translate=cb_translate,
-                                                 beam_size=nb_beam_size,
-                                                 log_prob_threshold=nb_log_prob_threshold,
-                                                 no_speech_threshold=nb_no_speech_threshold,
-                                                 compute_type=dd_compute_type,
-                                                 best_of=nb_best_of,
-                                                 patience=nb_patience,
-                                                 condition_on_previous_text=cb_condition_on_previous_text,
-                                                 initial_prompt=tb_initial_prompt,
-                                                 temperature=sd_temperature,
-                                                 compression_ratio_threshold=nb_compression_ratio_threshold,
-                                                 vad_filter=cb_vad_filter,
-                                                 threshold=sd_threshold,
-                                                 min_speech_duration_ms=nb_min_speech_duration_ms,
-                                                 max_speech_duration_s=nb_max_speech_duration_s,
-                                                 min_silence_duration_ms=nb_min_silence_duration_ms,
-                                                 window_size_sample=nb_window_size_sample,
-                                                 speech_pad_ms=nb_speech_pad_ms)
-        btn_run.click(fn=self.transcribe_file_wrapper,
-                      inputs=params + whisper_params.to_list(),
-                      outputs=[tb_indicator, files_subtitles])
-        btn_openfolder.click(fn=lambda: self.open_folder("outputs"), inputs=None, outputs=None)
-        dd_model.change(fn=self.on_change_models, inputs=[dd_model], outputs=[cb_translate])
-    def build_youtube_tab(self):
-        with gr.Row():
-            tb_youtubelink = gr.Textbox(label="Youtube Link")
-        with gr.Row(equal_height=True):
-            with gr.Column():
-                img_thumbnail = gr.Image(label="Youtube Thumbnail")
-            with gr.Column():
-                tb_title = gr.Label(label="Youtube Title")
-                tb_description = gr.Textbox(label="Youtube Description", max_lines=15)
-        with gr.Row():
-            dd_model = gr.Dropdown(choices=self.whisper_inf.available_models, value="large-v2", label="Model")
-            dd_lang = gr.Dropdown(choices=["Automatic Detection"] + self.whisper_inf.available_langs, value="Automatic Detection", label="Language")
-            dd_file_format = gr.Dropdown(choices=["SRT", "WebVTT", "txt"], value="SRT", label="File Format")
-        with gr.Row():
-            cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
-        with gr.Row():
-            cb_timestamp = gr.Checkbox(value=True, label="Add a timestamp to the end of the filename", interactive=True)
-        with gr.Accordion("VAD Options", open=False, visible=isinstance(self.whisper_inf, FasterWhisperInference)):
-            cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
-            sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold", value=0.5)
-            nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0, value=250)
-            nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)", value=9999)
-            nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0, value=2000)
-            nb_window_size_sample = gr.Number(label="Window Size (samples)", precision=0, value=1024)
-            nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=400)
-        with gr.Accordion("Advanced_Parameters", open=False):
-            nb_beam_size = gr.Number(label="Beam Size", value=1, precision=0, interactive=True)
-            nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
-            nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
-            dd_compute_type = gr.Dropdown(choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, label="Compute Type", interactive=True)
-            nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
-            nb_patience = gr.Number(label="Patience", value=1, interactive=True)
-            cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
-            tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
-            sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
-            nb_compression_ratio_threshold = gr.Number(label="Compression Ratio Threshold", value=2.4, interactive=True)
-        with gr.Row():
-            btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
-        with gr.Row():
-            tb_indicator = gr.Textbox(label="Output", scale=5)
-            files_subtitles = gr.Files(label="Downloadable output file", scale=3)
-            btn_openfolder = gr.Button('📂', scale=1)
-        params = [tb_youtubelink, dd_file_format, cb_timestamp]
-        whisper_params = WhisperGradioComponents(model_size=dd_model,
-                                                 lang=dd_lang,
-                                                 is_translate=cb_translate,
-                                                 beam_size=nb_beam_size,
-                                                 log_prob_threshold=nb_log_prob_threshold,
-                                                 no_speech_threshold=nb_no_speech_threshold,
-                                                 compute_type=dd_compute_type,
-                                                 best_of=nb_best_of,
-                                                 patience=nb_patience,
-                                                 condition_on_previous_text=cb_condition_on_previous_text,
-                                                 initial_prompt=tb_initial_prompt,
-                                                 temperature=sd_temperature,
-                                                 compression_ratio_threshold=nb_compression_ratio_threshold,
-                                                 vad_filter=cb_vad_filter,
-                                                 threshold=sd_threshold,
-                                                 min_speech_duration_ms=nb_min_speech_duration_ms,
-                                                 max_speech_duration_s=nb_max_speech_duration_s,
-                                                 min_silence_duration_ms=nb_min_silence_duration_ms,
-                                                 window_size_sample=nb_window_size_sample,
-                                                 speech_pad_ms=nb_speech_pad_ms)
-        btn_run.click(fn=self.whisper_inf.transcribe_youtube,
-                      inputs=params + whisper_params.to_list(),
-                      outputs=[tb_indicator, files_subtitles])
-        tb_youtubelink.change(get_ytmetas, inputs=[tb_youtubelink], outputs=[img_thumbnail, tb_title, tb_description])
-        btn_openfolder.click(fn=lambda: self.open_folder("outputs"), inputs=None, outputs=None)
-        dd_model.change(fn=self.on_change_models, inputs=[dd_model], outputs=[cb_translate])
-    def build_mic_tab(self):
-        with gr.Row():
-            mic_input = gr.Microphone(label="Record with Mic", type="filepath", interactive=True)
-        with gr.Row():
-            dd_model = gr.Dropdown(choices=self.whisper_inf.available_models, value="large-v2", label="Model")
-            dd_lang = gr.Dropdown(choices=["Automatic Detection"] + self.whisper_inf.available_langs, value="Automatic Detection", label="Language")
-            dd_file_format = gr.Dropdown(choices=["SRT", "WebVTT", "txt"], value="SRT", label="File Format")
-        with gr.Row():
-            cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
-        with gr.Accordion("VAD Options", open=False, visible=isinstance(self.whisper_inf, FasterWhisperInference)):
-            cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
-            sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold", value=0.5)
-            nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0, value=250)
-            nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)", value=9999)
-            nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0, value=2000)
-            nb_window_size_sample = gr.Number(label="Window Size (samples)", precision=0, value=1024)
-            nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=400)
-        with gr.Accordion("Advanced_Parameters", open=False):
-            nb_beam_size = gr.Number(label="Beam Size", value=1, precision=0, interactive=True)
-            nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
-            nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
-            dd_compute_type = gr.Dropdown(choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, label="Compute Type", interactive=True)
-            nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
-            nb_patience = gr.Number(label="Patience", value=1, interactive=True)
-            cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
-            tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
-            sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
-        with gr.Row():
-            btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
-        with gr.Row():
-            tb_indicator = gr.Textbox(label="Output", scale=5)
-            files_subtitles = gr.Files(label="Downloadable output file", scale=3)
-            btn_openfolder = gr.Button('📂', scale=1)
-        params = [mic_input, dd_file_format]
-        whisper_params = WhisperGradioComponents(model_size=dd_model,
-                                                 lang=dd_lang,
-                                                 is_translate=cb_translate,
-                                                 beam_size=nb_beam_size,
-                                                 log_prob_threshold=nb_log_prob_threshold,
-                                                 no_speech_threshold=nb_no_speech_threshold,
-                                                 compute_type=dd_compute_type,
-                                                 best_of=nb_best_of,
-                                                 patience=nb_patience,
-                                                 condition_on_previous_text=cb_condition_on_previous_text,
-                                                 initial_prompt=tb_initial_prompt,
-                                                 temperature=sd_temperature,
-                                                 compression_ratio_threshold=nb_compression_ratio_threshold,
-                                                 vad_filter=cb_vad_filter,
-                                                 threshold=sd_threshold,
-                                                 min_speech_duration_ms=nb_min_speech_duration_ms,
-                                                 max_speech_duration_s=nb_max_speech_duration_s,
-                                                 min_silence_duration_ms=nb_min_silence_duration_ms,
-                                                 window_size_sample=nb_window_size_sample,
-                                                 speech_pad_ms=nb_speech_pad_ms)
-        btn_run.click(fn=self.whisper_inf.transcribe_mic,
-                      inputs=params + whisper_params.to_list(),
-                      outputs=[tb_indicator, files_subtitles])
-        btn_openfolder.click(fn=lambda: self.open_folder("outputs"), inputs=None, outputs=None)
-        dd_model.change(fn=self.on_change_models, inputs=[dd_model], outputs=[cb_translate])
-    def build_t2t_translation_tab(self):
-        with gr.Row():
-            file_subs = gr.Files(type="filepath", label="Upload Subtitle Files to translate here", file_types=['.vtt', '.srt'])
-        with gr.TabItem("DeepL API"):
-            with gr.Row():
-                tb_authkey = gr.Textbox(label="Your Auth Key (API KEY)")
-            with gr.Row():
-                dd_deepl_sourcelang = gr.Dropdown(label="Source Language", value="Automatic Detection", choices=list(self.deepl_api.available_source_langs.keys()))
-                dd_deepl_targetlang = gr.Dropdown(label="Target Language", value="English", choices=list(self.deepl_api.available_target_langs.keys()))
-            with gr.Row():
-                cb_deepl_ispro = gr.Checkbox(label="Pro User?", value=False)
-            with gr.Row():
-                btn_run = gr.Button("TRANSLATE SUBTITLE FILE", variant="primary")
-            with gr.Row():
-                tb_indicator = gr.Textbox(label
-                                          ="Output", scale=5)
-                files_subtitles = gr.Files(label="Downloadable output file", scale=3)
-                btn_openfolder = gr.Button('📂', scale=1)
-            btn_run.click(fn=self.deepl_api.translate_deepl,
-                          inputs=[tb_authkey, file_subs, dd_deepl_sourcelang, dd_deepl_targetlang, cb_deepl_ispro],
-                          outputs=[tb_indicator, files_subtitles])
-            btn_openfolder.click(fn=lambda: self.open_folder(os.path.join("outputs", "translations")),
-                                 inputs=None,
-                                 outputs=None)
-        with gr.TabItem("NLLB"):
-            with gr.Row():
-                dd_nllb_model = gr.Dropdown(label="Model", value="facebook/nllb-200-1.3B", choices=self.nllb_inf.available_models)
-                dd_nllb_sourcelang = gr.Dropdown(label="Source Language", choices=self.nllb_inf.available_source_langs)
-                dd_nllb_targetlang = gr.Dropdown(label="Target Language", choices=self.nllb_inf.available_target_langs)
-            with gr.Row():
-                cb_timestamp = gr.Checkbox(value=True, label="Add a timestamp to the end of the filename", interactive=True)
-            with gr.Row():
-                btn_run = gr.Button("TRANSLATE SUBTITLE FILE", variant="primary")
-            with gr.Row():
-                tb_indicator = gr.Textbox(label="Output", scale=5)
-                files_subtitles = gr.Files(label="Downloadable output file", scale=3)
-                btn_openfolder = gr.Button('📂', scale=1)
-            with gr.Column():
-                md_vram_table = gr.HTML(NLLB_VRAM_TABLE, elem_id="md_nllb_vram_table")
-            btn_run.click(fn=self.nllb_inf.translate_file,
-                          inputs=[file_subs, dd_nllb_model, dd_nllb_sourcelang, dd_nllb_targetlang, cb_timestamp],
-                          outputs=[tb_indicator, files_subtitles])
-            btn_openfolder.click(fn=lambda: self.open_folder(os.path.join("outputs", "translations")),
-                                 inputs=None,
-                                 outputs=None)
 # Create the parser for command-line arguments
@@ -349,11 +350,13 @@ parser.add_argument('--username', type=str, default=None, help='Gradio authentic
 parser.add_argument('--password', type=str, default=None, help='Gradio authentication password')
 parser.add_argument('--theme', type=str, default=None, help='Gradio Blocks theme')
 parser.add_argument('--colab', type=bool, default=False, nargs='?', const=True, help='Is colab user or not')
-parser.add_argument('--api_open', type=bool, default=False, nargs='?', const=True, help='enable API or not')
 parser.add_argument('--whisper_model_dir', type=str, default=os.path.join("models", "Whisper"), help='Directory path of the whisper model')
 parser.add_argument('--faster_whisper_model_dir', type=str, default=os.path.join("models", "Whisper", "faster-whisper"), help='Directory path of the faster-whisper model')
 _args = parser.parse_args()
 if __name__ == "__main__":
     app = App(args=_args)
-    app.launch()

+I am creating a huggingface space for my pyhton application and here is the code can you analyze the code to see if it is perfectly running
 import gradio as gr
 import os
 import argparse
         self.args = args
         self.app = gr.Blocks(css=CSS, theme=self.args.theme)
         self.whisper_inf = self.init_whisper()
+        print(f"Use \"{self.args.whisper_type}\" implementation")
+        print(f"Device \"{self.whisper_inf.device}\" is detected")
         self.nllb_inf = NLLBInference()
         self.deepl_api = DeepLAPI()
     def init_whisper(self):
         whisper_type = self.args.whisper_type.lower().strip()
         if whisper_type in ["faster_whisper", "faster-whisper"]:
             whisper_inf = FasterWhisperInference()
             whisper_inf.model_dir = self.args.faster_whisper_model_dir
+        if whisper_type in ["whisper"]:
             whisper_inf = WhisperInference()
             whisper_inf.model_dir = self.args.whisper_model_dir
         else:
             whisper_inf.model_dir = self.args.faster_whisper_model_dir
         return whisper_inf
     @staticmethod
     def open_folder(folder_path: str):
         if os.path.exists(folder_path):
+            os.system(f"start {folder_path}")
         else:
+            print(f"The folder {folder_path} does not exist.")
     @staticmethod
     def on_change_models(model_size: str):
         translatable_model = ["large", "large-v1", "large-v2", "large-v3"]
+        if model_size not in translatable_model:
+            return gr.Checkbox(visible=False, value=False, interactive=False)
+        else:
+            return gr.Checkbox(visible=True, value=False, label="Translate to English?", interactive=True)
     def launch(self):
         with self.app:
                 with gr.Column():
                     gr.Markdown(MARKDOWN, elem_id="md_project")
             with gr.Tabs():
+                with gr.TabItem("File"):  # tab1
+                    with gr.Row():
+                        input_file = gr.Files(type="filepath", label="Upload File here")
+                    with gr.Row():
+                        dd_model = gr.Dropdown(choices=self.whisper_inf.available_models, value="large-v2",
+                                               label="Model")
+                        dd_lang = gr.Dropdown(choices=["Automatic Detection"] + self.whisper_inf.available_langs,
+                                              value="Automatic Detection", label="Language")
+                        dd_file_format = gr.Dropdown(["SRT", "WebVTT", "txt"], value="SRT", label="File Format")
+                    with gr.Row():
+                        cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
+                    with gr.Row():
+                        cb_timestamp = gr.Checkbox(value=True, label="Add a timestamp to the end of the filename", interactive=True)
+                    with gr.Accordion("VAD Options", open=False, visible=isinstance(self.whisper_inf, FasterWhisperInference)):
+                        cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
+                        sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold", value=0.5)
+                        nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0, value=250)
+                        nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)", value=9999)
+                        nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0, value=2000)
+                        nb_window_size_sample = gr.Number(label="Window Size (samples)", precision=0, value=1024)
+                        nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=400)
+                    with gr.Accordion("Advanced_Parameters", open=False):
+                        nb_beam_size = gr.Number(label="Beam Size", value=1, precision=0, interactive=True)
+                        nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
+                        nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
+                        dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
+                        nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
+                        nb_patience = gr.Number(label="Patience", value=1, interactive=True)
+                        cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
+                        tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
+                        sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
+                        nb_compression_ratio_threshold = gr.Number(label="Compression Ratio Threshold", value=2.4, interactive=True)
+                    with gr.Row():
+                        btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
+                    with gr.Row():
+                        tb_indicator = gr.Textbox(label="Output", scale=5)
+                        files_subtitles = gr.Files(label="Downloadable output file", scale=3, interactive=False)
+                        btn_openfolder = gr.Button('📂', scale=1)
+                    params = [input_file, dd_file_format, cb_timestamp]
+                    whisper_params = WhisperGradioComponents(model_size=dd_model,
+                                                             lang=dd_lang,
+                                                             is_translate=cb_translate,
+                                                             beam_size=nb_beam_size,
+                                                             log_prob_threshold=nb_log_prob_threshold,
+                                                             no_speech_threshold=nb_no_speech_threshold,
+                                                             compute_type=dd_compute_type,
+                                                             best_of=nb_best_of,
+                                                             patience=nb_patience,
+                                                             condition_on_previous_text=cb_condition_on_previous_text,
+                                                             initial_prompt=tb_initial_prompt,
+                                                             temperature=sd_temperature,
+                                                             compression_ratio_threshold=nb_compression_ratio_threshold,
+                                                             vad_filter=cb_vad_filter,
+                                                             threshold=sd_threshold,
+                                                             min_speech_duration_ms=nb_min_speech_duration_ms,
+                                                             max_speech_duration_s=nb_max_speech_duration_s,
+                                                             min_silence_duration_ms=nb_min_silence_duration_ms,
+                                                             window_size_sample=nb_window_size_sample,
+                                                             speech_pad_ms=nb_speech_pad_ms)
+                    btn_run.click(fn=self.whisper_inf.transcribe_file,
+                                  inputs=params + whisper_params.to_list(),
+                                  outputs=[tb_indicator, files_subtitles])
+                    btn_openfolder.click(fn=lambda: self.open_folder("outputs"), inputs=None, outputs=None)
+                    dd_model.change(fn=self.on_change_models, inputs=[dd_model], outputs=[cb_translate])
+                with gr.TabItem("Youtube"):  # tab2
+                    with gr.Row():
+                        tb_youtubelink = gr.Textbox(label="Youtube Link")
+                    with gr.Row(equal_height=True):
+                        with gr.Column():
+                            img_thumbnail = gr.Image(label="Youtube Thumbnail")
+                        with gr.Column():
+                            tb_title = gr.Label(label="Youtube Title")
+                            tb_description = gr.Textbox(label="Youtube Description", max_lines=15)
+                    with gr.Row():
+                        dd_model = gr.Dropdown(choices=self.whisper_inf.available_models, value="large-v2",
+                                               label="Model")
+                        dd_lang = gr.Dropdown(choices=["Automatic Detection"] + self.whisper_inf.available_langs,
+                                              value="Automatic Detection", label="Language")
+                        dd_file_format = gr.Dropdown(choices=["SRT", "WebVTT", "txt"], value="SRT", label="File Format")
+                    with gr.Row():
+                        cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
+                    with gr.Row():
+                        cb_timestamp = gr.Checkbox(value=True, label="Add a timestamp to the end of the filename",
+                                                   interactive=True)
+                    with gr.Accordion("VAD Options", open=False, visible=isinstance(self.whisper_inf, FasterWhisperInference)):
+                        cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
+                        sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold", value=0.5)
+                        nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0, value=250)
+                        nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)", value=9999)
+                        nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0, value=2000)
+                        nb_window_size_sample = gr.Number(label="Window Size (samples)", precision=0, value=1024)
+                        nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=400)
+                    with gr.Accordion("Advanced_Parameters", open=False):
+                        nb_beam_size = gr.Number(label="Beam Size", value=1, precision=0, interactive=True)
+                        nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
+                        nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
+                        dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
+                        nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
+                        nb_patience = gr.Number(label="Patience", value=1, interactive=True)
+                        cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
+                        tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
+                        sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
+                        nb_compression_ratio_threshold = gr.Number(label="Compression Ratio Threshold", value=2.4, interactive=True)
+                    with gr.Row():
+                        btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
+                    with gr.Row():
+                        tb_indicator = gr.Textbox(label="Output", scale=5)
+                        files_subtitles = gr.Files(label="Downloadable output file", scale=3)
+                        btn_openfolder = gr.Button('📂', scale=1)
+                    params = [tb_youtubelink, dd_file_format, cb_timestamp]
+                    whisper_params = WhisperGradioComponents(model_size=dd_model,
+                                                             lang=dd_lang,
+                                                             is_translate=cb_translate,
+                                                             beam_size=nb_beam_size,
+                                                             log_prob_threshold=nb_log_prob_threshold,
+                                                             no_speech_threshold=nb_no_speech_threshold,
+                                                             compute_type=dd_compute_type,
+                                                             best_of=nb_best_of,
+                                                             patience=nb_patience,
+                                                             condition_on_previous_text=cb_condition_on_previous_text,
+                                                             initial_prompt=tb_initial_prompt,
+                                                             temperature=sd_temperature,
+                                                             compression_ratio_threshold=nb_compression_ratio_threshold,
+                                                             vad_filter=cb_vad_filter,
+                                                             threshold=sd_threshold,
+                                                             min_speech_duration_ms=nb_min_speech_duration_ms,
+                                                             max_speech_duration_s=nb_max_speech_duration_s,
+                                                             min_silence_duration_ms=nb_min_silence_duration_ms,
+                                                             window_size_sample=nb_window_size_sample,
+                                                             speech_pad_ms=nb_speech_pad_ms)
+                    btn_run.click(fn=self.whisper_inf.transcribe_youtube,
+                                  inputs=params + whisper_params.to_list(),
+                                  outputs=[tb_indicator, files_subtitles])
+                    tb_youtubelink.change(get_ytmetas, inputs=[tb_youtubelink],
+                                          outputs=[img_thumbnail, tb_title, tb_description])
+                    btn_openfolder.click(fn=lambda: self.open_folder("outputs"), inputs=None, outputs=None)
+                    dd_model.change(fn=self.on_change_models, inputs=[dd_model], outputs=[cb_translate])
+                with gr.TabItem("Mic"):  # tab3
+                    with gr.Row():
+                        mic_input = gr.Microphone(label="Record with Mic", type="filepath", interactive=True)
+                    with gr.Row():
+                        dd_model = gr.Dropdown(choices=self.whisper_inf.available_models, value="large-v2",
+                                               label="Model")
+                        dd_lang = gr.Dropdown(choices=["Automatic Detection"] + self.whisper_inf.available_langs,
+                                              value="Automatic Detection", label="Language")
+                        dd_file_format = gr.Dropdown(["SRT", "WebVTT", "txt"], value="SRT", label="File Format")
+                    with gr.Row():
+                        cb_translate = gr.Checkbox(value=False, label="Translate to English?", interactive=True)
+                    with gr.Accordion("VAD Options", open=False, visible=isinstance(self.whisper_inf, FasterWhisperInference)):
+                        cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
+                        sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold", value=0.5)
+                        nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0, value=250)
+                        nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)", value=9999)
+                        nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0, value=2000)
+                        nb_window_size_sample = gr.Number(label="Window Size (samples)", precision=0, value=1024)
+                        nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=400)
+                    with gr.Accordion("Advanced_Parameters", open=False):
+                        nb_beam_size = gr.Number(label="Beam Size", value=1, precision=0, interactive=True)
+                        nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
+                        nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
+                        dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
+                        nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
+                        nb_patience = gr.Number(label="Patience", value=1, interactive=True)
+                        cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
+                        tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
+                        sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
+                    with gr.Row():
+                        btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
+                    with gr.Row():
+                        tb_indicator = gr.Textbox(label="Output", scale=5)
+                        files_subtitles = gr.Files(label="Downloadable output file", scale=3)
+                        btn_openfolder = gr.Button('📂', scale=1)
+                    params = [mic_input, dd_file_format]
+                    whisper_params = WhisperGradioComponents(model_size=dd_model,
+                                                             lang=dd_lang,
+                                                             is_translate=cb_translate,
+                                                             beam_size=nb_beam_size,
+                                                             log_prob_threshold=nb_log_prob_threshold,
+                                                             no_speech_threshold=nb_no_speech_threshold,
+                                                             compute_type=dd_compute_type,
+                                                             best_of=nb_best_of,
+                                                             patience=nb_patience,
+                                                             condition_on_previous_text=cb_condition_on_previous_text,
+                                                             initial_prompt=tb_initial_prompt,
+                                                             temperature=sd_temperature,
+                                                             compression_ratio_threshold=nb_compression_ratio_threshold,
+                                                             vad_filter=cb_vad_filter,
+                                                             threshold=sd_threshold,
+                                                             min_speech_duration_ms=nb_min_speech_duration_ms,
+                                                             max_speech_duration_s=nb_max_speech_duration_s,
+                                                             min_silence_duration_ms=nb_min_silence_duration_ms,
+                                                             window_size_sample=nb_window_size_sample,
+                                                             speech_pad_ms=nb_speech_pad_ms)
+                    btn_run.click(fn=self.whisper_inf.transcribe_mic,
+                                  inputs=params + whisper_params.to_list(),
+                                  outputs=[tb_indicator, files_subtitles])
+                    btn_openfolder.click(fn=lambda: self.open_folder("outputs"), inputs=None, outputs=None)
+                    dd_model.change(fn=self.on_change_models, inputs=[dd_model], outputs=[cb_translate])
+                with gr.TabItem("T2T Translation"):  # tab 4
+                    with gr.Row():
+                        file_subs = gr.Files(type="filepath", label="Upload Subtitle Files to translate here",
+                                             file_types=['.vtt', '.srt'])
+                    with gr.TabItem("DeepL API"):  # sub tab1
+                        with gr.Row():
+                            tb_authkey = gr.Textbox(label="Your Auth Key (API KEY)",
+                                                    value="")
+                        with gr.Row():
+                            dd_deepl_sourcelang = gr.Dropdown(label="Source Language", value="Automatic Detection",
+                                                              choices=list(
+                                                                  self.deepl_api.available_source_langs.keys()))
+                            dd_deepl_targetlang = gr.Dropdown(label="Target Language", value="English",
+                                                              choices=list(
+                                                                  self.deepl_api.available_target_langs.keys()))
+                        with gr.Row():
+                            cb_deepl_ispro = gr.Checkbox(label="Pro User?", value=False)
+                        with gr.Row():
+                            btn_run = gr.Button("TRANSLATE SUBTITLE FILE", variant="primary")
+                        with gr.Row():
+                            tb_indicator = gr.Textbox(label="Output", scale=5)
+                            files_subtitles = gr.Files(label="Downloadable output file", scale=3)
+                            btn_openfolder = gr.Button('📂', scale=1)
+                    btn_run.click(fn=self.deepl_api.translate_deepl,
+                                  inputs=[tb_authkey, file_subs, dd_deepl_sourcelang, dd_deepl_targetlang,
+                                          cb_deepl_ispro],
+                                  outputs=[tb_indicator, files_subtitles])
+                    btn_openfolder.click(fn=lambda: self.open_folder(os.path.join("outputs", "translations")),
+                                         inputs=None,
+                                         outputs=None)
+                    with gr.TabItem("NLLB"):  # sub tab2
+                        with gr.Row():
+                            dd_nllb_model = gr.Dropdown(label="Model", value="facebook/nllb-200-1.3B",
+                                                        choices=self.nllb_inf.available_models)
+                            dd_nllb_sourcelang = gr.Dropdown(label="Source Language",
+                                                             choices=self.nllb_inf.available_source_langs)
+                            dd_nllb_targetlang = gr.Dropdown(label="Target Language",
+                                                             choices=self.nllb_inf.available_target_langs)
+                        with gr.Row():
+                            cb_timestamp = gr.Checkbox(value=True, label="Add a timestamp to the end of the filename",
+                                                       interactive=True)
+                        with gr.Row():
+                            btn_run = gr.Button("TRANSLATE SUBTITLE FILE", variant="primary")
+                        with gr.Row():
+                            tb_indicator = gr.Textbox(label="Output", scale=5)
+                            files_subtitles = gr.Files(label="Downloadable output file", scale=3)
+                            btn_openfolder = gr.Button('📂', scale=1)
+                        with gr.Column():
+                            md_vram_table = gr.HTML(NLLB_VRAM_TABLE, elem_id="md_nllb_vram_table")
+                    btn_run.click(fn=self.nllb_inf.translate_file,
+                                  inputs=[file_subs, dd_nllb_model, dd_nllb_sourcelang, dd_nllb_targetlang, cb_timestamp],
+                                  outputs=[tb_indicator, files_subtitles])
+                    btn_openfolder.click(fn=lambda: self.open_folder(os.path.join("outputs", "translations")),
+                                         inputs=None,
+                                         outputs=None)
+        # Launch the app with optional gradio settings
+        launch_args = {}
+        if self.args.share:
+            launch_args['share'] = self.args.share
+        if self.args.server_name:
+            launch_args['server_name'] = self.args.server_name
+        if self.args.server_port:
+            launch_args['server_port'] = self.args.server_port
+        if self.args.username and self.args.password:
+            launch_args['auth'] = (self.args.username, self.args.password)
+        launch_args['inbrowser'] = True
+        self.app.queue(api_open=False).launch(**launch_args)
 # Create the parser for command-line arguments
 parser.add_argument('--password', type=str, default=None, help='Gradio authentication password')
 parser.add_argument('--theme', type=str, default=None, help='Gradio Blocks theme')
 parser.add_argument('--colab', type=bool, default=False, nargs='?', const=True, help='Is colab user or not')
+parser.add_argument('--api_open', type=bool, default=False, nargs='?', const=True, help='enable api or not')
 parser.add_argument('--whisper_model_dir', type=str, default=os.path.join("models", "Whisper"), help='Directory path of the whisper model')
 parser.add_argument('--faster_whisper_model_dir', type=str, default=os.path.join("models", "Whisper", "faster-whisper"), help='Directory path of the faster-whisper model')
 _args = parser.parse_args()
 if __name__ == "__main__":
     app = App(args=_args)
+    app.launch()