transcribe_audio

Running

App Files Community

cstr committed on Oct 2, 2024

Commit

d290706

verified ·

1 Parent(s): 60c0a37

+px +ux -tc

Browse files

Files changed (1) hide show

app.py +161 -78

app.py CHANGED Viewed

@@ -45,13 +45,16 @@ from faster_whisper.transcribe import BatchedInferencePipeline
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 logging.info(f"Using device: {device}")
-def download_audio(url, method_choice):
     """
-    Downloads audio from a given URL using the specified method.
     Args:
         url (str): The URL of the audio.
         method_choice (str): The method to use for downloading audio.
     Returns:
         tuple: (path to the downloaded audio file, is_temp_file), or (None, False) if failed.
@@ -60,19 +63,19 @@ def download_audio(url, method_choice):
     logging.info(f"Downloading audio from URL: {url} using method: {method_choice}")
     try:
         if 'youtube.com' in parsed_url.netloc or 'youtu.be' in parsed_url.netloc:
-            audio_file = download_youtube_audio(url, method_choice)
             if not audio_file:
                 error_msg = f"Failed to download audio from {url} using method {method_choice}. Ensure yt-dlp is up to date."
                 logging.error(error_msg)
                 return None, False
         elif parsed_url.scheme == 'rtsp':
-            audio_file = download_rtsp_audio(url)
             if not audio_file:
                 error_msg = f"Failed to download RTSP audio from {url}"
                 logging.error(error_msg)
                 return None, False
         else:
-            audio_file = download_direct_audio(url, method_choice)
             if not audio_file:
                 error_msg = f"Failed to download audio from {url} using method {method_choice}"
                 logging.error(error_msg)
@@ -84,13 +87,16 @@ def download_audio(url, method_choice):
         return None, False
-def download_youtube_audio(url, method_choice):
     """
     Downloads audio from a YouTube URL using the specified method.
     Args:
         url (str): The YouTube URL.
         method_choice (str): The method to use for downloading.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
@@ -102,17 +108,20 @@ def download_youtube_audio(url, method_choice):
     method = methods.get(method_choice, yt_dlp_method)
     try:
         logging.info(f"Attempting to download YouTube audio using {method_choice}")
-        return method(url)
     except Exception as e:
         logging.error(f"Error downloading using {method_choice}: {str(e)}")
         return None
-def yt_dlp_method(url):
     """
     Downloads YouTube audio using yt-dlp and saves it to a temporary file.
     Args:
         url (str): The YouTube URL.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
@@ -133,6 +142,8 @@ def yt_dlp_method(url):
         'logger': MyLogger(),  # Use a custom logger to capture yt-dlp logs
         'progress_hooks': [my_hook],  # Hook to capture download progress and errors
     }
     try:
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             info = ydl.extract_info(url, download=True)
@@ -174,12 +185,15 @@ def my_hook(d):
     elif d['status'] == 'error':
         logging.error(f"Download error: {d['filename']}")
-def pytube_method(url):
     """
     Downloads audio from a YouTube URL using pytube and saves it to a temporary file.
     Args:
         url (str): The YouTube URL.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
@@ -187,7 +201,13 @@ def pytube_method(url):
     logging.info("Using pytube method")
     from pytube import YouTube
     try:
-        yt = YouTube(url)
         audio_stream = yt.streams.filter(only_audio=True).first()
         if audio_stream is None:
             error_msg = "No audio streams available with pytube."
@@ -205,12 +225,13 @@ def pytube_method(url):
         return None
-def download_rtsp_audio(url):
     """
     Downloads audio from an RTSP URL using FFmpeg.
     Args:
         url (str): The RTSP URL.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
@@ -218,8 +239,12 @@ def download_rtsp_audio(url):
     logging.info("Using FFmpeg to download RTSP stream")
     output_file = tempfile.mktemp(suffix='.mp3')
     command = ['ffmpeg', '-i', url, '-acodec', 'libmp3lame', '-ab', '192k', '-y', output_file]
     try:
-        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         logging.info(f"Downloaded RTSP audio to: {output_file}")
         return output_file
     except subprocess.CalledProcessError as e:
@@ -229,13 +254,16 @@ def download_rtsp_audio(url):
         logging.error(f"Error downloading RTSP audio: {str(e)}")
         return None
-def download_direct_audio(url, method_choice):
     """
     Downloads audio from a direct URL using the specified method.
     Args:
         url (str): The direct URL of the audio file.
         method_choice (str): The method to use for downloading.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
@@ -250,7 +278,7 @@ def download_direct_audio(url, method_choice):
     }
     method = methods.get(method_choice, requests_method)
     try:
-        audio_file = method(url)
         if not audio_file or not os.path.exists(audio_file):
             error_msg = f"Failed to download direct audio from {url} using method {method_choice}"
             logging.error(error_msg)
@@ -260,18 +288,30 @@ def download_direct_audio(url, method_choice):
         logging.error(f"Error downloading direct audio with {method_choice}: {str(e)}")
         return None
-def requests_method(url):
     """
     Downloads audio using the requests library.
     Args:
         url (str): The URL of the audio file.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     """
     try:
-        response = requests.get(url, stream=True)
         if response.status_code == 200:
             with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
                 for chunk in response.iter_content(chunk_size=8192):
@@ -286,12 +326,15 @@ def requests_method(url):
         logging.error(f"Error in requests_method: {str(e)}")
         return None
-def wget_method(url):
     """
     Downloads audio using the wget command-line tool.
     Args:
         url (str): The URL of the audio file.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
@@ -299,8 +342,12 @@ def wget_method(url):
     logging.info("Using wget method")
     output_file = tempfile.mktemp(suffix='.mp3')
     command = ['wget', '-O', output_file, url]
     try:
-        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         logging.info(f"Downloaded audio to: {output_file}")
         return output_file
     except subprocess.CalledProcessError as e:
@@ -310,12 +357,15 @@ def wget_method(url):
         logging.error(f"Error in wget_method: {str(e)}")
         return None
-def yt_dlp_direct_method(url):
     """
     Downloads audio using yt-dlp (supports various protocols and sites).
     Args:
         url (str): The URL of the audio or webpage containing audio.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
@@ -333,6 +383,8 @@ def yt_dlp_direct_method(url):
             'preferredquality': '192',
         }],
     }
     try:
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.download([url])
@@ -342,12 +394,15 @@ def yt_dlp_direct_method(url):
         logging.error(f"Error in yt_dlp_direct_method: {str(e)}")
         return None
-def ffmpeg_method(url):
     """
     Downloads audio using FFmpeg.
     Args:
         url (str): The URL of the audio file.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
@@ -355,8 +410,12 @@ def ffmpeg_method(url):
     logging.info("Using ffmpeg method")
     output_file = tempfile.mktemp(suffix='.mp3')
     command = ['ffmpeg', '-i', url, '-vn', '-acodec', 'libmp3lame', '-q:a', '2', output_file]
     try:
-        subprocess.run(command, check=True, capture_output=True, text=True)
         logging.info(f"Downloaded and converted audio to: {output_file}")
         return output_file
     except subprocess.CalledProcessError as e:
@@ -366,12 +425,15 @@ def ffmpeg_method(url):
         logging.error(f"Error in ffmpeg_method: {str(e)}")
         return None
-def aria2_method(url):
     """
     Downloads audio using aria2.
     Args:
         url (str): The URL of the audio file.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
@@ -379,6 +441,8 @@ def aria2_method(url):
     logging.info("Using aria2 method")
     output_file = tempfile.mktemp(suffix='.mp3')
     command = ['aria2c', '--split=4', '--max-connection-per-server=4', '--out', output_file, url]
     try:
         subprocess.run(command, check=True, capture_output=True, text=True)
         logging.info(f"Downloaded audio to: {output_file}")
@@ -402,8 +466,8 @@ def trim_audio(audio_path, start_time, end_time):
     Returns:
         str: Path to the trimmed audio file.
-    Raises:
-        gr.Error: If invalid start or end times are provided.
     """
     try:
         logging.info(f"Trimming audio from {start_time} to {end_time}")
@@ -464,12 +528,16 @@ def get_model_options(pipeline_type):
 # Dictionary to store loaded models
 loaded_models = {}
-def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
     """
     Transcribes audio from a given source using the specified pipeline and model.
     Args:
-        input_source (str or file): URL of audio, path to local file, or uploaded file object.
         pipeline_type (str): Type of pipeline to use ('faster-batched', 'faster-sequenced', or 'transformers').
         model_id (str): The ID of the model to use.
         dtype (str): Data type for model computations ('int8', 'float16', or 'float32').
@@ -478,6 +546,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
         start_time (float, optional): Start time in seconds for trimming audio.
         end_time (float, optional): End time in seconds for trimming audio.
         verbose (bool, optional): Whether to output verbose logging.
     Yields:
         Tuple[str, str, str or None]: Metrics and messages, transcription text, path to transcription file.
@@ -494,29 +563,28 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
         if verbose:
             yield verbose_messages, "", None
-        # Determine if input_source is a URL, file path, or uploaded audio
         audio_path = None
         is_temp_file = False
-        if isinstance(input_source, str):
-            if input_source.startswith('http://') or input_source.startswith('https://'):
-                # Input source is a URL
-                audio_path, is_temp_file = download_audio(input_source, download_method)
-                if not audio_path:
-                    error_msg = f"Error downloading audio from {input_source} using method {download_method}. Check logs for details."
-                    logging.error(error_msg)
-                    yield verbose_messages + error_msg, "", None
-                    return
-            elif os.path.exists(input_source):
-                # Input source is a local file path
-                audio_path = input_source
                 is_temp_file = False
-        elif isinstance(input_source, dict) and 'path' in input_source:
-            # Input source is an uploaded audio file
-            audio_path = input_source['path']
-            is_temp_file = False
         else:
-            error_msg = "No valid audio source provided."
             logging.error(error_msg)
             yield verbose_messages + error_msg, "", None
             return
@@ -601,9 +669,15 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
         for segment in segments:
             if pipeline_type in ["faster-batched", "faster-sequenced"]:
-                transcription_segment = f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\n"
             else:
-                transcription_segment = f"[{segment['timestamp'][0]:.2f}s -> {segment['timestamp'][1]:.2f}s] {segment['text']}\n"
             transcription += transcription_segment
             if verbose:
                 yield verbose_messages + metrics_output, transcription, None
@@ -627,39 +701,49 @@ with gr.Blocks() as iface:
     gr.Markdown("Transcribe audio using multiple pipelines and (Faster) Whisper models.")
     with gr.Row():
-        input_source = gr.Audio(label="Audio Source (Upload a file or enter a URL/YouTube URL)")
-        pipeline_type = gr.Dropdown(
-            choices=["faster-batched", "faster-sequenced", "transformers"],
-            label="Pipeline Type",
-            value="faster-batched"
-        )
-        model_id = gr.Dropdown(
-            label="Model",
-            choices=get_model_options("faster-batched"),
-            value=get_model_options("faster-batched")[0]
-        )
-    with gr.Row():
-        dtype = gr.Dropdown(choices=["int8", "float16", "float32"], label="Data Type", value="int8")
-        batch_size = gr.Slider(minimum=1, maximum=32, step=1, value=16, label="Batch Size")
-        download_method = gr.Dropdown(
-            choices=["yt-dlp", "pytube", "youtube-dl", "yt-dlp-alt", "ffmpeg", "aria2", "wget"],
-            label="Download Method",
-            value="yt-dlp"
-        )
     with gr.Row():
-        start_time = gr.Number(label="Start Time (seconds)", value=None, minimum=0)
-        end_time = gr.Number(label="End Time (seconds)", value=None, minimum=0)
-        verbose = gr.Checkbox(label="Verbose Output", value=True)  # Set to True by default
     transcribe_button = gr.Button("Transcribe")
     with gr.Row():
         metrics_output = gr.Textbox(label="Transcription Metrics and Verbose Messages", lines=10)
         transcription_output = gr.Textbox(label="Transcription", lines=10)
         transcription_file = gr.File(label="Download Transcription")
     def update_model_dropdown(pipeline_type):
         """
         Updates the model dropdown choices based on the selected pipeline type.
@@ -681,7 +765,7 @@ with gr.Blocks() as iface:
             logging.error(f"Error in update_model_dropdown: {str(e)}")
             return gr.update(choices=["Error"], value="Error", visible=True)
-    # event handler for pipeline_type change
     pipeline_type.change(update_model_dropdown, inputs=[pipeline_type], outputs=[model_id])
     def transcribe_with_progress(*args):
@@ -690,17 +774,16 @@ with gr.Blocks() as iface:
     transcribe_button.click(
         transcribe_with_progress,
-        inputs=[input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose],
         outputs=[metrics_output, transcription_output, transcription_file]
     )
     gr.Examples(
         examples=[
-            ["https://www.youtube.com/watch?v=daQ_hqA6HDo", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", None, None, True],
-            ["https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453_-_The_Price_is_Right_-_Law_and_Economics_in_the_Second_Scholastic5yxzh.mp3", "faster-sequenced", "deepdml/faster-whisper-large-v3-turbo-ct2", "float16", 1, "ffmpeg", 0, 300, True],
-            ["path/to/local/audio.mp3", "transformers", "openai/whisper-large-v3", "float16", 16, "yt-dlp", 60, 180, True]
         ],
-        inputs=[input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose],
     )
 iface.launch(share=False, debug=True)

 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 logging.info(f"Using device: {device}")
+def download_audio(url, method_choice, proxy_url, proxy_username, proxy_password):
     """
+    Downloads audio from a given URL using the specified method and proxy settings.
     Args:
         url (str): The URL of the audio.
         method_choice (str): The method to use for downloading audio.
+        proxy_url (str): Proxy URL if needed.
+        proxy_username (str): Proxy username.
+        proxy_password (str): Proxy password.
     Returns:
         tuple: (path to the downloaded audio file, is_temp_file), or (None, False) if failed.
     logging.info(f"Downloading audio from URL: {url} using method: {method_choice}")
     try:
         if 'youtube.com' in parsed_url.netloc or 'youtu.be' in parsed_url.netloc:
+            audio_file = download_youtube_audio(url, method_choice, proxy_url, proxy_username, proxy_password)
             if not audio_file:
                 error_msg = f"Failed to download audio from {url} using method {method_choice}. Ensure yt-dlp is up to date."
                 logging.error(error_msg)
                 return None, False
         elif parsed_url.scheme == 'rtsp':
+            audio_file = download_rtsp_audio(url, proxy_url)
             if not audio_file:
                 error_msg = f"Failed to download RTSP audio from {url}"
                 logging.error(error_msg)
                 return None, False
         else:
+            audio_file = download_direct_audio(url, method_choice, proxy_url, proxy_username, proxy_password)
             if not audio_file:
                 error_msg = f"Failed to download audio from {url} using method {method_choice}"
                 logging.error(error_msg)
         return None, False
+def download_youtube_audio(url, method_choice, proxy_url, proxy_username, proxy_password):
     """
     Downloads audio from a YouTube URL using the specified method.
     Args:
         url (str): The YouTube URL.
         method_choice (str): The method to use for downloading.
+        proxy_url (str): Proxy URL if needed.
+        proxy_username (str): Proxy username.
+        proxy_password (str): Proxy password.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     method = methods.get(method_choice, yt_dlp_method)
     try:
         logging.info(f"Attempting to download YouTube audio using {method_choice}")
+        return method(url, proxy_url, proxy_username, proxy_password)
     except Exception as e:
         logging.error(f"Error downloading using {method_choice}: {str(e)}")
         return None
+def yt_dlp_method(url, proxy_url, proxy_username, proxy_password):
     """
     Downloads YouTube audio using yt-dlp and saves it to a temporary file.
     Args:
         url (str): The YouTube URL.
+        proxy_url (str): Proxy URL if needed.
+        proxy_username (str): Proxy username.
+        proxy_password (str): Proxy password.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
         'logger': MyLogger(),  # Use a custom logger to capture yt-dlp logs
         'progress_hooks': [my_hook],  # Hook to capture download progress and errors
     }
+    if proxy_url and len(proxy_url.strip()) > 0:
+        ydl_opts['proxy'] = proxy_url
     try:
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             info = ydl.extract_info(url, download=True)
     elif d['status'] == 'error':
         logging.error(f"Download error: {d['filename']}")
+def pytube_method(url, proxy_url, proxy_username, proxy_password):
     """
     Downloads audio from a YouTube URL using pytube and saves it to a temporary file.
     Args:
         url (str): The YouTube URL.
+        proxy_url (str): Proxy URL if needed.
+        proxy_username (str): Proxy username.
+        proxy_password (str): Proxy password.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     logging.info("Using pytube method")
     from pytube import YouTube
     try:
+        proxies = None
+        if proxy_url and len(proxy_url.strip()) > 0:
+            proxies = {
+                "http": proxy_url,
+                "https": proxy_url
+            }
+        yt = YouTube(url, proxies=proxies)
         audio_stream = yt.streams.filter(only_audio=True).first()
         if audio_stream is None:
             error_msg = "No audio streams available with pytube."
         return None
+def download_rtsp_audio(url, proxy_url):
     """
     Downloads audio from an RTSP URL using FFmpeg.
     Args:
         url (str): The RTSP URL.
+        proxy_url (str): Proxy URL if needed.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     logging.info("Using FFmpeg to download RTSP stream")
     output_file = tempfile.mktemp(suffix='.mp3')
     command = ['ffmpeg', '-i', url, '-acodec', 'libmp3lame', '-ab', '192k', '-y', output_file]
+    env = os.environ.copy()
+    if proxy_url and len(proxy_url.strip()) > 0:
+        env['http_proxy'] = proxy_url
+        env['https_proxy'] = proxy_url
     try:
+        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
         logging.info(f"Downloaded RTSP audio to: {output_file}")
         return output_file
     except subprocess.CalledProcessError as e:
         logging.error(f"Error downloading RTSP audio: {str(e)}")
         return None
+def download_direct_audio(url, method_choice, proxy_url, proxy_username, proxy_password):
     """
     Downloads audio from a direct URL using the specified method.
     Args:
         url (str): The direct URL of the audio file.
         method_choice (str): The method to use for downloading.
+        proxy_url (str): Proxy URL if needed.
+        proxy_username (str): Proxy username.
+        proxy_password (str): Proxy password.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     }
     method = methods.get(method_choice, requests_method)
     try:
+        audio_file = method(url, proxy_url, proxy_username, proxy_password)
         if not audio_file or not os.path.exists(audio_file):
             error_msg = f"Failed to download direct audio from {url} using method {method_choice}"
             logging.error(error_msg)
         logging.error(f"Error downloading direct audio with {method_choice}: {str(e)}")
         return None
+def requests_method(url, proxy_url, proxy_username, proxy_password):
     """
     Downloads audio using the requests library.
     Args:
         url (str): The URL of the audio file.
+        proxy_url (str): Proxy URL if needed.
+        proxy_username (str): Proxy username.
+        proxy_password (str): Proxy password.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     """
     try:
+        proxies = None
+        auth = None
+        if proxy_url and len(proxy_url.strip()) > 0:
+            proxies = {
+                "http": proxy_url,
+                "https": proxy_url
+            }
+            if proxy_username and proxy_password:
+                auth = (proxy_username, proxy_password)
+        response = requests.get(url, stream=True, proxies=proxies, auth=auth)
         if response.status_code == 200:
             with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
                 for chunk in response.iter_content(chunk_size=8192):
         logging.error(f"Error in requests_method: {str(e)}")
         return None
+def wget_method(url, proxy_url, proxy_username, proxy_password):
     """
     Downloads audio using the wget command-line tool.
     Args:
         url (str): The URL of the audio file.
+        proxy_url (str): Proxy URL if needed.
+        proxy_username (str): Proxy username.
+        proxy_password (str): Proxy password.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     logging.info("Using wget method")
     output_file = tempfile.mktemp(suffix='.mp3')
     command = ['wget', '-O', output_file, url]
+    env = os.environ.copy()
+    if proxy_url and len(proxy_url.strip()) > 0:
+        env['http_proxy'] = proxy_url
+        env['https_proxy'] = proxy_url
     try:
+        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
         logging.info(f"Downloaded audio to: {output_file}")
         return output_file
     except subprocess.CalledProcessError as e:
         logging.error(f"Error in wget_method: {str(e)}")
         return None
+def yt_dlp_direct_method(url, proxy_url, proxy_username, proxy_password):
     """
     Downloads audio using yt-dlp (supports various protocols and sites).
     Args:
         url (str): The URL of the audio or webpage containing audio.
+        proxy_url (str): Proxy URL if needed.
+        proxy_username (str): Proxy username.
+        proxy_password (str): Proxy password.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
             'preferredquality': '192',
         }],
     }
+    if proxy_url and len(proxy_url.strip()) > 0:
+        ydl_opts['proxy'] = proxy_url
     try:
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.download([url])
         logging.error(f"Error in yt_dlp_direct_method: {str(e)}")
         return None
+def ffmpeg_method(url, proxy_url, proxy_username, proxy_password):
     """
     Downloads audio using FFmpeg.
     Args:
         url (str): The URL of the audio file.
+        proxy_url (str): Proxy URL if needed.
+        proxy_username (str): Proxy username.
+        proxy_password (str): Proxy password.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     logging.info("Using ffmpeg method")
     output_file = tempfile.mktemp(suffix='.mp3')
     command = ['ffmpeg', '-i', url, '-vn', '-acodec', 'libmp3lame', '-q:a', '2', output_file]
+    env = os.environ.copy()
+    if proxy_url and len(proxy_url.strip()) > 0:
+        env['http_proxy'] = proxy_url
+        env['https_proxy'] = proxy_url
     try:
+        subprocess.run(command, check=True, capture_output=True, text=True, env=env)
         logging.info(f"Downloaded and converted audio to: {output_file}")
         return output_file
     except subprocess.CalledProcessError as e:
         logging.error(f"Error in ffmpeg_method: {str(e)}")
         return None
+def aria2_method(url, proxy_url, proxy_username, proxy_password):
     """
     Downloads audio using aria2.
     Args:
         url (str): The URL of the audio file.
+        proxy_url (str): Proxy URL if needed.
+        proxy_username (str): Proxy username.
+        proxy_password (str): Proxy password.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     logging.info("Using aria2 method")
     output_file = tempfile.mktemp(suffix='.mp3')
     command = ['aria2c', '--split=4', '--max-connection-per-server=4', '--out', output_file, url]
+    if proxy_url and len(proxy_url.strip()) > 0:
+        command.extend(['--all-proxy', proxy_url])
     try:
         subprocess.run(command, check=True, capture_output=True, text=True)
         logging.info(f"Downloaded audio to: {output_file}")
     Returns:
         str: Path to the trimmed audio file.
+    Raises:
+        gr.Error: If invalid start or end times are provided.
     """
     try:
         logging.info(f"Trimming audio from {start_time} to {end_time}")
 # Dictionary to store loaded models
 loaded_models = {}
+def transcribe_audio(audio_upload, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False, include_timecodes=False):
     """
     Transcribes audio from a given source using the specified pipeline and model.
     Args:
+        audio_upload (file): Uploaded audio file.
+        audio_url (str): URL of audio.
+        proxy_url (str): Proxy URL if needed.
+        proxy_username (str): Proxy username.
+        proxy_password (str): Proxy password.
         pipeline_type (str): Type of pipeline to use ('faster-batched', 'faster-sequenced', or 'transformers').
         model_id (str): The ID of the model to use.
         dtype (str): Data type for model computations ('int8', 'float16', or 'float32').
         start_time (float, optional): Start time in seconds for trimming audio.
         end_time (float, optional): End time in seconds for trimming audio.
         verbose (bool, optional): Whether to output verbose logging.
+        include_timecodes (bool, optional): Whether to include timecodes in the transcription.
     Yields:
         Tuple[str, str, str or None]: Metrics and messages, transcription text, path to transcription file.
         if verbose:
             yield verbose_messages, "", None
+        # Determine the audio source
         audio_path = None
         is_temp_file = False
+        if audio_upload is not None:
+            if isinstance(audio_upload, dict) and 'name' in audio_upload:
+                # audio_upload is a dict with file info
+                audio_path = audio_upload['name']
+                is_temp_file = False
+            elif isinstance(audio_upload, str) and os.path.exists(audio_upload):
+                audio_path = audio_upload
                 is_temp_file = False
+        elif audio_url is not None and len(audio_url.strip()) > 0:
+            # audio_url is provided
+            audio_path, is_temp_file = download_audio(audio_url, download_method, proxy_url, proxy_username, proxy_password)
+            if not audio_path:
+                error_msg = f"Error downloading audio from {audio_url} using method {download_method}. Check logs for details."
+                logging.error(error_msg)
+                yield verbose_messages + error_msg, "", None
+                return
         else:
+            error_msg = "No audio source provided. Please upload an audio file or enter a URL."
             logging.error(error_msg)
             yield verbose_messages + error_msg, "", None
             return
         for segment in segments:
             if pipeline_type in ["faster-batched", "faster-sequenced"]:
+                if include_timecodes:
+                    transcription_segment = f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\n"
+                else:
+                    transcription_segment = f"{segment.text}\n"
             else:
+                if include_timecodes:
+                    transcription_segment = f"[{segment['timestamp'][0]:.2f}s -> {segment['timestamp'][1]:.2f}s] {segment['text']}\n"
+                else:
+                    transcription_segment = f"{segment['text']}\n"
             transcription += transcription_segment
             if verbose:
                 yield verbose_messages + metrics_output, transcription, None
     gr.Markdown("Transcribe audio using multiple pipelines and (Faster) Whisper models.")
     with gr.Row():
+        audio_upload = gr.Audio(label="Upload or Record Audio", source="upload")
+        audio_url = gr.Textbox(label="Or Enter URL of audio file or YouTube link")
     with gr.Row():
+        proxy_url = gr.Textbox(label="Proxy URL", placeholder="Enter proxy URL if needed", value="", lines=1)
+        proxy_username = gr.Textbox(label="Proxy Username", placeholder="Proxy username (optional)", value="", lines=1)
+        proxy_password = gr.Textbox(label="Proxy Password", placeholder="Proxy password (optional)", value="", lines=1, type="password")
     transcribe_button = gr.Button("Transcribe")
+    with gr.Accordion("Advanced Options", open=False):
+        with gr.Row():
+            pipeline_type = gr.Dropdown(
+                choices=["faster-batched", "faster-sequenced", "transformers"],
+                label="Pipeline Type",
+                value="faster-batched"
+            )
+            model_id = gr.Dropdown(
+                label="Model",
+                choices=get_model_options("faster-batched"),
+                value="cstr/whisper-large-v3-turbo-int8_float32"
+            )
+        with gr.Row():
+            dtype = gr.Dropdown(choices=["int8", "float16", "float32"], label="Data Type", value="int8")
+            batch_size = gr.Slider(minimum=1, maximum=32, step=1, value=16, label="Batch Size")
+            download_method = gr.Dropdown(
+                choices=["yt-dlp", "pytube", "youtube-dl", "yt-dlp-alt", "ffmpeg", "aria2", "wget"],
+                label="Download Method",
+                value="yt-dlp"
+            )
+        with gr.Row():
+            start_time = gr.Number(label="Start Time (seconds)", value=None, minimum=0)
+            end_time = gr.Number(label="End Time (seconds)", value=None, minimum=0)
+            verbose = gr.Checkbox(label="Verbose Output", value=False)
+            include_timecodes = gr.Checkbox(label="Include timecodes in transcription", value=False)
     with gr.Row():
         metrics_output = gr.Textbox(label="Transcription Metrics and Verbose Messages", lines=10)
         transcription_output = gr.Textbox(label="Transcription", lines=10)
         transcription_file = gr.File(label="Download Transcription")
     def update_model_dropdown(pipeline_type):
         """
         Updates the model dropdown choices based on the selected pipeline type.
             logging.error(f"Error in update_model_dropdown: {str(e)}")
             return gr.update(choices=["Error"], value="Error", visible=True)
+    # Event handler for pipeline_type change
     pipeline_type.change(update_model_dropdown, inputs=[pipeline_type], outputs=[model_id])
     def transcribe_with_progress(*args):
     transcribe_button.click(
         transcribe_with_progress,
+        inputs=[audio_upload, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose, include_timecodes],
         outputs=[metrics_output, transcription_output, transcription_file]
     )
     gr.Examples(
         examples=[
+            [None, "https://www.youtube.com/watch?v=daQ_hqA6HDo", "", "", "", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", None, None, False, False],
+            [None, "https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453.mp3", "", "", "", "faster-sequenced", "SYSTRAN/faster-whisper-large-v1", "float16", 1, "ffmpeg", 0, 300, False, False],
         ],
+        inputs=[audio_upload, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose, include_timecodes],
     )
 iface.launch(share=False, debug=True)