Spaces:

danilotpnta
/

Youtube-Whisper

Runtime error

App Files Files Community

danilotpnta committed on Oct 7, 2024

Commit

f62c279

1 Parent(s): 48a5d9c

update: app and added yt_dlp for faster download

Browse files

Files changed (4) hide show

.gitignore +1 -0
app.py +40 -22
download_video.py +37 -93
requirements.txt +2 -3

.gitignore CHANGED Viewed

@@ -166,3 +166,4 @@ cython_debug/
 .DS_Store
 *.mp4
 *.m4v

 .DS_Store
 *.mp4
 *.m4v
+thumbnail.jpg

app.py CHANGED Viewed

@@ -1,37 +1,36 @@
 import whisper
 import gradio as gr
-import os
 import warnings
 warnings.filterwarnings("ignore", category=FutureWarning, module="torch")
-from download_video import download_mp3_selenium
 # Function to download the audio, title, and thumbnail from YouTube
 def download_video_info(url):
     try:
-        # Call the function to download video and get title, thumbnail, and logs
-        title, thumbnail_url, logs_output = download_mp3_selenium(url)
-        audio_file = "downloaded_video.mp4"  # Path to the downloaded audio (MP4)
-        return audio_file, title, thumbnail_url, logs_output
     except Exception as e:
         return None, None, None, str(e)
 # Function to transcribe the downloaded audio using Whisper
-def transcribe_audio(audio_path, model_size="base"):
     model = whisper.load_model(model_size)
-    result = model.transcribe(audio_path)
     return result['text']
-# Split logic: First fetch title, thumbnail, and logs, then transcribe
-def get_video_info_and_transcribe(youtube_url, model_size="base"):
-    # Fetch title, thumbnail, and logs first
-    audio_path, title, thumbnail_url, logs_output = download_video_info(youtube_url)
     # If fetching video info fails
     if not audio_path or not os.path.exists(audio_path):
-        return gr.update(value=f"Error fetching video: {thumbnail_url}"), None, None, gr.update(value=logs_output)
     # Show title and thumbnail to the user while the transcription is happening
     title_output = gr.update(value=title)
@@ -43,15 +42,36 @@ def get_video_info_and_transcribe(youtube_url, model_size="base"):
         thumbnail_output = gr.update(visible=False)  # Hide if no thumbnail
     # Start transcription
-    transcription = transcribe_audio(audio_path, model_size)
-    return title_output, thumbnail_output, gr.update(value=transcription), gr.update(value=logs_output)
 # Gradio interface setup using gradio.components
-with gr.Blocks() as interface:
     with gr.Row():
         youtube_url = gr.Textbox(label="YouTube Link", elem_id="yt_link", scale=5)
         model_size = gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], label="Model Size", value="base", scale=1)
     title_output = gr.Textbox(label="Video Title", interactive=False)
@@ -59,16 +79,14 @@ with gr.Blocks() as interface:
         thumbnail_output = gr.Image(label="Thumbnail", interactive=False, scale=1)
         transcription_output = gr.Textbox(label="Transcription", interactive=False, scale=1)
-    logs_output = gr.Textbox(label="ChromeDriver Logs", interactive=False)
     transcribe_button = gr.Button("Transcribe")
     transcribe_button.click(
         get_video_info_and_transcribe,
-        inputs=[youtube_url, model_size],
-        outputs=[title_output, thumbnail_output, transcription_output, logs_output]
     )
 # Launch the app
 if __name__ == "__main__":
-    interface.launch(server_name="0.0.0.0", server_port=7860)

+import os
 import whisper
 import gradio as gr
+from download_video import download_mp3_yt_dlp
 import warnings
 warnings.filterwarnings("ignore", category=FutureWarning, module="torch")
 # Function to download the audio, title, and thumbnail from YouTube
 def download_video_info(url):
     """Download the audio for a YouTube URL and report where it was saved.

     Args:
         url: Address of the YouTube video to fetch.

     Returns:
         A 3-tuple ``(audio_file, title, thumbnail_url)``. On any failure the
         tuple is ``(None, None, None)``. The arity is the same on both paths
         so callers can always unpack three values; the previous failure path
         returned four, which broke unpacking in the caller.
     """
     try:
         # Call the function to download video and get title, thumbnail
         title, thumbnail_url = download_mp3_yt_dlp(url)
     except Exception as e:
         # Top-level boundary for the UI: report the reason on stdout and
         # signal failure through the return value instead of raising.
         print(f"Error fetching video: {e}")
         return None, None, None

     audio_file = "downloaded_video.mp3"  # Path to the downloaded audio (MP3)
     return audio_file, title, thumbnail_url
 # Function to transcribe the downloaded audio using Whisper
 def transcribe_audio(audio_path, model_size="base", language="en"):
     """Transcribe an audio file with Whisper and return the recognised text.

     Args:
         audio_path: Path of the audio file handed to the model.
         model_size: Name passed to ``whisper.load_model`` (default ``"base"``).
         language: Language code forwarded to ``transcribe`` (default ``"en"``).

     Returns:
         The ``'text'`` entry of the transcription result.
     """
     # The model is loaded on every call, using the requested size.
     asr_model = whisper.load_model(model_size)
     transcript = asr_model.transcribe(audio_path, language=language)
     return transcript['text']
+# Split logic: First fetch title and thumbnail, then transcribe
+def get_video_info_and_transcribe(youtube_url, model_size="base", language="en"):
+    # Fetch title and thumbnail first
+    audio_path, title, thumbnail_url = download_video_info(youtube_url)
     # If fetching video info fails
     if not audio_path or not os.path.exists(audio_path):
+        return gr.update(value="Error fetching video."), None, None
     # Show title and thumbnail to the user while the transcription is happening
     title_output = gr.update(value=title)
         thumbnail_output = gr.update(visible=False)  # Hide if no thumbnail
     # Start transcription
+    transcription = transcribe_audio(audio_path, model_size, language)
+    return title_output, thumbnail_output, gr.update(value=transcription)
 # Gradio interface setup using gradio.components
+with gr.Blocks() as demo:
+    title = "<center><h1>YouTube Whisper ⚡️ </h1></center>"
+    gr.HTML(title)
+    gr.Markdown(
+    """
+    This tool lets you transcribe YouTube videos in multiple languages using **[Whisper](https://openai.com/research/whisper)**, an open-source speech recognition (ASR) model developed by OpenAI.
+    ### Key Features:
+    - **Fast transcription**: Using the **base** model, transcribing a **3 minute** video takes approximately **30 seconds**.
+    - **Multiple language support**: Choose from **English**, **Spanish**, **French**, and more!
+    - **Simple workflow**:
+        1. Paste a YouTube link.
+        2. Select the model size and language.
+        3. Click "Transcribe" to get the text from the video.
+    _Transcription times may vary based on model size and video length._
+    """)
     with gr.Row():
         youtube_url = gr.Textbox(label="YouTube Link", elem_id="yt_link", scale=5)
         model_size = gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], label="Model Size", value="base", scale=1)
+        language = gr.Dropdown(choices=["en", "es", "fr", "de", "it", "ja"], label="Language", value="en", scale=1)
     title_output = gr.Textbox(label="Video Title", interactive=False)
         thumbnail_output = gr.Image(label="Thumbnail", interactive=False, scale=1)
         transcription_output = gr.Textbox(label="Transcription", interactive=False, scale=1)
     transcribe_button = gr.Button("Transcribe")
     transcribe_button.click(
         get_video_info_and_transcribe,
+        inputs=[youtube_url, model_size, language],
+        outputs=[title_output, thumbnail_output, transcription_output]
     )
 # Launch the app
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)

download_video.py CHANGED Viewed

@@ -1,102 +1,46 @@
-from selenium import webdriver
-from selenium.webdriver.chrome.service import Service
-from selenium.webdriver.common.by import By
-from selenium.webdriver.common.keys import Keys
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.webdriver.support import expected_conditions as EC
-import os
 import requests
-def download_mp3_selenium(youtube_url):
-    # Set up the Selenium WebDriver
-    options = webdriver.ChromeOptions()
-    options.add_argument("--headless")
-    options.add_argument("--no-sandbox")
-    options.add_argument('--disable-dev-shm-usage')
-    options.add_argument('--disable-gpu')  # Disable GPU to ensure it runs in cloud environments
-    log_contents = ""  # Initialize log_contents
-    driver = webdriver.Chrome(options=options)
-    # Set up WebDriverWait (with a timeout of 10 seconds)
-    wait = WebDriverWait(driver, 20)
-    # Open the YouTube video page
-    driver.get(youtube_url)
-    # Wait for the title to be available
-    wait.until(EC.title_contains("YouTube"))
-    # Scrape the title
-    title = driver.title  # This gives you the video title
-    # Wait for the thumbnail to load and scrape it
-    thumbnail_meta = wait.until(EC.presence_of_element_located((By.XPATH, "//meta[@property='og:image']")))
-    thumbnail_url = thumbnail_meta.get_attribute('content')
-    # Open the YouTube downloader site
-    driver.get("https://yt1d.com/en/")
-    # Wait until the page is loaded completely by checking an element presence
-    wait.until(EC.presence_of_element_located((By.ID, "txt-url")))
-    # Input the YouTube URL into the downloader
-    input_box = driver.find_element(By.ID, "txt-url")
-    input_box.send_keys(youtube_url)
-    input_box.send_keys(Keys.RETURN)
-    # Wait for the MP3 download button to appear
-    mp3_download_button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[data-ftype='mp3']")))
-    onclick_attr = mp3_download_button.get_attribute("onclick")
-    # Extract parameters from the JavaScript function call
-    params = onclick_attr.split("'")
-    if len(params) >= 7:
-        mp3_download_url = params[1]  # Extracted base download URL
-        # Wait for the final download URL to be available after JavaScript modifications
-        final_link = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[href*='googlevideo.com/videoplayback']")))
-        mp3_download_url = final_link.get_attribute("href")
-        print(f"Final MP3 Download URL: {mp3_download_url}")
-        response = requests.get(mp3_download_url, stream=True)
-        # Check if the request was successful
         if response.status_code == 200:
-            # Write the video content to a file
-            output_file = "downloaded_video.mp4"
-            with open(output_file, "wb") as f:
-                for chunk in response.iter_content(chunk_size=1024):
-                    if chunk:
-                        f.write(chunk)
-            print(f"Video downloaded successfully as {output_file}")
         else:
-            print(f"Failed to download video. HTTP Status Code: {response.status_code}")
-    else:
-        print("Failed to extract MP3 download link from the page.")
-    # Close the browser
-    driver.quit()
-    # Check and print ChromeDriver logs
-    log_file_path = '/tmp/chromedriver.log'
-    # Create a log file if it doesn't exist
-    if not os.path.exists(log_file_path):
-        with open(log_file_path, 'w') as log_file:
-            log_file.write("ChromeDriver log file created.")
-    if os.path.exists(log_file_path):
-        with open(log_file_path, 'r') as log_file:
-            log_contents = log_file.read()
-            print("ChromeDriver Log Contents:\n", log_contents)
-    else:
-        print("ChromeDriver log not found.")
-    # Return the title and thumbnail for display
-    return title, thumbnail_url, log_contents
 # Example usage:
 # youtube_url = "https://youtu.be/MAZyQ-38b8M?si=q0dai-wF6FQz6MGN"
-# title, thumbnail_url = download_mp3_selenium(youtube_url)
 # print(f"Title: {title}")
-# print(f"Thumbnail: {thumbnail_url}")

+import yt_dlp as youtube_dl
 import requests
def _print_download_progress(status):
    """Progress hook for yt-dlp: print the file name and completion percentage.

    The fields are read with ``dict.get`` so a callback that lacks
    ``filename`` or ``_percent_str`` does not raise ``KeyError``.
    """
    filename = status.get('filename', 'unknown file')
    percent = status.get('_percent_str', 'n/a')
    print(f"Downloading {filename}: {percent}")


def download_mp3_yt_dlp(youtube_url):
    """Download a YouTube video's audio as MP3 and save its thumbnail.

    The audio is written using the output template ``downloaded_video.%(ext)s``
    and converted to MP3 by the ``FFmpegExtractAudio`` post-processor
    (presumably requires ffmpeg to be installed -- confirm for the deployment).
    If the video metadata includes a thumbnail URL, the image is fetched and
    stored as ``thumbnail.jpg``; a thumbnail failure is reported on stdout and
    does not fail the call.

    Args:
        youtube_url: Address of the video to download.

    Returns:
        A ``(title, thumbnail_url)`` tuple. ``title`` falls back to
        ``'Unknown Title'`` and ``thumbnail_url`` to ``None`` when the
        metadata does not provide them.

    Raises:
        Whatever yt-dlp raises when extraction or download fails.
    """
    # Set up yt-dlp options
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'outtmpl': 'downloaded_video.%(ext)s',
        'quiet': False,
        'no_warnings': True,
        'progress_hooks': [_print_download_progress],
    }

    # One session both extracts the metadata and downloads the audio, so the
    # video page is resolved only once instead of twice.
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(youtube_url, download=True)
        title = info_dict.get('title', 'Unknown Title')
        thumbnail_url = info_dict.get('thumbnail', None)

    # Fetch the thumbnail for display (best effort: the audio is already saved)
    if thumbnail_url:
        try:
            # A timeout keeps a stalled image host from hanging the request.
            response = requests.get(thumbnail_url, timeout=10)
        except requests.RequestException as exc:
            print(f"Failed to download thumbnail: {exc}")
        else:
            if response.status_code == 200:
                with open('thumbnail.jpg', 'wb') as f:
                    f.write(response.content)
                print("Thumbnail downloaded successfully.")
            else:
                print(f"Failed to download thumbnail. HTTP Status Code: {response.status_code}")

    # Return the title and thumbnail URL
    return title, thumbnail_url
 # Example usage:
 # youtube_url = "https://youtu.be/MAZyQ-38b8M?si=q0dai-wF6FQz6MGN"
+# title, thumbnail_url = download_mp3_yt_dlp(youtube_url)
 # print(f"Title: {title}")
+# print(f"Thumbnail: {thumbnail_url}")

requirements.txt CHANGED Viewed

@@ -1,6 +1,5 @@
-selenium
-webdriver-manager
 requests
 gradio
 openai-whisper @ git+https://github.com/openai/whisper.git
-tqdm

 requests
 gradio
 openai-whisper @ git+https://github.com/openai/whisper.git
+tqdm
+yt_dlp