Youtube-Whisper / download_video.py
danilotpnta's picture
update
48a5d9c
raw
history blame
4.05 kB
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import os
import requests
def _resolve_mp3_download_url(driver, wait, youtube_url):
    """Drive the yt1d.com form and return the final download URL, or None.

    Returns None when the MP3 button's ``onclick`` attribute is missing or
    does not have the expected quoted-argument shape.
    """
    driver.get("https://yt1d.com/en/")

    # wait.until returns the located element, so no second lookup is needed.
    input_box = wait.until(EC.presence_of_element_located((By.ID, "txt-url")))
    input_box.send_keys(youtube_url)
    input_box.send_keys(Keys.RETURN)

    mp3_download_button = wait.until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "button[data-ftype='mp3']"))
    )
    # get_attribute returns None when the attribute is absent; treat that the
    # same as a malformed handler instead of crashing on None.split().
    onclick_attr = mp3_download_button.get_attribute("onclick") or ""

    # The handler is expected to carry several single-quoted arguments; fewer
    # than 7 pieces after splitting on "'" means the page layout is not the
    # one this scraper understands.
    if len(onclick_attr.split("'")) < 7:
        return None

    # The usable link only appears after the page's JavaScript has run.
    final_link = wait.until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, "a[href*='googlevideo.com/videoplayback']")
        )
    )
    return final_link.get_attribute("href")


def _download_to_file(url, output_file):
    """Stream *url* into *output_file*; return True on HTTP 200, else False."""
    # (connect, read) timeouts keep a stalled server from hanging the caller
    # forever; the context manager releases the streamed connection.
    with requests.get(url, stream=True, timeout=(10, 60)) as response:
        if response.status_code != 200:
            print(f"Failed to download video. HTTP Status Code: {response.status_code}")
            return False
        with open(output_file, "wb") as f:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                if chunk:
                    f.write(chunk)
    print(f"Video downloaded successfully as {output_file}")
    return True


def _read_chromedriver_log(log_file_path):
    """Return the ChromeDriver log text, creating a placeholder file if absent.

    Reading the log is diagnostic only, so filesystem errors are reported and
    an empty string is returned rather than failing the whole download.
    """
    try:
        if not os.path.exists(log_file_path):
            with open(log_file_path, 'w') as log_file:
                log_file.write("ChromeDriver log file created.")
        with open(log_file_path, 'r') as log_file:
            log_contents = log_file.read()
    except OSError:
        print("ChromeDriver log not found.")
        return ""
    print("ChromeDriver Log Contents:\n", log_contents)
    return log_contents


def download_mp3_selenium(youtube_url):
    """Scrape a YouTube page and download its media through yt1d.com.

    Opens ``youtube_url`` in headless Chrome, reads the page title and the
    ``og:image`` thumbnail URL, then submits the URL to yt1d.com and streams
    the resulting file to ``downloaded_video.mp4`` in the working directory.

    Args:
        youtube_url: URL of the YouTube video page.

    Returns:
        A ``(title, thumbnail_url, log_contents)`` tuple: the browser page
        title, the ``content`` of the ``og:image`` meta tag, and the text of
        ``/tmp/chromedriver.log`` (a placeholder is written if it is missing).

    Raises:
        selenium.common.exceptions.TimeoutException: if an expected element
            does not appear within 20 seconds.
        requests.exceptions.RequestException: if the download request fails
            at the transport level.
    """
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--disable-gpu')  # Disable GPU so it runs in cloud environments

    driver = webdriver.Chrome(options=options)
    try:
        # Every explicit wait below shares this 20-second timeout.
        wait = WebDriverWait(driver, 20)

        driver.get(youtube_url)
        wait.until(EC.title_contains("YouTube"))
        title = driver.title

        thumbnail_meta = wait.until(
            EC.presence_of_element_located((By.XPATH, "//meta[@property='og:image']"))
        )
        thumbnail_url = thumbnail_meta.get_attribute('content')

        mp3_download_url = _resolve_mp3_download_url(driver, wait, youtube_url)
        if mp3_download_url is None:
            print("Failed to extract MP3 download link from the page.")
        else:
            print(f"Final MP3 Download URL: {mp3_download_url}")
            _download_to_file(mp3_download_url, "downloaded_video.mp4")
    finally:
        # Always shut the browser down, even when a wait times out or the
        # download raises; otherwise headless Chrome processes accumulate.
        driver.quit()

    log_contents = _read_chromedriver_log('/tmp/chromedriver.log')

    # Return the title and thumbnail for display, plus the log for debugging
    return title, thumbnail_url, log_contents
# Example usage:
# youtube_url = "https://youtu.be/MAZyQ-38b8M?si=q0dai-wF6FQz6MGN"
# title, thumbnail_url, log_contents = download_mp3_selenium(youtube_url)
# print(f"Title: {title}")
# print(f"Thumbnail: {thumbnail_url}")