Spaces:
Runtime error
Runtime error
from selenium import webdriver | |
from selenium.webdriver.chrome.service import Service | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.common.keys import Keys | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
import os | |
import requests | |
def _scrape_title_and_thumbnail(driver, wait, youtube_url):
    """Open the YouTube page and return ``(page_title, og_image_url)``."""
    driver.get(youtube_url)
    # Block until the page title contains "YouTube" before reading it.
    wait.until(EC.title_contains("YouTube"))
    # driver.title is the browser page title, not a dedicated video-title field.
    title = driver.title
    thumbnail_meta = wait.until(
        EC.presence_of_element_located((By.XPATH, "//meta[@property='og:image']"))
    )
    return title, thumbnail_meta.get_attribute("content")


def _find_download_url(driver, wait, youtube_url):
    """Submit ``youtube_url`` to yt1d.com and return the media link, or None."""
    driver.get("https://yt1d.com/en/")
    # The URL input doubles as the "page is ready" signal.
    input_box = wait.until(EC.presence_of_element_located((By.ID, "txt-url")))
    input_box.send_keys(youtube_url)
    input_box.send_keys(Keys.RETURN)

    mp3_button = wait.until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "button[data-ftype='mp3']"))
    )
    # get_attribute returns None when the attribute is absent; treat that the
    # same as a handler that does not have the expected shape.
    onclick_attr = mp3_button.get_attribute("onclick") or ""
    # Splitting on "'" yields >= 7 pieces only if the handler contains at least
    # six single quotes (presumably three quoted arguments -- confirm against
    # the site's markup).
    if len(onclick_attr.split("'")) < 7:
        return None

    # NOTE(review): the button is never clicked; this relies on the final link
    # appearing in the page on its own -- confirm that is still how the site works.
    final_link = wait.until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, "a[href*='googlevideo.com/videoplayback']")
        )
    )
    return final_link.get_attribute("href")


def _save_stream(url, output_file):
    """Stream ``url`` into ``output_file``; return True on HTTP 200."""
    # A timeout keeps a stalled server from hanging the caller forever, and the
    # context manager releases the connection on every exit path.
    with requests.get(url, stream=True, timeout=30) as response:
        if response.status_code != 200:
            print(f"Failed to download video. HTTP Status Code: {response.status_code}")
            return False
        with open(output_file, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
    print(f"Video downloaded successfully as {output_file}")
    return True


def _read_chromedriver_log(log_file_path):
    """Return the log file's text, creating a placeholder file if it is missing."""
    try:
        with open(log_file_path, "r") as log_file:
            return log_file.read()
    except FileNotFoundError:
        placeholder = "ChromeDriver log file created."
        with open(log_file_path, "w") as log_file:
            log_file.write(placeholder)
        return placeholder


def download_mp3_selenium(youtube_url):
    """Scrape a YouTube page and download its media through yt1d.com.

    A headless Chrome session reads the page title and ``og:image`` thumbnail
    from ``youtube_url``, then submits the same URL to https://yt1d.com/en/ and
    streams the resulting ``googlevideo.com`` link to ``downloaded_video.mp4``
    in the current working directory. Download failures are reported with
    ``print`` rather than raised.

    Args:
        youtube_url: URL of the YouTube video page.

    Returns:
        A ``(title, thumbnail_url, log_contents)`` tuple: the browser page
        title, the ``og:image`` URL, and the text of
        ``/tmp/chromedriver.log``.

    Raises:
        selenium.common.exceptions.TimeoutException: if an awaited element or
            title does not appear within 20 seconds.
        requests.exceptions.RequestException: if the download request fails at
            the network level or times out.
    """
    options = webdriver.ChromeOptions()
    # Flags commonly needed to run Chrome in containers / cloud sandboxes.
    for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage", "--disable-gpu"):
        options.add_argument(flag)

    driver = webdriver.Chrome(options=options)
    try:
        # Every explicit wait below shares this 20-second timeout.
        wait = WebDriverWait(driver, 20)
        title, thumbnail_url = _scrape_title_and_thumbnail(driver, wait, youtube_url)

        mp3_download_url = _find_download_url(driver, wait, youtube_url)
        if mp3_download_url:
            print(f"Final MP3 Download URL: {mp3_download_url}")
            # File name kept as-is for backward compatibility, even though the
            # function is named for MP3 output.
            _save_stream(mp3_download_url, "downloaded_video.mp4")
        else:
            print("Failed to extract MP3 download link from the page.")
    finally:
        # Always shut the browser down, including when a wait times out.
        driver.quit()

    # NOTE(review): nothing here configures ChromeDriver to write to this path,
    # so unless something else does, only the placeholder text is returned.
    log_contents = _read_chromedriver_log("/tmp/chromedriver.log")
    print("ChromeDriver Log Contents:\n", log_contents)

    return title, thumbnail_url, log_contents
# Example usage:
# youtube_url = "https://youtu.be/MAZyQ-38b8M?si=q0dai-wF6FQz6MGN"
# title, thumbnail_url, log_contents = download_mp3_selenium(youtube_url)
# print(f"Title: {title}")
# print(f"Thumbnail: {thumbnail_url}")