summryai / data_loader.py
Amitjadhav01's picture
AMIT's python files
b574243 verified
raw
history blame
1.92 kB
import os
import logging
import subprocess
import time
def download_audio(youtube_url):
    """Download the audio of a YouTube video as WAV into ``audio_files/``.

    Runs the external ``yt-dlp`` program, which must be available on PATH,
    with audio extraction enabled and the output template
    ``audio_files/%(title)s.%(ext)s``.

    Args:
        youtube_url: URL (or video id) handed to yt-dlp.

    Raises:
        RuntimeError: if yt-dlp exits with a non-zero status; its stderr is
            logged before raising.
        FileNotFoundError: propagated from ``subprocess.run`` when the
            ``yt-dlp`` executable cannot be found.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs('audio_files', exist_ok=True)

    command = [
        'yt-dlp',
        '-x',  # Extract audio
        '--audio-format', 'wav',  # Convert to WAV format
        '-o', 'audio_files/%(title)s.%(ext)s',  # Output template
        # End of options: a URL/id starting with '-' must not be parsed as
        # a yt-dlp flag (the value comes straight from user input).
        '--',
        youtube_url,
    ]
    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode != 0:
        logging.error('Error downloading audio: %s', result.stderr)
        raise RuntimeError('Failed to download audio')
    # No sleep needed here: subprocess.run() returns only after yt-dlp has
    # exited, so the output file is already in place.
def get_audio_filename():
    """Return the path of the most recently modified ``.wav`` in ``audio_files/``.

    Returns:
        The path ``audio_files/<name>.wav`` of the WAV file with the newest
        modification time, or ``None`` when the directory does not exist or
        holds no ``.wav`` file.
    """
    try:
        entries = os.listdir('audio_files')
    except FileNotFoundError:
        # Nothing has been downloaded yet; treat like an empty directory.
        return None

    wav_paths = [
        os.path.join('audio_files', entry)
        for entry in entries
        if entry.endswith('.wav')
    ]
    if not wav_paths:
        return None
    # max() by mtime picks the newest file; the previous ascending sort
    # followed by "first match" returned the oldest one instead.
    return max(wav_paths, key=os.path.getmtime)
if __name__ == "__main__":
    # Script entry point: prompt for a link, fetch its audio, then report
    # which WAV file get_audio_filename() resolves to.
    youtube_url = input("Enter the YouTube URL: ")
    try:
        download_audio(youtube_url)
        # The resolved path is only reported here; it can later be handed
        # to a transcription step.
        downloaded_path = get_audio_filename()
        if not downloaded_path:
            logging.error('No audio file found after download.')
            raise Exception('No audio file found.')
        print(f"Audio file downloaded: {downloaded_path}")
    except Exception as err:
        # Top-level boundary: log any failure instead of showing a traceback.
        logging.error(f'An error occurred: {err}')