Ramakrushna
/

summryai

Model card Files Files and versions Community

summryai / data_loader.py

Amitjadhav01's picture

AMIT's python files

b574243 verified 11 days ago

1.92 kB

	import os
	import logging
	import subprocess
	import time

	def download_audio(youtube_url):
	    """Download the audio of a YouTube video into ``audio_files/`` as WAV.

	    Invokes the external ``yt-dlp`` program with audio extraction (``-x``)
	    and WAV conversion, writing to ``audio_files/<title>.<ext>``.

	    Args:
	        youtube_url: URL of the video to fetch. It is passed to ``yt-dlp``
	            as a positional argument only, never as an option.

	    Raises:
	        RuntimeError: If ``yt-dlp`` exits with a non-zero status. The
	            captured stderr is logged before raising.

	    Note:
	        ``subprocess.run`` itself raises (e.g. ``FileNotFoundError``) when
	        the ``yt-dlp`` executable cannot be started; that is not translated.
	    """
	    # exist_ok avoids the check-then-create race of a separate exists() test.
	    os.makedirs('audio_files', exist_ok=True)

	    command = [
	        'yt-dlp',
	        '-x',  # Extract audio
	        '--audio-format', 'wav',  # Convert to WAV format
	        '-o', 'audio_files/%(title)s.%(ext)s',  # Output template
	        # End of options: a URL beginning with "-" must not be parsed as a
	        # yt-dlp flag, since the value comes straight from user input.
	        '--',
	        youtube_url,
	    ]

	    # run() blocks until yt-dlp (including its post-processing) has exited,
	    # so the output file is complete on return and no extra sleep is needed.
	    result = subprocess.run(command, capture_output=True, text=True)

	    if result.returncode != 0:
	        logging.error('Error downloading audio: %s', result.stderr)
	        raise RuntimeError('Failed to download audio')

	def get_audio_filename():
	    """Return the path of the most recently modified ``.wav`` in ``audio_files``.

	    Returns:
	        The path ``audio_files/<name>.wav`` of the newest WAV file by
	        modification time, or ``None`` when the directory does not exist or
	        contains no ``.wav`` files.
	    """
	    directory = 'audio_files'
	    try:
	        names = os.listdir(directory)
	    except FileNotFoundError:
	        # Nothing has been downloaded yet; report "no file" instead of crashing.
	        return None

	    wav_paths = [
	        os.path.join(directory, name)
	        for name in names
	        if name.endswith('.wav')
	    ]
	    if not wav_paths:
	        return None

	    # max() by mtime picks the newest file; an ascending sort followed by
	    # "first match" would return the oldest one instead.
	    return max(wav_paths, key=os.path.getmtime)

	if __name__ == "__main__":
	    # Interactive entry point: prompt for a URL, download it, then report
	    # which WAV file was produced. Every failure is logged, not re-raised.
	    youtube_url = input("Enter the YouTube URL: ")
	    try:
	        download_audio(youtube_url)

	        # Look up the downloaded file (can be reused later for transcription).
	        audio_file = get_audio_filename()
	        if not audio_file:
	            logging.error('No audio file found after download.')
	            raise Exception('No audio file found.')

	        print(f"Audio file downloaded: {audio_file}")
	    except Exception as err:
	        logging.error(f'An error occurred: {err}')