# Spaces: Runtime error
import functools
import os
import time

import whisper
import yt_dlp
from pydub import AudioSegment
def download_audio_from_youtube(url):
    """Download the audio track of a YouTube video and save it as an MP3 file.

    The download is delegated to yt-dlp; its ``FFmpegExtractAudio``
    post-processor converts the best available audio stream to MP3.

    Args:
        url: URL of the video, passed unchanged to ``YoutubeDL.download``.

    Returns:
        The relative path of the extracted audio file, ``'video.mp3'``.
    """
    # The returned path is derived from the same stem and codec that are
    # handed to yt-dlp, so the two cannot drift apart.
    output_stem = 'video'
    audio_codec = 'mp3'
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': f'{output_stem}.%(ext)s',
        # When the URL refers to both a video and a playlist, fetch only the
        # video: every entry would otherwise be written to the same template.
        'noplaylist': True,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': audio_codec,
            'preferredquality': '192',
        }],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return f'{output_stem}.{audio_codec}'
def split_audio(file_path, chunk_length_ms=60000):
    """Cut an audio file into consecutive MP3 chunks of a fixed length.

    Args:
        file_path: Path of the audio file to load with pydub.
        chunk_length_ms: Step, in milliseconds, between chunk start offsets
            and the length of each slice taken from the audio.

    Returns:
        The names of the exported files (``chunk_0.mp3``, ``chunk_1.mp3``,
        ...) in the order the chunks occur in the audio. The names are
        relative, so the files land in the current working directory.
    """
    recording = AudioSegment.from_file(file_path)

    # Slice everything first, then export, one file per slice.
    start_offsets = range(0, len(recording), chunk_length_ms)
    segments = [
        recording[start:start + chunk_length_ms] for start in start_offsets
    ]

    exported_names = []
    for index, segment in enumerate(segments):
        target_name = f"chunk_{index}.mp3"
        segment.export(target_name, format="mp3")
        exported_names.append(target_name)
    return exported_names
@functools.lru_cache(maxsize=None)
def _load_whisper_model(model_name):
    """Return the Whisper model called *model_name*, loading it only once."""
    return whisper.load_model(model_name)


def transcribe_audio(file_path, model_name="small"):
    """Transcribe an audio file to text with a Whisper model.

    Args:
        file_path: Path of the audio file handed to ``model.transcribe``.
        model_name: Name passed to ``whisper.load_model``. Defaults to
            ``"small"``, the model this function used before the parameter
            existed.

    Returns:
        The ``"text"`` entry of the transcription result.
    """
    # The model is cached per name so that transcribing many chunks in a row
    # does not call whisper.load_model again for every single chunk.
    model = _load_whisper_model(model_name)
    result = model.transcribe(file_path)
    return result["text"]
def main():
    """Read a YouTube URL from standard input and print the video's transcript.

    Steps: download the audio, split it into chunks, transcribe each chunk,
    then print the chunk transcripts joined by single spaces.

    Example input:
        https://youtu.be/x6KuoHihktM?si=VoqHMP8emTnXKJHa

    Raises:
        SystemExit: If the line read from standard input is blank.
    """
    video_url = input("YouTube video URL: ").strip()
    if not video_url:
        raise SystemExit("No video URL provided.")

    # Step 1: Download audio from YouTube
    print(f"Downloading audio from URL: {video_url}")
    audio_path = download_audio_from_youtube(video_url)

    # Step 2: Split the audio into chunks
    print(f"Splitting audio into chunks from path: {audio_path}")
    chunk_files = split_audio(audio_path)

    # Step 3: Transcribe each chunk and combine results
    print("Transcribing audio chunks...")
    transcriptions = []
    processed = 0
    try:
        for chunk_file in chunk_files:
            print(f"Transcribing chunk: {chunk_file}")
            # Strip so the joined text has exactly one space between chunks,
            # whatever whitespace the transcript carries at its edges.
            transcriptions.append(transcribe_audio(chunk_file).strip())
            os.remove(chunk_file)  # Free the disk space as soon as possible
            processed += 1
    finally:
        # Non-empty only when the loop above was interrupted by an error:
        # remove the chunk files that were never transcribed.
        for leftover in chunk_files[processed:]:
            if os.path.exists(leftover):
                os.remove(leftover)

    transcribed_text = " ".join(transcriptions)
    print(f"Transcribed text: {transcribed_text}")


if __name__ == "__main__":
    main()