"""Download the audio track of a YouTube video and transcribe it with Whisper.

The script reads a video URL from standard input (for example
``https://youtu.be/x6KuoHihktM``), downloads the audio as ``video.mp3``,
splits it into fixed-length MP3 chunks, transcribes every chunk and prints
the combined text.
"""

import functools
import os
import time  # noqa: F401 - retained from the original import list

import whisper
import yt_dlp
from pydub import AudioSegment


def download_audio_from_youtube(url):
    """Download the best available audio stream of *url* as an MP3 file.

    Args:
        url: Address of the video to download.

    Returns:
        The path of the extracted audio file, ``'video.mp3'``, relative to
        the current working directory.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': 'video.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    # The output template combined with the mp3 post-processor settings above
    # is expected to leave the result under this fixed name.
    return 'video.mp3'


def split_audio(file_path, chunk_length_ms=60000):
    """Split an audio file into consecutive MP3 chunks.

    Chunks are written to the current working directory as ``chunk_0.mp3``,
    ``chunk_1.mp3`` and so on. The final chunk may be shorter than
    *chunk_length_ms*.

    Args:
        file_path: Path of the audio file to split.
        chunk_length_ms: Length of each chunk in milliseconds.

    Returns:
        The list of chunk file names, in playback order.

    Raises:
        ValueError: If *chunk_length_ms* is not positive.
    """
    if chunk_length_ms <= 0:
        raise ValueError(
            "chunk_length_ms must be a positive number of milliseconds"
        )

    audio = AudioSegment.from_file(file_path)
    chunk_files = []
    # Slice and export one chunk at a time instead of materialising every
    # slice up front.
    for index, start in enumerate(range(0, len(audio), chunk_length_ms)):
        chunk_file = f"chunk_{index}.mp3"
        audio[start:start + chunk_length_ms].export(chunk_file, format="mp3")
        chunk_files.append(chunk_file)
    return chunk_files


@functools.lru_cache(maxsize=2)
def _load_model(model_name):
    """Load a Whisper model once and reuse it for subsequent calls."""
    return whisper.load_model(model_name)


def transcribe_audio(file_path, model_name="small"):
    """Transcribe an audio file to text with a Whisper model.

    Args:
        file_path: Path of the audio file to transcribe.
        model_name: Name of the Whisper model to use. The model is loaded on
            first use and cached, so transcribing many chunks does not
            reload it for every file.

    Returns:
        The transcribed text.
    """
    result = _load_model(model_name).transcribe(file_path)
    return result["text"]


def _remove_if_present(path):
    """Delete *path*, ignoring the case where it is already gone."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def main():
    """Run the download -> split -> transcribe pipeline for one URL."""
    try:
        video_url = input().strip()
    except EOFError:
        video_url = ""
    if not video_url:
        raise SystemExit("No video URL provided on standard input.")

    # Step 1: Download audio from YouTube
    print(f"Downloading audio from URL: {video_url}")
    audio_path = download_audio_from_youtube(video_url)

    # Step 2: Split the audio into chunks
    print(f"Splitting audio into chunks from path: {audio_path}")
    chunk_files = split_audio(audio_path)

    # Step 3: Transcribe each chunk and combine results
    print("Transcribing audio chunks...")
    transcriptions = []
    try:
        for chunk_file in chunk_files:
            print(f"Transcribing chunk: {chunk_file}")
            transcriptions.append(transcribe_audio(chunk_file))
            # Free disk space as soon as a chunk has been processed.
            _remove_if_present(chunk_file)
    finally:
        # If transcription failed part-way, do not leave the remaining
        # chunk files behind.
        for chunk_file in chunk_files:
            _remove_if_present(chunk_file)

    transcribed_text = " ".join(transcriptions)
    print(f"Transcribed text: {transcribed_text}")


if __name__ == "__main__":
    main()