manohar02's picture
Create app.py
2bfd134 verified
raw
history blame
2 kB
import yt_dlp
import whisper
from pydub import AudioSegment
import os
import time
# Function to download audio from YouTube video
def download_audio_from_youtube(url):
    """Download a video's audio track with yt-dlp and convert it to MP3.

    yt-dlp is asked for the 'bestaudio/best' format and given one
    post-processor, FFmpegExtractAudio, configured for the 'mp3' codec at
    quality '192'. Output is written using the relative template
    'video.%(ext)s'.

    Args:
        url: Address of the video to download.

    Returns:
        The fixed string 'video.mp3', i.e. the name the extracted MP3 is
        expected to have given the output template above.
    """
    mp3_extractor = dict(
        key='FFmpegExtractAudio',
        preferredcodec='mp3',
        preferredquality='192',
    )
    options = dict(
        format='bestaudio/best',
        outtmpl='video.%(ext)s',
        postprocessors=[mp3_extractor],
    )

    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([url])

    return 'video.mp3'
# Function to split audio into chunks
def split_audio(file_path, chunk_length_ms=60000):
    """Split an audio file into consecutive fixed-length MP3 chunks.

    Every chunk is exported as ``chunk_<index>.mp3`` (a relative path), with
    the index counting up from 0. The last chunk holds whatever audio remains
    and may therefore be shorter than ``chunk_length_ms``.

    Args:
        file_path: Path of the audio file to load with pydub.
        chunk_length_ms: Length of each chunk in milliseconds. Must be a
            positive integer.

    Returns:
        List of the exported chunk file names, in playback order. Empty when
        the loaded audio has zero length.

    Raises:
        ValueError: If ``chunk_length_ms`` is zero or negative.
    """
    # Fail fast, before decoding the whole file: a negative step would
    # silently produce no chunks at all, and a zero step would only surface
    # later as an unrelated-looking range() error.
    if chunk_length_ms <= 0:
        raise ValueError(
            f"chunk_length_ms must be positive, got {chunk_length_ms!r}"
        )

    audio = AudioSegment.from_file(file_path)

    chunk_files = []
    # Slice and export one chunk at a time instead of first building a list
    # holding every slice of the recording.
    for index, start in enumerate(range(0, len(audio), chunk_length_ms)):
        chunk_file = f"chunk_{index}.mp3"
        audio[start:start + chunk_length_ms].export(chunk_file, format="mp3")
        chunk_files.append(chunk_file)
    return chunk_files
# Function to convert audio to text using Whisper model

# Loaded Whisper models keyed by model name, filled lazily on first use.
_WHISPER_MODELS = {}


def _get_whisper_model(name):
    """Return the Whisper model called *name*, loading it only once."""
    if name not in _WHISPER_MODELS:
        _WHISPER_MODELS[name] = whisper.load_model(name)
    return _WHISPER_MODELS[name]


def transcribe_audio(file_path, model=None):
    """Transcribe an audio file to text with a Whisper model.

    Args:
        file_path: Path of the audio file to transcribe.
        model: Optional already-loaded model; any object whose
            ``transcribe(file_path)`` returns a mapping with a ``"text"``
            entry works. When omitted, the "small" Whisper model is used.

    Returns:
        The ``"text"`` entry of the transcription result.
    """
    if model is None:
        # Loading a model is expensive and this function is typically called
        # once per audio chunk, so the default model is loaded a single time
        # and reused across calls.
        model = _get_whisper_model("small")
    result = model.transcribe(file_path)
    return result["text"]
# Example usage: the video URL is read from standard input, e.g.
# https://youtu.be/x6KuoHihktM?si=VoqHMP8emTnXKJHa
video_url = input().strip()  # drop stray whitespace a pasted URL may carry

# Step 1: Download audio from YouTube
print(f"Downloading audio from URL: {video_url}")
audio_path = download_audio_from_youtube(video_url)

# Step 2: Split the audio into chunks
print(f"Splitting audio into chunks from path: {audio_path}")
chunk_files = split_audio(audio_path)

# Step 3: Transcribe each chunk and combine results
print("Transcribing audio chunks...")
transcriptions = []
try:
    for chunk_file in chunk_files:
        print(f"Transcribing chunk: {chunk_file}")
        # No pause between chunks: transcribe_audio runs the Whisper model
        # in this process, so there is no remote rate limit to wait out.
        transcriptions.append(transcribe_audio(chunk_file))
finally:
    # Clean up every chunk file, including the ones never reached because
    # an earlier transcription raised.
    for leftover in chunk_files:
        if os.path.exists(leftover):
            os.remove(leftover)

transcribed_text = " ".join(transcriptions)
print(f"Transcribed text: {transcribed_text}")