Spaces:

rajesh1729
/

youtube-video-transcription-with-whisper

Running

youtube-video-transcription-with-whisper

File size: 5,053 Bytes

import os
import random
import textwrap
import time

import gradio as gr
import whisper
import yt_dlp
from transformers import pipeline

def get_audio(url):
    """Download the audio track of a video and convert it to MP3.

    The video metadata is fetched first so that over-long videos are
    rejected before anything is downloaded.

    Args:
        url: URL of the video to download.

    Returns:
        The path of the downloaded file, always ``'audio_download.mp3'``
        (relative to the current working directory).

    Raises:
        gr.Error: If the video is longer than 30 minutes, or if the
            metadata lookup / download fails. Age-gated, private and
            unavailable videos get a dedicated message; anything else is
            reported as "Error downloading audio: ...".
    """
    # NOTE(review): the output name is fixed, so two downloads running at
    # the same time would write to the same file - confirm requests are
    # serialized before relying on this under concurrency.
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'outtmpl': 'audio_download.%(ext)s',
        'quiet': True,
        'no_warnings': True,
        # Add basic user agent
        'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
        # Add other options to help avoid restrictions
        'extractor_args': {'youtube': {
            'player_client': ['android', 'web'],
            'skip': ['dash', 'hls']
        }},
        # Add network options
        'socket_timeout': 30,
        'retries': 3,
    }

    try:
        # Add small delay to avoid rate limiting
        time.sleep(random.uniform(1, 2))

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)

            # The key may be missing or explicitly None; fall back to 0 so
            # the comparison below cannot raise TypeError.
            duration = info.get('duration') or 0
            if duration > 1800:  # 30 minutes
                raise gr.Error("Video is too long. Please use videos under 30 minutes.")

            ydl.download([url])

        return 'audio_download.mp3'

    except gr.Error:
        # Already a user-facing message (e.g. the duration limit); do not
        # re-wrap it as a generic download error.
        raise
    except Exception as e:
        message = str(e)
        if 'Sign in to confirm' in message:
            raise gr.Error("This video requires age verification. Please try a different video.") from e
        if 'Private video' in message:
            raise gr.Error("This video is private. Please try a public video.") from e
        if 'Video unavailable' in message:
            raise gr.Error("This video is unavailable. Please check the URL and try again.") from e
        raise gr.Error(f"Error downloading audio: {message}") from e

# Load both models once, at module import, so the request handlers below
# share the same instances instead of reloading them per call.
# Whisper "base" checkpoint: used by get_text() to transcribe the audio.
model = whisper.load_model("base")
# Summarization pipeline used by get_summary(). No model name is passed,
# so the checkpoint is whatever transformers selects by default for the task.
summarizer = pipeline("summarization")

def get_text(url):
    """Transcribe the audio of a YouTube video.

    Args:
        url: YouTube video URL. Surrounding whitespace is ignored. Accepted
            prefixes are ``https://www.youtube.com/``,
            ``https://youtube.com/``, ``https://m.youtube.com/`` and
            ``https://youtu.be/``.

    Returns:
        The transcribed text on success. On any failure the function does
        not raise; it returns a string of the form ``"Error: <details>"``
        (get_summary() relies on that prefix to detect failures).
    """
    accepted_prefixes = (
        'https://www.youtube.com/',
        'https://youtube.com/',
        'https://m.youtube.com/',
        'https://youtu.be/',
    )
    audio_file = None
    try:
        # Validate URL (tolerate None and stray whitespace from the textbox)
        url = (url or '').strip()
        if not url.startswith(accepted_prefixes):
            raise gr.Error("Please enter a valid YouTube URL")

        audio_file = get_audio(url)
        result = model.transcribe(audio_file)
        return result['text']
    except Exception as e:
        # UI boundary: report the problem as text instead of propagating.
        return f"Error: {str(e)}"
    finally:
        # Cleanup runs even when transcription fails, so the downloaded
        # file is not left behind. Removal is best-effort.
        if audio_file is not None:
            try:
                os.remove(audio_file)
            except OSError:
                pass

def get_summary(url):
    """Transcribe a YouTube video and return a summary of the transcript.

    The transcript is split into chunks of at most 1000 characters, each
    chunk is summarized on its own, and the partial summaries are joined
    with single spaces.

    Args:
        url: YouTube video URL, passed through to get_text().

    Returns:
        The summary text; the unchanged ``"Error: ..."`` string when
        get_text() reported a failure; a fixed "too short" message when the
        transcript has fewer than 30 words; or ``"Error: <details>"`` when
        summarization itself fails. The function does not raise.
    """
    min_words = 30
    max_chunk_length = 1000
    try:
        article = get_text(url)
        if isinstance(article, str) and article.startswith("Error:"):
            return article

        # Handle empty or short text
        if not article or len(article.split()) < min_words:
            return "Text too short to summarize. Please try a longer video."

        # Split long text into chunks on word boundaries, so no word is cut
        # in half between two chunks.
        chunks = textwrap.wrap(article, width=max_chunk_length)

        # A very short final chunk gives the summarizer almost nothing to
        # work with while min_length still forces output; fold it into the
        # previous chunk instead.
        if len(chunks) > 1 and len(chunks[-1].split()) < min_words:
            tail = chunks.pop()
            chunks[-1] = f"{chunks[-1]} {tail}"

        summaries = []
        for chunk in chunks:
            # truncation=True keeps an unusually token-dense chunk from
            # exceeding the model's maximum input length.
            summary = summarizer(
                chunk,
                max_length=130,
                min_length=30,
                do_sample=False,
                truncation=True,
            )
            summaries.append(summary[0]['summary_text'])

        return " ".join(summaries)
    except Exception as e:
        # UI boundary: report the problem as text instead of propagating.
        return f"Error: {str(e)}"

# Create Gradio interface: a Blocks layout with two tabs, one for the raw
# transcription and one for the summary. Components are created in display
# order inside the context managers, so statement order here is significant.
with gr.Blocks() as demo:
    # Page title and usage hint, rendered as centered HTML inside Markdown.
    gr.Markdown("<h1><center>YouTube Video Transcription with OpenAI's Whisper</center></h1>")
    gr.Markdown("<center>Enter the link of any YouTube video to get the transcription and summary. Please use videos under 30 minutes in length.</center>")
    
    # Tab 1: URL input and transcription output side by side, button below.
    with gr.Tab('Get the transcription of any Youtube video'):
        with gr.Row():
            input_text_1 = gr.Textbox(
                placeholder='Enter the Youtube video URL (e.g., https://www.youtube.com/watch?v=...)',
                label='URL'
            )
            output_text_1 = gr.Textbox(
                placeholder='Transcription of the video',
                label='Transcription'
            )
        result_button_1 = gr.Button('Get Transcription')
        
    # Tab 2: same layout, but the output box shows the summary.
    with gr.Tab('Summary of Youtube video'):
        with gr.Row():
            input_text = gr.Textbox(
                placeholder='Enter the Youtube video URL (e.g., https://www.youtube.com/watch?v=...)',
                label='URL'
            )
            output_text = gr.Textbox(
                placeholder='Summary text of the Youtube Video',
                label='Summary'
            )
        result_button = gr.Button('Get Summary')

    # Wire each button to its handler: the URL textbox of the tab is the
    # input and the neighbouring textbox receives the returned string.
    result_button.click(get_summary, inputs=input_text, outputs=output_text)
    result_button_1.click(get_text, inputs=input_text_1, outputs=output_text_1)

# Launch with appropriate settings
# NOTE(review): launch() is called at module level with no
# `if __name__ == "__main__":` guard, so importing this module starts the app.
demo.launch(debug=True)