File size: 5,053 Bytes
342ece4 312fe18 342ece4 f702841 28b5116 342ece4 f702841 342ece4 f702841 342ece4 f702841 342ece4 f702841 342ece4 f702841 342ece4 f702841 342ece4 f702841 342ece4 28b5116 342ece4 f702841 342ece4 28b5116 342ece4 f702841 342ece4 28b5116 342ece4 f702841 342ece4 f702841 342ece4 f702841 342ece4 28b5116 f702841 3fb6cf7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import yt_dlp
import os
import gradio as gr
from transformers import pipeline
import whisper
import random
import time
def get_audio(url):
    """Download the audio track of a video and return the path of the MP3.

    The video's metadata is fetched first so that over-long videos are
    rejected before anything is downloaded. The audio is then downloaded
    and converted to MP3 by yt-dlp's ``FFmpegExtractAudio`` post-processor.

    Args:
        url: Address of the video to download.

    Returns:
        The relative path ``'audio_download.mp3'``.

    Raises:
        gr.Error: If the video is longer than 30 minutes, or if metadata
            extraction or the download fails. Failures are mapped to a
            user-facing message chosen from the text of the underlying error.
    """
    max_duration_seconds = 1800  # 30 minutes

    # Configure yt-dlp options without browser cookies
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'outtmpl': 'audio_download.%(ext)s',
        'quiet': True,
        'no_warnings': True,
        # Add basic user agent
        'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
        # Add other options to help avoid restrictions
        'extractor_args': {'youtube': {
            'player_client': ['android', 'web'],
            'skip': ['dash', 'hls'],
        }},
        # Add network options
        'socket_timeout': 30,
        'retries': 3,
    }

    try:
        # Add small delay to avoid rate limiting
        time.sleep(random.uniform(1, 2))

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
            # The 'duration' key may be absent or explicitly None; treat
            # both as 0 so the comparison below cannot raise TypeError.
            duration = info.get('duration') or 0
            if duration > max_duration_seconds:
                raise gr.Error("Video is too long. Please use videos under 30 minutes.")
            ydl.download([url])
        return 'audio_download.mp3'
    except gr.Error:
        # Already a user-facing message (the length check above); let it
        # through instead of re-wrapping it as a download error.
        raise
    except Exception as e:
        message = str(e)
        if 'Sign in to confirm' in message:
            raise gr.Error("This video requires age verification. Please try a different video.") from e
        if 'Private video' in message:
            raise gr.Error("This video is private. Please try a public video.") from e
        if 'Video unavailable' in message:
            raise gr.Error("This video is unavailable. Please check the URL and try again.") from e
        raise gr.Error(f"Error downloading audio: {message}") from e
# Load both models once at import time so every request reuses them.
# Whisper "base" checkpoint performs the speech-to-text step.
model = whisper.load_model("base")
# Name the summarization checkpoint explicitly instead of relying on the
# library's implicit task default, which may change between releases.
# NOTE(review): this is believed to be the checkpoint transformers falls back
# to for "summarization" under PyTorch — confirm before deploying.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
# URL prefixes accepted as YouTube links.
_YOUTUBE_URL_PREFIXES = (
    'https://www.youtube.com/',
    'https://youtube.com/',
    'https://m.youtube.com/',
    'https://youtu.be/',
)


def get_text(url):
    """Transcribe a YouTube video and return the transcript text.

    Args:
        url: Address of the video. Leading and trailing whitespace is
            ignored; anything that is not a string is treated as invalid.

    Returns:
        The transcript text on success. On any failure a string starting
        with ``"Error:"`` is returned instead of raising, so the message
        can be shown directly in the output textbox.
    """
    url = url.strip() if isinstance(url, str) else ''
    if not url.startswith(_YOUTUBE_URL_PREFIXES):
        # Returned directly rather than raised and immediately caught.
        return "Error: Please enter a valid YouTube URL"

    audio_file = None
    try:
        audio_file = get_audio(url)
        result = model.transcribe(audio_file)
        return result['text']
    except Exception as e:
        return f"Error: {str(e)}"
    finally:
        # Always remove the downloaded audio, even when transcription fails.
        if audio_file is not None:
            try:
                os.remove(audio_file)
            except OSError:
                # Best-effort cleanup: a missing or locked file must not
                # mask the transcription result.
                pass
def get_summary(url):
    """Transcribe a YouTube video and return a summary of the transcript.

    The transcript is split into pieces of at most 1000 characters, each
    piece is summarized independently, and the partial summaries are joined
    with single spaces.

    Args:
        url: Address of the video to summarize.

    Returns:
        The combined summary text. If transcription failed, the
        ``"Error: ..."`` string from ``get_text`` is passed through
        unchanged. If the transcript has fewer than 30 words, a fixed
        "too short" message is returned. Any other failure is reported as
        a string starting with ``"Error:"`` rather than raised.
    """
    # Local import keeps this function self-contained.
    import textwrap

    max_chunk_length = 1000

    try:
        article = get_text(url)
        if isinstance(article, str) and article.startswith("Error:"):
            return article

        # Handle empty or short text
        if not article or len(article.split()) < 30:
            return "Text too short to summarize. Please try a longer video."

        # Split on whitespace so no word is cut in half at a chunk
        # boundary; each chunk is still at most max_chunk_length characters.
        chunks = textwrap.wrap(
            article,
            width=max_chunk_length,
            break_on_hyphens=False,
        )
        summaries = [
            summarizer(chunk, max_length=130, min_length=30, do_sample=False)[0]['summary_text']
            for chunk in chunks
        ]
        return " ".join(summaries)
    except Exception as e:
        return f"Error: {str(e)}"
# Assemble the two-tab web UI: one tab for the transcript, one for the summary.
# NOTE(review): the nesting below (button inside each tab, underneath the row
# of textboxes) is assumed — confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown(value="<h1><center>YouTube Video Transcription with OpenAI's Whisper</center></h1>")
    gr.Markdown(
        value="<center>Enter the link of any YouTube video to get the transcription and summary. Please use videos under 30 minutes in length.</center>"
    )

    # Transcription tab: URL on the left, transcript on the right.
    with gr.Tab(label="Get the transcription of any Youtube video"):
        with gr.Row():
            input_text_1 = gr.Textbox(
                label="URL",
                placeholder="Enter the Youtube video URL (e.g., https://www.youtube.com/watch?v=...)",
            )
            output_text_1 = gr.Textbox(
                label="Transcription",
                placeholder="Transcription of the video",
            )
        result_button_1 = gr.Button(value="Get Transcription")

    # Summary tab: same layout, but the output is the summarized transcript.
    with gr.Tab(label="Summary of Youtube video"):
        with gr.Row():
            input_text = gr.Textbox(
                label="URL",
                placeholder="Enter the Youtube video URL (e.g., https://www.youtube.com/watch?v=...)",
            )
            output_text = gr.Textbox(
                label="Summary",
                placeholder="Summary text of the Youtube Video",
            )
        result_button = gr.Button(value="Get Summary")

    # Connect each button to its handler and textboxes.
    result_button.click(fn=get_summary, inputs=input_text, outputs=output_text)
    result_button_1.click(fn=get_text, inputs=input_text_1, outputs=output_text_1)

# Start the app with debug mode enabled.
demo.launch(debug=True)