rajesh1729 committed on
Commit
f702841
·
verified ·
1 Parent(s): 342ece4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -14
app.py CHANGED
@@ -3,30 +3,59 @@ import os
3
  import gradio as gr
4
  from transformers import pipeline
5
  import whisper
 
 
6
 
7
  def get_audio(url):
8
  try:
9
- # Configure yt-dlp options
10
  ydl_opts = {
11
- 'format': 'bestaudio/best', # Choose best quality audio
12
  'postprocessors': [{
13
  'key': 'FFmpegExtractAudio',
14
  'preferredcodec': 'mp3',
15
  'preferredquality': '192',
16
  }],
17
- 'outtmpl': 'audio_download.%(ext)s', # Output template
18
- 'quiet': True, # Less output
19
- 'no_warnings': True # No warnings
 
 
 
 
 
 
 
 
 
 
20
  }
21
 
 
 
 
22
  # Download the audio
23
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
 
 
 
 
 
 
 
24
  ydl.download([url])
25
 
26
- return 'audio_download.mp3' # Return the filename
27
 
28
  except Exception as e:
29
- raise gr.Error(f"Error downloading audio: {str(e)}")
 
 
 
 
 
 
 
30
 
31
  # Load models
32
  model = whisper.load_model("base")
@@ -34,6 +63,10 @@ summarizer = pipeline("summarization")
34
 
35
  def get_text(url):
36
  try:
 
 
 
 
37
  audio_file = get_audio(url)
38
  result = model.transcribe(audio_file)
39
 
@@ -50,29 +83,57 @@ def get_text(url):
50
  def get_summary(url):
51
  try:
52
  article = get_text(url)
53
- summary = summarizer(article)
54
- return summary[0]['summary_text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  except Exception as e:
56
  return f"Error: {str(e)}"
57
 
58
  # Create Gradio interface
59
  with gr.Blocks() as demo:
60
  gr.Markdown("<h1><center>YouTube Video Transcription with OpenAI's Whisper</center></h1>")
61
- gr.Markdown("<center>Enter the link of any YouTube video to get the transcription and summary.</center>")
62
 
63
  with gr.Tab('Get the transcription of any Youtube video'):
64
  with gr.Row():
65
- input_text_1 = gr.Textbox(placeholder='Enter the Youtube video URL', label='URL')
66
- output_text_1 = gr.Textbox(placeholder='Transcription of the video', label='Transcription')
 
 
 
 
 
 
67
  result_button_1 = gr.Button('Get Transcription')
68
 
69
  with gr.Tab('Summary of Youtube video'):
70
  with gr.Row():
71
- input_text = gr.Textbox(placeholder='Enter the Youtube video URL', label='URL')
72
- output_text = gr.Textbox(placeholder='Summary text of the Youtube Video', label='Summary')
 
 
 
 
 
 
73
  result_button = gr.Button('Get Summary')
74
 
75
  result_button.click(get_summary, inputs=input_text, outputs=output_text)
76
  result_button_1.click(get_text, inputs=input_text_1, outputs=output_text_1)
77
 
 
78
  demo.launch(debug=True)
 
3
  import gradio as gr
4
  from transformers import pipeline
5
  import whisper
6
+ import random
7
+ import time
8
 
9
  def get_audio(url):
10
  try:
11
+ # Configure yt-dlp options without browser cookies
12
  ydl_opts = {
13
+ 'format': 'bestaudio/best',
14
  'postprocessors': [{
15
  'key': 'FFmpegExtractAudio',
16
  'preferredcodec': 'mp3',
17
  'preferredquality': '192',
18
  }],
19
+ 'outtmpl': 'audio_download.%(ext)s',
20
+ 'quiet': True,
21
+ 'no_warnings': True,
22
+ # Add basic user agent
23
+ 'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
24
+ # Add other options to help avoid restrictions
25
+ 'extractor_args': {'youtube': {
26
+ 'player_client': ['android', 'web'],
27
+ 'skip': ['dash', 'hls']
28
+ }},
29
+ # Add network options
30
+ 'socket_timeout': 30,
31
+ 'retries': 3,
32
  }
33
 
34
+ # Add small delay to avoid rate limiting
35
+ time.sleep(random.uniform(1, 2))
36
+
37
  # Download the audio
38
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
39
+ info = ydl.extract_info(url, download=False)
40
+ duration = info.get('duration', 0)
41
+
42
+ # Check video duration (optional)
43
+ if duration > 1800: # 30 minutes
44
+ raise gr.Error("Video is too long. Please use videos under 30 minutes.")
45
+
46
  ydl.download([url])
47
 
48
+ return 'audio_download.mp3'
49
 
50
  except Exception as e:
51
+ if 'Sign in to confirm' in str(e):
52
+ raise gr.Error("This video requires age verification. Please try a different video.")
53
+ elif 'Private video' in str(e):
54
+ raise gr.Error("This video is private. Please try a public video.")
55
+ elif 'Video unavailable' in str(e):
56
+ raise gr.Error("This video is unavailable. Please check the URL and try again.")
57
+ else:
58
+ raise gr.Error(f"Error downloading audio: {str(e)}")
59
 
60
  # Load models
61
  model = whisper.load_model("base")
 
63
 
64
  def get_text(url):
65
  try:
66
+ # Validate URL
67
+ if not url.startswith('https://www.youtube.com/') and not url.startswith('https://youtu.be/'):
68
+ raise gr.Error("Please enter a valid YouTube URL")
69
+
70
  audio_file = get_audio(url)
71
  result = model.transcribe(audio_file)
72
 
 
83
  def get_summary(url):
84
  try:
85
  article = get_text(url)
86
+ if isinstance(article, str) and article.startswith("Error:"):
87
+ return article
88
+
89
+ # Handle empty or short text
90
+ if not article or len(article.split()) < 30:
91
+ return "Text too short to summarize. Please try a longer video."
92
+
93
+ # Split long text into chunks
94
+ max_chunk_length = 1000
95
+ chunks = [article[i:i+max_chunk_length] for i in range(0, len(article), max_chunk_length)]
96
+ summaries = []
97
+
98
+ for chunk in chunks:
99
+ summary = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
100
+ summaries.append(summary[0]['summary_text'])
101
+
102
+ return " ".join(summaries)
103
  except Exception as e:
104
  return f"Error: {str(e)}"
105
 
106
  # Create Gradio interface
107
  with gr.Blocks() as demo:
108
  gr.Markdown("<h1><center>YouTube Video Transcription with OpenAI's Whisper</center></h1>")
109
+ gr.Markdown("<center>Enter the link of any YouTube video to get the transcription and summary. Please use videos under 30 minutes in length.</center>")
110
 
111
  with gr.Tab('Get the transcription of any Youtube video'):
112
  with gr.Row():
113
+ input_text_1 = gr.Textbox(
114
+ placeholder='Enter the Youtube video URL (e.g., https://www.youtube.com/watch?v=...)',
115
+ label='URL'
116
+ )
117
+ output_text_1 = gr.Textbox(
118
+ placeholder='Transcription of the video',
119
+ label='Transcription'
120
+ )
121
  result_button_1 = gr.Button('Get Transcription')
122
 
123
  with gr.Tab('Summary of Youtube video'):
124
  with gr.Row():
125
+ input_text = gr.Textbox(
126
+ placeholder='Enter the Youtube video URL (e.g., https://www.youtube.com/watch?v=...)',
127
+ label='URL'
128
+ )
129
+ output_text = gr.Textbox(
130
+ placeholder='Summary text of the Youtube Video',
131
+ label='Summary'
132
+ )
133
  result_button = gr.Button('Get Summary')
134
 
135
  result_button.click(get_summary, inputs=input_text, outputs=output_text)
136
  result_button_1.click(get_text, inputs=input_text_1, outputs=output_text_1)
137
 
138
+ # Launch with appropriate settings
139
  demo.launch(debug=True)