AnalysisWithMSR committed on
Commit
655b975
·
verified ·
1 Parent(s): b12cec2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -36
app.py CHANGED
@@ -13,17 +13,19 @@ from urllib.parse import urlparse, parse_qs
13
  import os
14
  import gradio as gr
15
 
16
- # Ensure your API keys are set as environment variables
17
  youtube_api_key = os.getenv("YOUTUBE_API_KEY")
18
  openai_api_key = os.getenv("OPENAI_API_KEY")
19
  openai.api_key = openai_api_key
20
 
 
21
  if not youtube_api_key:
22
  raise ValueError("YOUTUBE_API_KEY is not set. Please set it as an environment variable.")
23
 
24
  if not openai_api_key:
25
  raise ValueError("OPENAI_API_KEY is not set. Please set it as an environment variable.")
26
 
 
27
  def extract_video_id(url):
28
  """Extracts the video ID from a YouTube URL."""
29
  try:
@@ -36,8 +38,10 @@ def extract_video_id(url):
36
  else:
37
  return None
38
  except Exception as e:
 
39
  return None
40
 
 
41
  def get_video_duration(video_id, api_key):
42
  """Fetches the video duration in minutes."""
43
  try:
@@ -54,8 +58,10 @@ def get_video_duration(video_id, api_key):
54
  else:
55
  return None
56
  except Exception as e:
 
57
  return None
58
 
 
59
  def download_and_transcribe_with_whisper(youtube_url):
60
  """Downloads audio from YouTube and transcribes it using Whisper."""
61
  try:
@@ -71,63 +77,57 @@ def download_and_transcribe_with_whisper(youtube_url):
71
  'preferredquality': '192',
72
  }],
73
  }
74
-
75
- # Download audio using yt-dlp
76
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
77
  ydl.download([youtube_url])
78
-
79
- # Convert to wav for Whisper
80
  audio = AudioSegment.from_file(temp_audio_file)
81
  wav_file = os.path.join(temp_dir, "audio.wav")
82
  audio.export(wav_file, format="wav")
83
-
84
- # Run Whisper transcription
85
  model = whisper.load_model("large")
86
  result = model.transcribe(wav_file)
87
- transcript = result['text']
88
- return transcript
89
-
90
  except Exception as e:
 
91
  return None
92
 
 
93
  def get_transcript_from_youtube_api(video_id, video_length):
94
  """Fetches transcript using YouTube API if available."""
95
  try:
96
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
97
-
98
  for transcript in transcript_list:
99
  if not transcript.is_generated:
100
  segments = transcript.fetch()
101
  return " ".join(segment['text'] for segment in segments)
102
-
103
- if video_length > 15:
104
  auto_transcript = transcript_list.find_generated_transcript(['en'])
105
  if auto_transcript:
106
  segments = auto_transcript.fetch()
107
  return " ".join(segment['text'] for segment in segments)
108
-
109
  return None
110
-
111
  except Exception as e:
 
112
  return None
113
 
 
114
  def get_transcript(youtube_url):
115
- """Gets transcript from YouTube API or Whisper if unavailable."""
116
  video_id = extract_video_id(youtube_url)
117
  if not video_id:
118
- return "Invalid or unsupported YouTube URL."
119
-
120
  video_length = get_video_duration(video_id, youtube_api_key)
121
  if video_length is not None:
122
  transcript = get_transcript_from_youtube_api(video_id, video_length)
123
  if transcript:
124
  return transcript
125
  return download_and_transcribe_with_whisper(youtube_url)
126
- else:
127
- return "Error fetching video duration."
128
 
129
- def summarize_text_huggingface(text):
130
- """Summarizes text using a Hugging Face summarization model."""
 
131
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=0 if torch.cuda.is_available() else -1)
132
  max_input_length = 1024
133
  chunk_overlap = 100
@@ -141,8 +141,9 @@ def summarize_text_huggingface(text):
141
  ]
142
  return " ".join(summaries)
143
 
144
- def generate_optimized_content(summarized_transcript):
145
- """Generates optimized video metadata using OpenAI's GPT model."""
 
146
  prompt = f"""
147
  Analyze the following summarized YouTube video transcript and:
148
  1. Extract the top 10 keywords.
@@ -151,9 +152,9 @@ def generate_optimized_content(summarized_transcript):
151
  4. Generate related tags for the video.
152
 
153
  Summarized Transcript:
154
- {summarized_transcript}
155
 
156
- Provide the results in the following JSON format:
157
  {{
158
  "keywords": ["keyword1", "keyword2", ..., "keyword10"],
159
  "title": "Generated Title",
@@ -161,35 +162,35 @@ def generate_optimized_content(summarized_transcript):
161
  "tags": ["tag1", "tag2", ..., "tag10"]
162
  }}
163
  """
164
-
165
  try:
166
- response = openai.chat.completions.create(
167
  model="gpt-3.5-turbo",
168
  messages=[
169
- {"role": "system", "content": "You are a helpful assistant."},
170
  {"role": "user", "content": prompt}
171
  ]
172
  )
173
- return json.loads(response.choices[0].message.content)
174
  except Exception as e:
175
  return {"error": str(e)}
176
 
 
177
  def process_video(youtube_url):
178
- """Processes a YouTube URL to generate optimized metadata."""
179
  transcript = get_transcript(youtube_url)
180
  if not transcript:
181
- return {"error": "Could not fetch the transcript. Please try another video."}
 
 
182
 
183
- summary = summarize_text_huggingface(transcript)
184
- optimized_content = generate_optimized_content(summary)
185
- return optimized_content
186
 
 
187
  iface = gr.Interface(
188
  fn=process_video,
189
  inputs=gr.Textbox(label="Enter a YouTube video URL"),
190
  outputs=gr.JSON(label="Optimized Content"),
191
  title="YouTube Video Optimization Tool",
192
- description="Enter a YouTube URL to generate optimized titles, descriptions, and tags."
193
  )
194
 
195
  if __name__ == "__main__":
 
13
  import os
14
  import gradio as gr
15
 
16
+ # Set up API keys (ensure these are provided as environment variables)
17
  youtube_api_key = os.getenv("YOUTUBE_API_KEY")
18
  openai_api_key = os.getenv("OPENAI_API_KEY")
19
  openai.api_key = openai_api_key
20
 
21
+ # Validate API keys
22
  if not youtube_api_key:
23
  raise ValueError("YOUTUBE_API_KEY is not set. Please set it as an environment variable.")
24
 
25
  if not openai_api_key:
26
  raise ValueError("OPENAI_API_KEY is not set. Please set it as an environment variable.")
27
 
28
+
29
  def extract_video_id(url):
30
  """Extracts the video ID from a YouTube URL."""
31
  try:
 
38
  else:
39
  return None
40
  except Exception as e:
41
+ print(f"Error parsing URL: {e}")
42
  return None
43
 
44
+
45
  def get_video_duration(video_id, api_key):
46
  """Fetches the video duration in minutes."""
47
  try:
 
58
  else:
59
  return None
60
  except Exception as e:
61
+ print(f"Error fetching video duration: {e}")
62
  return None
63
 
64
+
65
  def download_and_transcribe_with_whisper(youtube_url):
66
  """Downloads audio from YouTube and transcribes it using Whisper."""
67
  try:
 
77
  'preferredquality': '192',
78
  }],
79
  }
80
+ # Download audio
 
81
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
82
  ydl.download([youtube_url])
83
+ # Convert to WAV
 
84
  audio = AudioSegment.from_file(temp_audio_file)
85
  wav_file = os.path.join(temp_dir, "audio.wav")
86
  audio.export(wav_file, format="wav")
87
+ # Transcribe using Whisper
 
88
  model = whisper.load_model("large")
89
  result = model.transcribe(wav_file)
90
+ return result['text']
 
 
91
  except Exception as e:
92
+ print(f"Error during transcription: {e}")
93
  return None
94
 
95
+
96
  def get_transcript_from_youtube_api(video_id, video_length):
97
  """Fetches transcript using YouTube API if available."""
98
  try:
99
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
 
100
  for transcript in transcript_list:
101
  if not transcript.is_generated:
102
  segments = transcript.fetch()
103
  return " ".join(segment['text'] for segment in segments)
104
+ if video_length > 15: # Use generated transcript for longer videos
 
105
  auto_transcript = transcript_list.find_generated_transcript(['en'])
106
  if auto_transcript:
107
  segments = auto_transcript.fetch()
108
  return " ".join(segment['text'] for segment in segments)
 
109
  return None
 
110
  except Exception as e:
111
+ print(f"Error fetching transcript: {e}")
112
  return None
113
 
114
+
115
  def get_transcript(youtube_url):
116
+ """Gets transcript using YouTube API or Whisper."""
117
  video_id = extract_video_id(youtube_url)
118
  if not video_id:
119
+ return "Invalid YouTube URL."
 
120
  video_length = get_video_duration(video_id, youtube_api_key)
121
  if video_length is not None:
122
  transcript = get_transcript_from_youtube_api(video_id, video_length)
123
  if transcript:
124
  return transcript
125
  return download_and_transcribe_with_whisper(youtube_url)
126
+ return "Error fetching video duration."
 
127
 
128
+
129
+ def summarize_text(text):
130
+ """Summarizes text using Hugging Face pipeline."""
131
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=0 if torch.cuda.is_available() else -1)
132
  max_input_length = 1024
133
  chunk_overlap = 100
 
141
  ]
142
  return " ".join(summaries)
143
 
144
+
145
+ def generate_optimized_content(summary):
146
+ """Generates optimized content using OpenAI GPT."""
147
  prompt = f"""
148
  Analyze the following summarized YouTube video transcript and:
149
  1. Extract the top 10 keywords.
 
152
  4. Generate related tags for the video.
153
 
154
  Summarized Transcript:
155
+ {summary}
156
 
157
+ Provide the results in JSON format:
158
  {{
159
  "keywords": ["keyword1", "keyword2", ..., "keyword10"],
160
  "title": "Generated Title",
 
162
  "tags": ["tag1", "tag2", ..., "tag10"]
163
  }}
164
  """
 
165
  try:
166
+ response = openai.ChatCompletion.create(
167
  model="gpt-3.5-turbo",
168
  messages=[
169
+ {"role": "system", "content": "You are an SEO expert."},
170
  {"role": "user", "content": prompt}
171
  ]
172
  )
173
+ return json.loads(response['choices'][0]['message']['content'])
174
  except Exception as e:
175
  return {"error": str(e)}
176
 
177
+
178
  def process_video(youtube_url):
179
+ """Processes video and returns optimized metadata."""
180
  transcript = get_transcript(youtube_url)
181
  if not transcript:
182
+ return {"error": "Could not fetch the transcript."}
183
+ summary = summarize_text(transcript)
184
+ return generate_optimized_content(summary)
185
 
 
 
 
186
 
187
+ # Gradio Interface
188
  iface = gr.Interface(
189
  fn=process_video,
190
  inputs=gr.Textbox(label="Enter a YouTube video URL"),
191
  outputs=gr.JSON(label="Optimized Content"),
192
  title="YouTube Video Optimization Tool",
193
+ description="Enter a YouTube URL to generate SEO-optimized titles, descriptions, and tags."
194
  )
195
 
196
  if __name__ == "__main__":