Spaces:

AnalysisWithMSR
/

SEO

Running

App Files Files Community

AnalysisWithMSR committed on Dec 14, 2024

Commit

655b975

verified ·

1 Parent(s): b12cec2

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -36

app.py CHANGED Viewed

@@ -13,17 +13,19 @@ from urllib.parse import urlparse, parse_qs
 import os
 import gradio as gr
-# Ensure your API keys are set as environment variables
 youtube_api_key = os.getenv("YOUTUBE_API_KEY")
 openai_api_key = os.getenv("OPENAI_API_KEY")
 openai.api_key = openai_api_key
 if not youtube_api_key:
     raise ValueError("YOUTUBE_API_KEY is not set. Please set it as an environment variable.")
 if not openai_api_key:
     raise ValueError("OPENAI_API_KEY is not set. Please set it as an environment variable.")
 def extract_video_id(url):
     """Extracts the video ID from a YouTube URL."""
     try:
@@ -36,8 +38,10 @@ def extract_video_id(url):
         else:
             return None
     except Exception as e:
         return None
 def get_video_duration(video_id, api_key):
     """Fetches the video duration in minutes."""
     try:
@@ -54,8 +58,10 @@ def get_video_duration(video_id, api_key):
         else:
             return None
     except Exception as e:
         return None
 def download_and_transcribe_with_whisper(youtube_url):
     """Downloads audio from YouTube and transcribes it using Whisper."""
     try:
@@ -71,63 +77,57 @@ def download_and_transcribe_with_whisper(youtube_url):
                     'preferredquality': '192',
                 }],
             }
-            # Download audio using yt-dlp
             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                 ydl.download([youtube_url])
-            # Convert to wav for Whisper
             audio = AudioSegment.from_file(temp_audio_file)
             wav_file = os.path.join(temp_dir, "audio.wav")
             audio.export(wav_file, format="wav")
-            # Run Whisper transcription
             model = whisper.load_model("large")
             result = model.transcribe(wav_file)
-            transcript = result['text']
-            return transcript
     except Exception as e:
         return None
 def get_transcript_from_youtube_api(video_id, video_length):
     """Fetches transcript using YouTube API if available."""
     try:
         transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
         for transcript in transcript_list:
             if not transcript.is_generated:
                 segments = transcript.fetch()
                 return " ".join(segment['text'] for segment in segments)
-        if video_length > 15:
             auto_transcript = transcript_list.find_generated_transcript(['en'])
             if auto_transcript:
                 segments = auto_transcript.fetch()
                 return " ".join(segment['text'] for segment in segments)
         return None
     except Exception as e:
         return None
 def get_transcript(youtube_url):
-    """Gets transcript from YouTube API or Whisper if unavailable."""
     video_id = extract_video_id(youtube_url)
     if not video_id:
-        return "Invalid or unsupported YouTube URL."
     video_length = get_video_duration(video_id, youtube_api_key)
     if video_length is not None:
         transcript = get_transcript_from_youtube_api(video_id, video_length)
         if transcript:
             return transcript
         return download_and_transcribe_with_whisper(youtube_url)
-    else:
-        return "Error fetching video duration."
-def summarize_text_huggingface(text):
-    """Summarizes text using a Hugging Face summarization model."""
     summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=0 if torch.cuda.is_available() else -1)
     max_input_length = 1024
     chunk_overlap = 100
@@ -141,8 +141,9 @@ def summarize_text_huggingface(text):
     ]
     return " ".join(summaries)
-def generate_optimized_content(summarized_transcript):
-    """Generates optimized video metadata using OpenAI's GPT model."""
     prompt = f"""
     Analyze the following summarized YouTube video transcript and:
     1. Extract the top 10 keywords.
@@ -151,9 +152,9 @@ def generate_optimized_content(summarized_transcript):
     4. Generate related tags for the video.
     Summarized Transcript:
-    {summarized_transcript}
-    Provide the results in the following JSON format:
     {{
         "keywords": ["keyword1", "keyword2", ..., "keyword10"],
         "title": "Generated Title",
@@ -161,35 +162,35 @@ def generate_optimized_content(summarized_transcript):
         "tags": ["tag1", "tag2", ..., "tag10"]
     }}
     """
     try:
-        response = openai.chat.completions.create(
             model="gpt-3.5-turbo",
             messages=[
-                {"role": "system", "content": "You are a helpful assistant."},
                 {"role": "user", "content": prompt}
             ]
         )
-        return json.loads(response.choices[0].message.content)
     except Exception as e:
         return {"error": str(e)}
 def process_video(youtube_url):
-    """Processes a YouTube URL to generate optimized metadata."""
     transcript = get_transcript(youtube_url)
     if not transcript:
-        return {"error": "Could not fetch the transcript. Please try another video."}
-    summary = summarize_text_huggingface(transcript)
-    optimized_content = generate_optimized_content(summary)
-    return optimized_content
 iface = gr.Interface(
     fn=process_video,
     inputs=gr.Textbox(label="Enter a YouTube video URL"),
     outputs=gr.JSON(label="Optimized Content"),
     title="YouTube Video Optimization Tool",
-    description="Enter a YouTube URL to generate optimized titles, descriptions, and tags."
 )
 if __name__ == "__main__":

 import os
 import gradio as gr
+# Set up API keys (ensure these are provided as environment variables)
 # os.getenv returns None for an unset variable; the checks below turn a
 # missing (or empty) value into a ValueError raised while this module loads.
 youtube_api_key = os.getenv("YOUTUBE_API_KEY")
 openai_api_key = os.getenv("OPENAI_API_KEY")
 # Note: the key is assigned to openai.api_key before it is validated below.
 openai.api_key = openai_api_key
+# Validate API keys
 if not youtube_api_key:
     raise ValueError("YOUTUBE_API_KEY is not set. Please set it as an environment variable.")
 if not openai_api_key:
     raise ValueError("OPENAI_API_KEY is not set. Please set it as an environment variable.")
 def extract_video_id(url):
     """Extracts the video ID from a YouTube URL."""
     try:
         else:
             return None
     except Exception as e:
+        print(f"Error parsing URL: {e}")
         return None
 def get_video_duration(video_id, api_key):
     """Fetches the video duration in minutes."""
     try:
         else:
             return None
     except Exception as e:
+        print(f"Error fetching video duration: {e}")
         return None
 def download_and_transcribe_with_whisper(youtube_url):
     """Downloads audio from YouTube and transcribes it using Whisper."""
     try:
                     'preferredquality': '192',
                 }],
             }
+            # Download audio
             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                 ydl.download([youtube_url])
+            # Convert to WAV
             audio = AudioSegment.from_file(temp_audio_file)
             wav_file = os.path.join(temp_dir, "audio.wav")
             audio.export(wav_file, format="wav")
+            # Transcribe using Whisper
             model = whisper.load_model("large")
             result = model.transcribe(wav_file)
+            return result['text']
     except Exception as e:
+        print(f"Error during transcription: {e}")
         return None
 def get_transcript_from_youtube_api(video_id, video_length):
     """Return caption text for a video from the YouTube transcript API, or None.
     The first manually created transcript in the listing is preferred. Only
     when there is none and ``video_length`` is greater than 15 is the
     generated English transcript used instead. Any exception raised along
     the way is printed and reported to the caller as None.
     """
     try:
         available = YouTubeTranscriptApi.list_transcripts(video_id)
         # Take the first human-written transcript, whatever its language.
         manual = next((t for t in available if not t.is_generated), None)
         if manual is not None:
             return " ".join(piece['text'] for piece in manual.fetch())
         # Short videos never use the auto-generated captions.
         if not (video_length > 15):
             return None
         generated = available.find_generated_transcript(['en'])
         if not generated:
             return None
         return " ".join(piece['text'] for piece in generated.fetch())
     except Exception as e:
         print(f"Error fetching transcript: {e}")
         return None
 def get_transcript(youtube_url):
     """Gets transcript using YouTube API or Whisper.
     Returns the transcript text, or None when the URL yields no video ID,
     the video duration cannot be fetched, or the Whisper fallback fails.
     Failures are reported as None rather than as a message string, so a
     caller that tests the result for truthiness cannot mistake an error
     message for transcript text and feed it to the summarizer.
     """
     video_id = extract_video_id(youtube_url)
     if not video_id:
         print("Invalid YouTube URL.")
         return None
     video_length = get_video_duration(video_id, youtube_api_key)
     if video_length is None:
         print("Error fetching video duration.")
         return None
     transcript = get_transcript_from_youtube_api(video_id, video_length)
     if transcript:
         return transcript
     # No usable captions: fall back to downloading the audio and running Whisper.
     return download_and_transcribe_with_whisper(youtube_url)
+def summarize_text(text):
+    """Summarizes text using Hugging Face pipeline."""
     summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=0 if torch.cuda.is_available() else -1)
     max_input_length = 1024
     chunk_overlap = 100
     ]
     return " ".join(summaries)
+def generate_optimized_content(summary):
+    """Generates optimized content using OpenAI GPT."""
     prompt = f"""
     Analyze the following summarized YouTube video transcript and:
     1. Extract the top 10 keywords.
     4. Generate related tags for the video.
     Summarized Transcript:
+    {summary}
+    Provide the results in JSON format:
     {{
         "keywords": ["keyword1", "keyword2", ..., "keyword10"],
         "title": "Generated Title",
         "tags": ["tag1", "tag2", ..., "tag10"]
     }}
     """
     try:
+        response = openai.ChatCompletion.create(
             model="gpt-3.5-turbo",
             messages=[
+                {"role": "system", "content": "You are an SEO expert."},
                 {"role": "user", "content": prompt}
             ]
         )
+        return json.loads(response['choices'][0]['message']['content'])
     except Exception as e:
         return {"error": str(e)}
 def process_video(youtube_url):
     """Turn a YouTube URL into optimized metadata, or an error dict.
     The transcript is fetched, summarized, and the summary handed to
     generate_optimized_content. A falsy transcript short-circuits to a
     dict with a single "error" key.
     """
     transcript = get_transcript(youtube_url)
     if transcript:
         return generate_optimized_content(summarize_text(transcript))
     return {"error": "Could not fetch the transcript."}
+# Gradio Interface
 # One text box in, one JSON panel out: the entered URL is passed to
 # process_video and whatever it returns is rendered in the JSON component.
 iface = gr.Interface(
     fn=process_video,
     inputs=gr.Textbox(label="Enter a YouTube video URL"),
     outputs=gr.JSON(label="Optimized Content"),
     title="YouTube Video Optimization Tool",
+    description="Enter a YouTube URL to generate SEO-optimized titles, descriptions, and tags."
 )
 if __name__ == "__main__":