Spaces:

AnalysisWithMSR
/

SEO

Sleeping

AnalysisWithMSR committed on Dec 14, 2024

Commit

e3eb307

verified ·

1 Parent(s): 3833cc4

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,6 +10,7 @@ from youtube_transcript_api import YouTubeTranscriptApi
 import openai
 import json
 import os
 from pytrends.request import TrendReq
 import torch
 from urllib.parse import urlparse, parse_qs
@@ -54,21 +55,16 @@ def get_video_duration(video_id, api_key):
 def download_and_transcribe_with_whisper(youtube_url):
     try:
         with tempfile.TemporaryDirectory() as temp_dir:
-            temp_audio_file = os.path.join(temp_dir, "audio.mp3")
-            ydl_opts = {
-                'format': 'bestaudio/best',
-                'outtmpl': temp_audio_file,
-                'extractaudio': True,
-                'audioquality': 1,
-            }
-            # Download audio using yt-dlp
-            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                ydl.download([youtube_url])
-            # Convert to wav for Whisper
             audio = AudioSegment.from_file(temp_audio_file)
             wav_file = os.path.join(temp_dir, "audio.wav")
             audio.export(wav_file, format="wav")

 import openai
 import json
 import os
+from pytube import YouTube
 from pytrends.request import TrendReq
 import torch
 from urllib.parse import urlparse, parse_qs
 def download_and_transcribe_with_whisper(youtube_url):
     try:
+        # Temporary directory for storing the downloaded audio
         with tempfile.TemporaryDirectory() as temp_dir:
+            temp_audio_file = os.path.join(temp_dir, "audio.mp4")  # Pytube downloads in mp4 format
+            # Download audio using pytube
+            yt = YouTube(youtube_url)
+            audio_stream = yt.streams.filter(only_audio=True).first()  # Get the first available audio stream
+            audio_stream.download(output_path=temp_dir, filename="audio.mp4")  # Download audio to temp dir
+            # Convert the downloaded audio (mp4) to wav for Whisper
             audio = AudioSegment.from_file(temp_audio_file)
             wav_file = os.path.join(temp_dir, "audio.wav")
             audio.export(wav_file, format="wav")