AnalysisWithMSR committed on
Commit
e3eb307
·
verified ·
1 Parent(s): 3833cc4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -14
app.py CHANGED
@@ -10,6 +10,7 @@ from youtube_transcript_api import YouTubeTranscriptApi
10
  import openai
11
  import json
12
  import os
 
13
  from pytrends.request import TrendReq
14
  import torch
15
  from urllib.parse import urlparse, parse_qs
@@ -54,21 +55,16 @@ def get_video_duration(video_id, api_key):
54
 
55
  def download_and_transcribe_with_whisper(youtube_url):
56
  try:
 
57
  with tempfile.TemporaryDirectory() as temp_dir:
58
- temp_audio_file = os.path.join(temp_dir, "audio.mp3")
59
-
60
- ydl_opts = {
61
- 'format': 'bestaudio/best',
62
- 'outtmpl': temp_audio_file,
63
- 'extractaudio': True,
64
- 'audioquality': 1,
65
- }
66
-
67
- # Download audio using yt-dlp
68
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
69
- ydl.download([youtube_url])
70
-
71
- # Convert to wav for Whisper
72
  audio = AudioSegment.from_file(temp_audio_file)
73
  wav_file = os.path.join(temp_dir, "audio.wav")
74
  audio.export(wav_file, format="wav")
 
10
  import openai
11
  import json
12
  import os
13
+ from pytube import YouTube
14
  from pytrends.request import TrendReq
15
  import torch
16
  from urllib.parse import urlparse, parse_qs
 
55
 
56
  def download_and_transcribe_with_whisper(youtube_url):
57
  try:
58
+ # Temporary directory for storing the downloaded audio
59
  with tempfile.TemporaryDirectory() as temp_dir:
60
+ temp_audio_file = os.path.join(temp_dir, "audio.mp4") # Pytube downloads in mp4 format
61
+
62
+ # Download audio using pytube
63
+ yt = YouTube(youtube_url)
64
+ audio_stream = yt.streams.filter(only_audio=True).first() # Get the first available audio stream
65
+ audio_stream.download(output_path=temp_dir, filename="audio.mp4") # Download audio to temp dir
66
+
67
+ # Convert the downloaded audio (mp4) to wav for Whisper
 
 
 
 
 
 
68
  audio = AudioSegment.from_file(temp_audio_file)
69
  wav_file = os.path.join(temp_dir, "audio.wav")
70
  audio.export(wav_file, format="wav")