seawolf2357
committed on
Commit
•
f53f3e8
1
Parent(s):
9a1f06d
Update app.py
Browse files
app.py
CHANGED
@@ -13,7 +13,9 @@ from youtube_transcript_api.formatters import TextFormatter
|
|
13 |
from dotenv import load_dotenv
|
14 |
from pytube import YouTube
|
15 |
import whisper
|
16 |
-
|
|
|
|
|
17 |
# 환경 변수 로드
|
18 |
load_dotenv()
|
19 |
|
@@ -163,6 +165,8 @@ async def get_best_available_transcript(video_id, max_retries=5, delay=10):
|
|
163 |
|
164 |
return None, None
|
165 |
|
|
|
|
|
166 |
async def generate_whisper_transcript(video_id):
|
167 |
try:
|
168 |
# YouTube 비디오 다운로드
|
@@ -170,17 +174,29 @@ async def generate_whisper_transcript(video_id):
|
|
170 |
audio_stream = yt.streams.filter(only_audio=True).first()
|
171 |
audio_file = audio_stream.download(output_path='temp', filename=f'{video_id}.mp3')
|
172 |
|
173 |
-
#
|
174 |
-
|
175 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
|
177 |
# 임시 파일 삭제
|
178 |
os.remove(audio_file)
|
179 |
|
180 |
-
return
|
181 |
except Exception as e:
|
182 |
logging.error(f'Whisper 자막 생성 실패: {e}')
|
183 |
return None
|
|
|
184 |
|
185 |
async def get_video_comments(video_id):
|
186 |
comments = []
|
|
|
13 |
from dotenv import load_dotenv
|
14 |
from pytube import YouTube
|
15 |
import whisper
|
16 |
+
import torch
|
17 |
+
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
|
18 |
+
import librosa
|
19 |
# 환경 변수 로드
|
20 |
load_dotenv()
|
21 |
|
|
|
165 |
|
166 |
return None, None
|
167 |
|
168 |
+
|
169 |
+
|
170 |
async def generate_whisper_transcript(video_id):
|
171 |
try:
|
172 |
# YouTube 비디오 다운로드
|
|
|
174 |
audio_stream = yt.streams.filter(only_audio=True).first()
|
175 |
audio_file = audio_stream.download(output_path='temp', filename=f'{video_id}.mp3')
|
176 |
|
177 |
+
# 오디오 파일 로드
|
178 |
+
audio, sr = librosa.load(audio_file, sr=16000)
|
179 |
+
|
180 |
+
# Whisper 모델 및 프로세서 로드
|
181 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
182 |
+
processor = AutoProcessor.from_pretrained("openai/whisper-large-v3")
|
183 |
+
model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large-v3").to(device)
|
184 |
+
|
185 |
+
# 오디오 처리
|
186 |
+
input_features = processor(audio, sampling_rate=sr, return_tensors="pt").input_features.to(device)
|
187 |
+
|
188 |
+
# 생성
|
189 |
+
predicted_ids = model.generate(input_features)
|
190 |
+
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
|
191 |
|
192 |
# 임시 파일 삭제
|
193 |
os.remove(audio_file)
|
194 |
|
195 |
+
return transcription[0]
|
196 |
except Exception as e:
|
197 |
logging.error(f'Whisper 자막 생성 실패: {e}')
|
198 |
return None
|
199 |
+
|
200 |
|
201 |
async def get_video_comments(video_id):
|
202 |
comments = []
|