seawolf2357 committed
Commit f53f3e8
1 Parent(s): 9a1f06d

Update app.py

Files changed (1)
  1. app.py +21 -5
app.py CHANGED
@@ -13,7 +13,9 @@ from youtube_transcript_api.formatters import TextFormatter
 from dotenv import load_dotenv
 from pytube import YouTube
 import whisper
-
+import torch
+from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
+import librosa
 # Load environment variables
 load_dotenv()
 
@@ -163,6 +165,8 @@ async def get_best_available_transcript(video_id, max_retries=5, delay=10):
 
     return None, None
 
+
+
 async def generate_whisper_transcript(video_id):
     try:
         # Download the YouTube video
@@ -170,17 +174,29 @@ async def generate_whisper_transcript(video_id):
         audio_stream = yt.streams.filter(only_audio=True).first()
         audio_file = audio_stream.download(output_path='temp', filename=f'{video_id}.mp3')
 
-        # Convert speech to text using the Whisper model
-        with open(audio_file, "rb") as f:
-            response = whisper_client.audio_transcription(audio=f, model="openai/whisper-large-v3")
+        # Load the audio file
+        audio, sr = librosa.load(audio_file, sr=16000)
+
+        # Load the Whisper model and processor
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        processor = AutoProcessor.from_pretrained("openai/whisper-large-v3")
+        model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large-v3").to(device)
+
+        # Process the audio
+        input_features = processor(audio, sampling_rate=sr, return_tensors="pt").input_features.to(device)
+
+        # Generate the transcription
+        predicted_ids = model.generate(input_features)
+        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
 
         # Delete the temporary file
         os.remove(audio_file)
 
-        return response['text']
+        return transcription[0]
     except Exception as e:
         logging.error(f'Whisper transcript generation failed: {e}')
         return None
+
 
 async def get_video_comments(video_id):
     comments = []
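
For reference, the transcription path added in this commit can be exercised on its own. The sketch below mirrors the new code, assuming torch, transformers, and librosa are installed and that librosa can decode the downloaded audio (e.g. via ffmpeg/audioread). The transcribe_file helper and the example path are illustrative only and are not part of app.py.

import torch
import librosa
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq

def transcribe_file(audio_path: str) -> str:
    # Load the audio at 16 kHz, the sampling rate Whisper expects
    audio, sr = librosa.load(audio_path, sr=16000)

    # Load the processor and model; move the model to GPU when available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    processor = AutoProcessor.from_pretrained("openai/whisper-large-v3")
    model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large-v3").to(device)

    # Convert the waveform to input features and generate token ids
    input_features = processor(audio, sampling_rate=sr, return_tensors="pt").input_features.to(device)
    predicted_ids = model.generate(input_features)

    # Decode the token ids back to text
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

# Example (placeholder path):
# print(transcribe_file("temp/example.mp3"))

Compared with the removed whisper_client.audio_transcription call, this change runs Whisper locally, so the first call downloads the whisper-large-v3 weights and reloads the processor and model on every invocation of generate_whisper_transcript; loading them once at module level would likely avoid repeating that cost per video.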