tomiwa1a committed
Commit
f376027
1 Parent(s): 91f436b

fixed error in verbose and time transcription

Files changed (1)
  1. handler.py +13 -2
handler.py CHANGED
@@ -3,6 +3,7 @@ from transformers.pipelines.audio_utils import ffmpeg_read
 import whisper
 import torch
 import pytube
+import time
 
 
 class EndpointHandler():
@@ -13,7 +14,12 @@ class EndpointHandler():
         device = "cuda" if torch.cuda.is_available() else "cpu"
         print(f'whisper will use: {device}')
 
+        t0 = time.time()
         whisper_model = whisper.load_model(MODEL_NAME).to(device)
+        t1 = time.time()
+
+        total = t1-t0
+        print(f'Finished loading model in {total} seconds')
 
 
     def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
@@ -33,14 +39,19 @@ class EndpointHandler():
             # Realized this by running in verbose mode and seeing how much time
             # was spent on the decoding language step
             "language":"en",
-            verbose: True
+            "verbose": True
         }
         yt = pt.YouTube(video_url)
         stream = yt.streams.filter(only_audio=True)[0]
         path_to_audio = f"{yt.video_id}.mp3"
         stream.download(filename=path_to_audio)
-
+        t0 = time.time()
         transcript = self.model.transcribe(path_to_audio, **decode_options)
+        t1 = time.time()
+
+        total = t1-t0
+        print(f'Finished transcription in {total} seconds')
+
 
         # postprocess the prediction
         return {"transcript": transcript}