JiaenLiu committed
Commit: 32b8dd4
Parent: 9518a49

prompt solve whisper


Former-commit-id: 7735223b44e4f9f285459419e031e50ad816d7ec

Files changed (1)
1. pipeline.py (+4 -8)
pipeline.py CHANGED
@@ -5,6 +5,8 @@ import os
 from tqdm import tqdm
 from SRT import SRT_script
 import stable_whisper
+import whisper
+
 import subprocess
 
 import time
@@ -95,14 +97,7 @@ elif args.video_file is not None:
     audio_file= open(args.audio_file, "rb")
     audio_path = args.audio_file
 else:
-    # escaped_video_path = args.video_file.replace('(', '\(').replace(')', '\)').replace(' ', '\ ')
-    # print(escaped_video_path)
-    # os.system(f'ffmpeg -i {escaped_video_path} -f mp3 -ab 192000 -vn {DOWNLOAD_PATH}/audio/{VIDEO_NAME}.mp3')
-    # audio_file= open(f'{DOWNLOAD_PATH}/audio/{VIDEO_NAME}.mp3', "rb")
-    # audio_path = f'{DOWNLOAD_PATH}/audio/{VIDEO_NAME}.mp3'
     output_audio_path = f'{DOWNLOAD_PATH}/audio/{VIDEO_NAME}.mp3'
-    # print(video_path)
-    # print(output_audio_path)
     subprocess.run(['ffmpeg', '-i', video_path, '-f', 'mp3', '-ab', '192000', '-vn', output_audio_path])
     audio_file = open(output_audio_path, "rb")
     audio_path = output_audio_path
@@ -133,7 +128,7 @@ else:
 
     # use stable-whisper
     model = stable_whisper.load_model('base')
-    transcript = model.transcribe(audio_path, regroup = False)
+    transcript = model.transcribe(audio_path, regroup = False, initial_prompt="Hello, welcome to my lecture. Are you good my friend?")
     (
         transcript
         .split_by_punctuation(['.', '。', '?'])
@@ -143,6 +138,7 @@ else:
     )
     # transcript.to_srt_vtt(srt_file_en)
     transcript = transcript.to_dict()
+    # print(transcript)
     srt = SRT_script(transcript['segments']) # read segments to SRT class
 
     #Write SRT file
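
Note: the substantive change in this commit is passing initial_prompt to stable-whisper's transcribe(), which seeds Whisper's decoder with sample text so the transcript tends to follow its punctuation and phrasing. Below is a minimal sketch of that prompted transcription step in isolation; the audio path is illustrative (the real pipeline builds it from DOWNLOAD_PATH and VIDEO_NAME), and the prompt string is the one used in the commit.

# Sketch only: mirrors the pipeline's prompted transcription step, not the full pipeline.py.
import stable_whisper

audio_path = "audio/example.mp3"  # hypothetical path; the pipeline uses f'{DOWNLOAD_PATH}/audio/{VIDEO_NAME}.mp3'

model = stable_whisper.load_model('base')

# initial_prompt is forwarded to Whisper's decoder as priming text;
# regroup=False keeps Whisper's raw segments so the pipeline can
# re-split them by punctuation itself before building the SRT.
transcript = model.transcribe(
    audio_path,
    regroup=False,
    initial_prompt="Hello, welcome to my lecture. Are you good my friend?",
)

transcript.split_by_punctuation(['.', '。', '?'])   # same regrouping call used in pipeline.py
segments = transcript.to_dict()['segments']          # these segments feed SRT_script in the pipeline

As a design note, the commented-out lines removed in the second hunk show an earlier os.system ffmpeg call that had to escape parentheses and spaces in the video path by hand; the retained subprocess.run(['ffmpeg', ...]) form passes the path as a single argument, so no shell quoting is needed.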