nickmuchi committed on
Commit
14eaae6
1 Parent(s): 8e77d9f

Update functions.py

Browse files
Files changed (1) hide show
  1. functions.py +133 -74
functions.py CHANGED
@@ -66,32 +66,6 @@ margin-bottom: 2.5rem">{}</div> """
66
 
67
  ###################### Functions #######################################################################################
68
 
69
- # @st.cache_data
70
- # def get_yt_audio(url):
71
- # temp_audio_file = os.path.join('output', 'audio')
72
-
73
- # ydl_opts = {
74
- # 'format': 'bestaudio/best',
75
- # 'postprocessors': [{
76
- # 'key': 'FFmpegExtractAudio',
77
- # 'preferredcodec': 'mp3',
78
- # 'preferredquality': '192',
79
- # }],
80
- # 'outtmpl': temp_audio_file,
81
- # 'quiet': True,
82
- # }
83
-
84
- # with yt_dlp.YoutubeDL(ydl_opts) as ydl:
85
-
86
- # info = ydl.extract_info(url, download=False)
87
- # title = info.get('title', None)
88
- # ydl.download([url])
89
-
90
- # #with open(temp_audio_file+'.mp3', 'rb') as file:
91
- # audio_file = os.path.join('output', 'audio.mp3')
92
-
93
- # return audio_file, title
94
-
95
  #load all required models and cache
96
  @st.cache_resource
97
  def load_models():
@@ -134,6 +108,43 @@ def get_yt_audio(url):
134
 
135
  return audio_stream, title
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  @st.cache_data
138
  def load_whisper_api(audio):
139
 
@@ -144,12 +155,97 @@ def load_whisper_api(audio):
144
  return transcript
145
 
146
  @st.cache_data
147
- def load_asr_model(model_name):
 
148
 
149
- '''Load the open source whisper model in cases where the API is not working'''
150
- model = whisper.load_model(model_name)
151
 
152
- return model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
  @st.cache_data
155
  def inference(link, upload, _asr_model):
@@ -161,46 +257,7 @@ def inference(link, upload, _asr_model):
161
 
162
  st.info("`Downloading YT audio...`")
163
 
164
- audio_file, title = get_yt_audio(link)
165
-
166
- print(f'audio_file:{audio_file}')
167
-
168
- st.session_state['audio'] = audio_file
169
-
170
- print(f"audio_file_session_state:{st.session_state['audio'] }")
171
-
172
- #Get size of audio file
173
- audio_size = round(os.path.getsize(st.session_state['audio'])/(1024*1024),1)
174
-
175
- #Check if file is > 24mb, if not then use Whisper API
176
- if audio_size <= 25:
177
-
178
- st.info("`Transcribing YT audio...`")
179
-
180
- #Use whisper API
181
- results = load_whisper_api(st.session_state['audio'])['text']
182
-
183
- else:
184
-
185
- st.warning('File size larger than 24mb, applying chunking and transcription',icon="⚠️")
186
-
187
- song = AudioSegment.from_file(st.session_state['audio'], format='mp4')
188
-
189
- # PyDub handles time in milliseconds
190
- twenty_minutes = 20 * 60 * 1000
191
-
192
- chunks = song[::twenty_minutes]
193
-
194
- transcriptions = []
195
-
196
- video_id = extract.video_id(link)
197
- for i, chunk in enumerate(chunks):
198
- chunk.export(f'output/chunk_{i}_{video_id}.mp4', format='mp4')
199
- transcriptions.append(load_whisper_api(f'output/chunk_{i}_{video_id}.mp4')['text'])
200
-
201
- results = ','.join(transcriptions)
202
-
203
- st.info("`YT Video transcription process complete...`")
204
 
205
  return results, title
206
 
@@ -244,12 +301,14 @@ def inference(link, upload, _asr_model):
244
 
245
  except Exception as e:
246
 
247
- st.error(f'''Whisper API Error: {e},
248
- Using Whisper module from GitHub, might take longer than expected''',icon="🚨")
 
 
249
 
250
- results = _asr_model.transcribe(st.session_state['audio'], task='transcribe', language='en')
251
 
252
- return results['text'], title
253
 
254
  @st.cache_data
255
  def clean_text(text):
 
66
 
67
  ###################### Functions #######################################################################################
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  #load all required models and cache
70
  @st.cache_resource
71
  def load_models():
 
108
 
109
  return audio_stream, title
110
 
111
@st.cache_data
def get_yt_audio_dl(url):

    '''Back up for when pytube is down: fetch a video's audio with yt_dlp.

    The audio track is downloaded into the ``output`` directory and converted
    to mp3 by the FFmpegExtractAudio post-processor.

    Args:
        url: Address of the YouTube video.

    Returns:
        A (audio_file, title) tuple: the path of the mp3 file and the title
        reported by yt_dlp (None when the metadata has no title).
    '''

    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        # One file per video id: this function is cached per url, so a single
        # shared file name would let a later download overwrite the audio
        # that an earlier cached result still points to.
        'outtmpl': os.path.join('output', '%(id)s.%(ext)s'),
        'quiet': True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:

        # A single call both resolves the metadata and downloads the audio,
        # instead of extracting the info once and then downloading again.
        info = ydl.extract_info(url, download=True)

    title = info.get('title', None)

    # The post-processor swaps the downloaded file's extension for .mp3.
    audio_file = os.path.join('output', f"{info['id']}.mp3")

    return audio_file, title
139
+
140
# cache_resource (as already used for load_models) keeps one shared model
# object; cache_data is meant for serializable data and would try to
# pickle/copy the model on every call.
@st.cache_resource
def load_asr_model(model_name):

    '''Load the open source whisper model in cases where the API is not working.

    Args:
        model_name: Name of the whisper model, passed to whisper.load_model.

    Returns:
        The model object returned by whisper.load_model.
    '''
    model = whisper.load_model(model_name)

    return model
147
+
148
  @st.cache_data
149
  def load_whisper_api(audio):
150
 
 
155
  return transcript
156
 
157
  @st.cache_data
158
def transcribe_yt_video(url, py_tube=True):
    '''Transcribe YouTube video

    Downloads the audio of the video, stores its path in
    ``st.session_state['audio']`` and transcribes it through load_whisper_api.
    Audio larger than the size threshold is split into twenty minute chunks
    that are transcribed one by one.

    Args:
        url: Address of the YouTube video.
        py_tube: When True the audio is fetched with get_yt_audio; when False
            the yt_dlp based get_yt_audio_dl fallback is used.

    Returns:
        A (results, title) tuple: the transcribed text and the video title
        returned by the downloader.
    '''

    # Both downloaders return (audio_file, title), so the two modes only
    # differ in which one is called. The original body read an undefined
    # name ``link`` here instead of the ``url`` parameter.
    download_audio = get_yt_audio if py_tube else get_yt_audio_dl
    audio_file, title = download_audio(url)

    print(f'audio_file:{audio_file}')

    st.session_state['audio'] = audio_file

    print(f"audio_file_session_state:{st.session_state['audio'] }")

    #Get size of audio file (in MB, one decimal)
    audio_size = round(os.path.getsize(audio_file)/(1024*1024),1)

    #Small enough files go to the Whisper API in one request
    if audio_size <= 25:

        st.info("`Transcribing YT audio...`")

        #Use whisper API
        results = load_whisper_api(audio_file)['text']

    else:

        st.warning('File size larger than 24mb, applying chunking and transcription',icon="⚠️")

        # Read the container format from the file name: the yt_dlp fallback
        # returns an .mp3, so forcing 'mp4' for every input is wrong. Fall
        # back to the previous 'mp4' when the path has no extension.
        source_format = os.path.splitext(audio_file)[1].lstrip('.') or 'mp4'
        song = AudioSegment.from_file(audio_file, format=source_format)

        # PyDub handles time in milliseconds
        twenty_minutes = 20 * 60 * 1000

        chunks = song[::twenty_minutes]

        transcriptions = []

        # The video id keeps chunk file names of different videos apart
        video_id = extract.video_id(url)

        for i, chunk in enumerate(chunks):
            chunk_path = f'output/chunk_{i}_{video_id}.mp4'
            chunk.export(chunk_path, format='mp4')
            transcriptions.append(load_whisper_api(chunk_path)['text'])

        results = ','.join(transcriptions)

    st.info("`YT Video transcription process complete...`")

    return results, title
249
 
250
  @st.cache_data
251
  def inference(link, upload, _asr_model):
 
257
 
258
  st.info("`Downloading YT audio...`")
259
 
260
+ results, title = transcribe_yt_video(link)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
 
262
  return results, title
263
 
 
301
 
302
  except Exception as e:
303
 
304
+ st.error(f'''PyTube Error: {e},
305
+ Using yt_dlp module, might take longer than expected''',icon="🚨")
306
+
307
+ results, title = transcribe_yt_video(link, py_tube=False)
308
 
309
+ # results = _asr_model.transcribe(st.session_state['audio'], task='transcribe', language='en')
310
 
311
+ return results, title
312
 
313
  @st.cache_data
314
  def clean_text(text):