Spaces:

nickmuchi
/

Earnings-Call-Analysis-Whisperer

Running

App Files Files Community

nickmuchi committed on Aug 13, 2023

Commit

14eaae6

•

1 Parent(s): 8e77d9f

Update functions.py

Browse files

Files changed (1) hide show

functions.py +133 -74

functions.py CHANGED Viewed

@@ -66,32 +66,6 @@ margin-bottom: 2.5rem">{}</div> """
 ###################### Functions #######################################################################################
-# @st.cache_data
-# def get_yt_audio(url):
-#     temp_audio_file = os.path.join('output', 'audio')
-#     ydl_opts = {
-#         'format': 'bestaudio/best',
-#         'postprocessors': [{
-#             'key': 'FFmpegExtractAudio',
-#             'preferredcodec': 'mp3',
-#             'preferredquality': '192',
-#         }],
-#         'outtmpl': temp_audio_file,
-#         'quiet': True,
-#     }
-#     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-#         info = ydl.extract_info(url, download=False)
-#         title = info.get('title', None)
-#         ydl.download([url])
-#     #with open(temp_audio_file+'.mp3', 'rb') as file:
-#     audio_file = os.path.join('output', 'audio.mp3')
-#     return audio_file, title
 #load all required models and cache
 @st.cache_resource
 def load_models():
@@ -134,6 +108,43 @@ def get_yt_audio(url):
     return audio_stream, title
 @st.cache_data
 def load_whisper_api(audio):
@@ -144,12 +155,97 @@ def load_whisper_api(audio):
     return transcript
 @st.cache_data
-def load_asr_model(model_name):
-    '''Load the open source  whisper model in cases where the API is not working'''
-    model = whisper.load_model(model_name)
-    return model
 @st.cache_data
 def inference(link, upload, _asr_model):
@@ -161,46 +257,7 @@ def inference(link, upload, _asr_model):
             st.info("`Downloading YT audio...`")
-            audio_file, title = get_yt_audio(link)
-            print(f'audio_file:{audio_file}')
-            st.session_state['audio'] = audio_file
-            print(f"audio_file_session_state:{st.session_state['audio'] }")
-            #Get size of audio file
-            audio_size = round(os.path.getsize(st.session_state['audio'])/(1024*1024),1)
-            #Check if file is > 24mb, if not then use Whisper API
-            if audio_size <= 25:
-                st.info("`Transcribing YT audio...`")
-                #Use whisper API
-                results = load_whisper_api(st.session_state['audio'])['text']
-            else:
-                st.warning('File size larger than 24mb, applying chunking and transcription',icon="⚠️")
-                song = AudioSegment.from_file(st.session_state['audio'], format='mp4')
-                # PyDub handles time in milliseconds
-                twenty_minutes = 20 * 60 * 1000
-                chunks = song[::twenty_minutes]
-                transcriptions = []
-                video_id = extract.video_id(link)
-                for i, chunk in enumerate(chunks):
-                    chunk.export(f'output/chunk_{i}_{video_id}.mp4', format='mp4')
-                    transcriptions.append(load_whisper_api(f'output/chunk_{i}_{video_id}.mp4')['text'])
-                results = ','.join(transcriptions)
-            st.info("`YT Video transcription process complete...`")
             return results, title
@@ -244,12 +301,14 @@ def inference(link, upload, _asr_model):
     except Exception as e:
-        st.error(f'''Whisper API Error: {e},
-                    Using Whisper module from GitHub, might take longer than expected''',icon="🚨")
-        results = _asr_model.transcribe(st.session_state['audio'], task='transcribe', language='en')
-        return results['text'], title
 @st.cache_data
 def clean_text(text):

 ###################### Functions #######################################################################################
 #load all required models and cache
 @st.cache_resource
 def load_models():
     return audio_stream, title
+@st.cache_data
+def get_yt_audio_dl(url):
+    '''Back up for when pytube is down'''
+    temp_audio_file = os.path.join('output', 'audio')
+    ydl_opts = {
+        'format': 'bestaudio/best',
+        'postprocessors': [{
+            'key': 'FFmpegExtractAudio',
+            'preferredcodec': 'mp3',
+            'preferredquality': '192',
+        }],
+        'outtmpl': temp_audio_file,
+        'quiet': True,
+    }
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        info = ydl.extract_info(url, download=False)
+        title = info.get('title', None)
+        ydl.download([url])
+    #with open(temp_audio_file+'.mp3', 'rb') as file:
+    audio_file = os.path.join('output', 'audio.mp3')
+    return audio_file, title
+@st.cache_data
+def load_asr_model(model_name):
+    '''Load the open source  whisper model in cases where the API is not working'''
+    model = whisper.load_model(model_name)
+    return model
 @st.cache_data
 def load_whisper_api(audio):
     return transcript
 @st.cache_data
+def transcribe_yt_video(url, py_tube=True):
+    '''Transcribe YouTube video'''
+    if py_tube:
+        audio_file, title = get_yt_audio(link)
+        print(f'audio_file:{audio_file}')
+        st.session_state['audio'] = audio_file
+        print(f"audio_file_session_state:{st.session_state['audio'] }")
+        #Get size of audio file
+        audio_size = round(os.path.getsize(st.session_state['audio'])/(1024*1024),1)
+        #Check if file is > 24mb, if not then use Whisper API
+        if audio_size <= 25:
+            st.info("`Transcribing YT audio...`")
+            #Use whisper API
+            results = load_whisper_api(st.session_state['audio'])['text']
+        else:
+            st.warning('File size larger than 24mb, applying chunking and transcription',icon="⚠️")
+            song = AudioSegment.from_file(st.session_state['audio'], format='mp4')
+            # PyDub handles time in milliseconds
+            twenty_minutes = 20 * 60 * 1000
+            chunks = song[::twenty_minutes]
+            transcriptions = []
+            video_id = extract.video_id(link)
+            for i, chunk in enumerate(chunks):
+                chunk.export(f'output/chunk_{i}_{video_id}.mp4', format='mp4')
+                transcriptions.append(load_whisper_api(f'output/chunk_{i}_{video_id}.mp4')['text'])
+            results = ','.join(transcriptions)
+    else:
+        audio_file, title = get_yt_audio_dl(link)
+        print(f'audio_file:{audio_file}')
+        st.session_state['audio'] = audio_file
+        print(f"audio_file_session_state:{st.session_state['audio'] }")
+        #Get size of audio file
+        audio_size = round(os.path.getsize(st.session_state['audio'])/(1024*1024),1)
+        #Check if file is > 24mb, if not then use Whisper API
+        if audio_size <= 25:
+            st.info("`Transcribing YT audio...`")
+            #Use whisper API
+            results = load_whisper_api(st.session_state['audio'])['text']
+        else:
+            st.warning('File size larger than 24mb, applying chunking and transcription',icon="⚠️")
+            song = AudioSegment.from_file(st.session_state['audio'], format='mp4')
+            # PyDub handles time in milliseconds
+            twenty_minutes = 20 * 60 * 1000
+            chunks = song[::twenty_minutes]
+            transcriptions = []
+            video_id = extract.video_id(link)
+            for i, chunk in enumerate(chunks):
+                chunk.export(f'output/chunk_{i}_{video_id}.mp4', format='mp4')
+                transcriptions.append(load_whisper_api(f'output/chunk_{i}_{video_id}.mp4')['text'])
+            results = ','.join(transcriptions)
+    st.info("`YT Video transcription process complete...`")
+    return results, title
 @st.cache_data
 def inference(link, upload, _asr_model):
             st.info("`Downloading YT audio...`")
+            results, title = transcribe_yt_video(link)
             return results, title
     except Exception as e:
+        st.error(f'''PyTube Error: {e},
+                    Using yt_dlp module, might take longer than expected''',icon="🚨")
+        results, title = transcribe_yt_video(link, py_tube=False)
+        # results = _asr_model.transcribe(st.session_state['audio'], task='transcribe', language='en')
+        return results, title
 @st.cache_data
 def clean_text(text):