Spaces:

deven367
/

yt-video-annotator-hf

Running

App Files Community

deven367 committed on Jan 29, 2023

Commit

2433253

1 Parent(s): 67b58a9

bug fixes

Browse files

Files changed (2) hide show

annotator/utils.py +15 -6
app.py +49 -22

annotator/utils.py CHANGED Viewed

@@ -3,8 +3,9 @@ import datetime
 import pandas as pd
 import numpy as np
 import subprocess
-from fastcore.foundation import working_directory
 from pathlib import Path
 def start_app():
@@ -17,9 +18,15 @@ def get_audio(url: str):
         # subprocess.run(['youtube-dl', '-F', 'bestaudio[ext=m4a]', url])
         subprocess.run(["youtube-dl", "-x", "--audio-format", "mp3", url])
 def annotate(audio_src, model_size="tiny"):
-    model = whisper.load_model(model_size, device="cpu")
     result = model.transcribe(audio_src)
     return result
@@ -36,13 +43,15 @@ def df_from_result(result):
 def find_word_timestamp(df, *words):
     for word in words:
         vals = df["text"].str.find(word).values
         arr = np.where(vals > 1)
-        times = df.iloc[arr]["start"].values
-        for t in times:
-            t = t.split(".")[:-1]
-            print(f"{word} is said on {t} timestamp")
 def generate_srt(df):

 import pandas as pd
 import numpy as np
 import subprocess
+from fastcore.foundation import working_directory, L
 from pathlib import Path
+import torch
 def start_app():
         # subprocess.run(['youtube-dl', '-F', 'bestaudio[ext=m4a]', url])
         subprocess.run(["youtube-dl", "-x", "--audio-format", "mp3", url])
+def get_v_from_url(url):
+    _, val = url.split('?v=')
+    return val.split('&')[0]
 def annotate(audio_src, model_size="tiny"):
+    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+    model = whisper.load_model(model_size, device=device)
     result = model.transcribe(audio_src)
     return result
 def find_word_timestamp(df, *words):
+    l = L()
     for word in words:
         vals = df["text"].str.find(word).values
         arr = np.where(vals > 1)
+        times = list(df.iloc[arr]["start"].values)
+        nt = L(times).map(lambda x: x.split(".")[:-1])
+        l.append(nt)
+    return l
 def generate_srt(df):

app.py CHANGED Viewed

@@ -14,32 +14,59 @@ if not AUDIO_PATH.exists(): AUDIO_PATH.mkdir(exist_ok=True)
 def make_sidebar():
     with st.sidebar:
-        st.write('App')
-        st.write('YouTube')
 def main():
     make_sidebar()
-    # st.write('This is it!')
-    url = st.text_input('Enter URL for the YT video')
-    if st.button('Generate SRT'):
-        audio_src = get_audio(url)
-        audio_src = globtastic(AUDIO_PATH, file_glob='*.mp3')[0]
-        result = annotate(audio_src)
-        df = df_from_result(result)
-        # st.write(result.get('segments', 'wrong key'))
-        st.write(df)
-        name = Path(audio_src).stem
-        s = generate_srt(df)
-        with working_directory(SRT_PATH):
-            write_srt(s, name)
-        with working_directory(SRT_PATH):
-            srt = globtastic('.', file_glob='*.srt')[0]
-            with open(srt) as f:
-                st.download_button('Download SRT', f, file_name=f'{name}.srt')
     # subprocess.run(['rm', '-rf', 'audio'])
     # subprocess.run(['rm', '-rf', 'srt'])

 def make_sidebar():
     with st.sidebar:
+        st.markdown('## yt-video-annotator')
+        st.write('Link to the GitHub repo')
+@st.cache(allow_output_mutation=True)
+def caption_from_url(url):
+    audio_src = get_audio(url)
+    v = get_v_from_url(url)
+    audio_src = globtastic(AUDIO_PATH, file_glob='*.mp3', file_re=v)[0]
+    result = annotate(audio_src)
+    df = df_from_result(result)
+    return audio_src, df
 def main():
+    url, name = None, None
     make_sidebar()
+    col1, col2 = st.columns([1.2, 1])
+    with col1:
+        url = st.text_input('Enter URL for the YT video')
+        st.video(url)
+    with col2:
+        default_opt = 'Search for words'
+        opt = st.radio('What do you wish to do?', [default_opt, 'Generate subtitles for the entire video'])
+        if opt == default_opt:
+            st.markdown('### Search for words in the video')
+            words = st.text_input('Enter words separated by a comma')
+            words = words.split(',')
+            if st.button('Get Timestamps'):
+                audio_src, df = caption_from_url(url)
+                times = find_word_timestamp(df, *words)
+                times = np.asarray(times).reshape(len(words), -1)
+                # st.write(times)
+                for i, word in enumerate(words):
+                    st.write(f"{word} is said on {times[i].flatten()} timestamp")
+        else:
+            if st.button('Generate SRT'):
+                audio_src, df = caption_from_url(url)
+                name = Path(audio_src).stem
+                s = generate_srt(df)
+                with working_directory(SRT_PATH):
+                    write_srt(s, name)
+        if name is not None:
+            with working_directory(SRT_PATH):
+                key = get_v_from_url(url)
+                srt = globtastic('.', file_glob='*.srt', file_re=key)[0]
+                with open(srt) as f:
+                    st.download_button('Download SRT', f, file_name=f'{name}.srt')
     # subprocess.run(['rm', '-rf', 'audio'])
     # subprocess.run(['rm', '-rf', 'srt'])