deven367 committed on
Commit 2433253
1 Parent(s): 67b58a9
Files changed (2):
  1. annotator/utils.py +15 -6
  2. app.py +49 -22
annotator/utils.py CHANGED

@@ -3,8 +3,9 @@ import datetime
 import pandas as pd
 import numpy as np
 import subprocess
-from fastcore.foundation import working_directory
+from fastcore.foundation import working_directory, L
 from pathlib import Path
+import torch


 def start_app():

@@ -17,9 +18,15 @@ def get_audio(url: str):
     # subprocess.run(['youtube-dl', '-F', 'bestaudio[ext=m4a]', url])
     subprocess.run(["youtube-dl", "-x", "--audio-format", "mp3", url])

+def get_v_from_url(url):
+    _, val = url.split('?v=')
+    return val.split('&')[0]
+
+

 def annotate(audio_src, model_size="tiny"):
-    model = whisper.load_model(model_size, device="cpu")
+    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+    model = whisper.load_model(model_size, device=device)
     result = model.transcribe(audio_src)
     return result
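Review note: get_v_from_url() recovers the video id by splitting on the query
string, and annotate() now loads the whisper model on the GPU whenever torch
can see one. A minimal sketch of the id extraction, with a hypothetical URL:

    # How get_v_from_url() picks the id out of a long-form watch URL:
    url = 'https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=42s'
    _, val = url.split('?v=')    # everything after '?v='
    print(val.split('&')[0])     # 'dQw4w9WgXcQ' -- trailing params dropped

Short links such as https://youtu.be/<id> contain no '?v=', so the tuple
unpacking raises ValueError; the helper assumes the long watch-URL form.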
 
@@ -36,13 +43,15 @@ def df_from_result(result):


 def find_word_timestamp(df, *words):
+    l = L()
     for word in words:
         vals = df["text"].str.find(word).values
         arr = np.where(vals > 1)
-        times = df.iloc[arr]["start"].values
-        for t in times:
-            t = t.split(".")[:-1]
-            print(f"{word} is said on {t} timestamp")
+        times = list(df.iloc[arr]["start"].values)
+        nt = L(times).map(lambda x: x.split(".")[:-1])
+        l.append(nt)
+    return l
+


 def generate_srt(df):
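Review note: find_word_timestamp() now returns its matches (a fastcore L with
one entry per word) instead of printing them, which is what lets app.py render
them. A minimal sketch of the expected shape, assuming df["start"] holds
strings like "00:00:01.000" (split(".")[:-1] drops the fractional part):

    import pandas as pd

    df = pd.DataFrame({
        "text": ["  hello world", "  nothing here", "  world again"],
        "start": ["00:00:01.000", "00:00:05.500", "00:00:09.250"],
    })

    # find_word_timestamp(df, "world")
    # -> one entry per word, each an L of ['HH:MM:SS'] lists:
    #    [[['00:00:01'], ['00:00:09']]]

One caveat carried over from the old code: str.find() returns -1 on a miss, so
the vals > 1 filter also discards genuine matches at character positions 0 and
1; vals >= 0 may be the intended predicate.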
app.py CHANGED

@@ -14,32 +14,59 @@ if not AUDIO_PATH.exists(): AUDIO_PATH.mkdir(exist_ok=True)

 def make_sidebar():
     with st.sidebar:
-        st.write('App')
-        st.write('YouTube')
+        st.markdown('## yt-video-annotator')
+        st.write('Link to the GitHub repo')
+
+@st.cache(allow_output_mutation=True)
+def caption_from_url(url):
+    audio_src = get_audio(url)
+    v = get_v_from_url(url)
+    audio_src = globtastic(AUDIO_PATH, file_glob='*.mp3', file_re=v)[0]
+    result = annotate(audio_src)
+    df = df_from_result(result)
+    return audio_src, df
+
+

 def main():
+    url, name = None, None
     make_sidebar()
-    # st.write('This is it!')
-    url = st.text_input('Enter URL for the YT video')
-
-    if st.button('Generate SRT'):
-        audio_src = get_audio(url)
-        audio_src = globtastic(AUDIO_PATH, file_glob='*.mp3')[0]
-        result = annotate(audio_src)
-        df = df_from_result(result)
-
-        # st.write(result.get('segments', 'wrong key'))
-        st.write(df)
-        name = Path(audio_src).stem
-        s = generate_srt(df)
-        with working_directory(SRT_PATH):
-            write_srt(s, name)
-
-        with working_directory(SRT_PATH):
-            srt = globtastic('.', file_glob='*.srt')[0]
-            with open(srt) as f:
-                st.download_button('Download SRT', f, file_name=f'{name}.srt')
+    col1, col2 = st.columns([1.2, 1])
+    with col1:
+        url = st.text_input('Enter URL for the YT video')
+        st.video(url)
+
+    with col2:
+        default_opt = 'Search for words'
+        opt = st.radio('What do you wish to do?', [default_opt, 'Generate subtitles for the entire video'])
+        if opt == default_opt:
+            st.markdown('### Search for words in the video')
+            words = st.text_input('Enter words separated by a comma')
+            words = words.split(',')
+
+            if st.button('Get Timestamps'):
+                audio_src, df = caption_from_url(url)
+                times = find_word_timestamp(df, *words)
+                times = np.asarray(times).reshape(len(words), -1)
+                # st.write(times)
+                for i, word in enumerate(words):
+                    st.write(f"{word} is said on {times[i].flatten()} timestamp")
+
+        else:
+            if st.button('Generate SRT'):
+                audio_src, df = caption_from_url(url)
+                name = Path(audio_src).stem
+                s = generate_srt(df)
+                with working_directory(SRT_PATH):
+                    write_srt(s, name)
+
+    if name is not None:
+        with working_directory(SRT_PATH):
+            key = get_v_from_url(url)
+            srt = globtastic('.', file_glob='*.srt', file_re=key)[0]
+            with open(srt) as f:
+                st.download_button('Download SRT', f, file_name=f'{name}.srt')

     # subprocess.run(['rm', '-rf', 'audio'])
     # subprocess.run(['rm', '-rf', 'srt'])
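Review note: caption_from_url() folds download, globbing, and transcription
into one function behind @st.cache(allow_output_mutation=True), so Streamlit's
rerun-on-every-interaction does not redownload or retranscribe the same video;
allow_output_mutation skips re-hashing the returned DataFrame between runs.
One small quirk in the word-search branch, shown with a hypothetical input:

    # The comma split keeps surrounding whitespace, so the second search term
    # carries a leading space:
    words = 'hello, world'.split(',')
    print(words)   # ['hello', ' world'] -- a per-word .strip() would normalize

The reshape to (len(words), -1) also assumes every word yields the same number
of timestamps; ragged hit counts leave np.asarray with an irregular nesting
(an outright error in recent NumPy versions).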