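"""Helpers for a small Streamlit app: download a YouTube video's audio with
yt-dlp, transcribe it with OpenAI Whisper, look up where given words occur in
the transcript, and export the result as an SRT subtitle file."""
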
import datetime
import subprocess
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import whisper
from fastcore.foundation import L
from fastcore.xtras import working_directory


def start_app():
    "Launch the Streamlit front end defined in app.py."
    subprocess.run(["streamlit", "run", "app.py"])


def get_audio(url: str):
    "Download the audio track of `url` as a WAV file into ./audio."
    audio_path = Path("./audio")
    audio_path.mkdir(exist_ok=True)
    with working_directory(audio_path):
        # subprocess.run(["youtube-dl", "-F", "bestaudio[ext=m4a]", url])
        subprocess.run(["yt-dlp", "-x", "--audio-format", "wav", url])


def get_v_from_url(url):
    "Return the YouTube video id (the `v` query parameter) from `url`."
    _, val = url.split("?v=")
    return val.split("&")[0]


def annotate(audio_src, model_size="tiny"):
    "Transcribe `audio_src` with a Whisper model of the given size."
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    model = whisper.load_model(model_size, device=device)
    result = model.transcribe(audio_src)
    return result


def get_time(seconds):
    "Format a segment offset in seconds as a zero-padded H:MM:SS[.ffffff] string."
    return "{:0>8}".format(str(datetime.timedelta(seconds=seconds)))


def df_from_result(result):
    "Flatten Whisper's segment list into a DataFrame with readable timestamps."
    df = pd.json_normalize(result["segments"])
    df["start"] = df["start"].apply(get_time)
    df["end"] = df["end"].apply(get_time)
    return df


def find_word_timestamp(df, *words):
    "Return, for each word, the start times of the segments that contain it."
    found = L()
    for word in words:
        vals = df["text"].str.find(word).values
        # str.find returns -1 when the word is absent, so keep every non-negative hit
        arr = np.where(vals >= 0)
        times = list(df.iloc[arr]["start"].values)
        # drop the fractional part of each timestamp
        nt = L(times).map(lambda x: x.split(".")[0])
        found.append(nt)
    return found


def generate_srt(df):
    "Build an SRT-formatted string from the start/end/text columns of `df`."
    s = ""
    # an SRT block is: a 1-based index, "start --> end", the text, then a blank line
    for i, (start, end, text) in enumerate(df[["start", "end", "text"]].values, start=1):
        start = start.replace(".", ",")  # SRT uses a comma before the fractional seconds
        end = end.replace(".", ",")
        s += f"{i}\n"
        s += f"{start} --> {end}\n"
        s += f"{text.strip()}\n\n"
    return s


def write_srt(s, name):
    "Write the subtitle string `s` to `<name>.srt`."
    with open(f"{name}.srt", "w") as f:
        f.write(s)
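

# A minimal end-to-end sketch of how these helpers fit together. The URL and the
# output name are placeholders, and the glob below assumes yt-dlp's default
# "<title> [<video id>].wav" naming, so locating the downloaded file this way is
# a best-effort assumption rather than part of the original app.
if __name__ == "__main__":
    url = "https://www.youtube.com/watch?v=EXAMPLE_ID"  # hypothetical video URL
    get_audio(url)
    video_id = get_v_from_url(url)
    audio_file = next(Path("./audio").glob(f"*{video_id}*.wav"))
    result = annotate(str(audio_file), model_size="tiny")
    df = df_from_result(result)
    srt = generate_srt(df)
    write_srt(srt, video_id)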