# subtify / app.py
# Maximofn's picture
# Refactor project structure and update dependencies
# e015c08
# raw
# history blame
# 18.9 kB
import gradio as gr
import argparse
import spaces
import os
import torch
import shutil
from time import sleep
from tqdm import tqdm
from lang_list import union_language_dict
# import pyperclip
import re
from PIL import Image
# import urllib.request
from ui_config import (
BACKGROUND_COLOR, BUTTON_COLOR, SVG_COLOR, PANEL_COLOR,
PRIMARY_TEXT_COLOR, SUBDUED_TEXT_COLOR, BACKGROUND_PRIMARY_COLOR,
BACKGROUND_SECONDARY_COLOR, PRIMARY_BODER_COLOR, BLOCK_TITLE_TEXT_COLOR,
INPUT_BACKGROUND_COLOR, INPUT_BORDER_COLOR, INPUT_PLACEHOLDER_COLOR,
ERROR_BACKGROUND_COLOR, ERROR_TEXT_COLOR, ERROR_BORDER_COLOR,
BUTTON_SECONDARY_BACKGROUND_COLOR, BUTTON_SECONDARY_BORDER_COLOR,
BUTTON_SECONDARY_TEXT_COLOR, RED, GREEN, BLUE,
html_social_media, get_html_subtify_logo, html_buy_me_a_coffe
)
# from url_manager import get_youtube_thumbnail, is_valid_youtube_url, is_valid_twitch_url, is_valid_url
from slice_audio import slice_audio as slice_audio_main
from audio import get_audio_from_video
from transcribe import transcribe, get_language_dict
# Length of the '*' separator line printed at the start of each pipeline step.
NUMBER = 100

# Use the GPU when torch can see one, otherwise fall back to the CPU.
# (Assign "cpu" here instead to force CPU execution.)
DEVICE = "cpu" if not torch.cuda.is_available() else "cuda"

# Pipeline stage switches; all enabled.
DOWNLOAD = SLICE_AUDIO = TRANSCRIBE_AUDIO = True
CONCATENATE_TRANSCRIPTIONS = TRANSLATE_TRANSCRIPTIONS = True
ADD_SUBTITLES_TO_VIDEO = REMOVE_FILES = True
# Audio is handled in 30-second chunks with a 5-second overlap on every device.
CHUNK_SECONDS = 30
CHUNK_OVERLAP_SECONDS = 5

if DEVICE == "cpu":
    # On CPU we suppose we are running on the Hugging Face server.
    # The RAM figure is a fixed placeholder (probing it with `free -m` is
    # disabled); it is only reported in the log line below.
    ram = 16000
    factor = 1
    print(f"RAM: {ram}, CHUNK_SECONDS: {CHUNK_SECONDS}, CHUNK_OVERLAP_SECONDS: {CHUNK_OVERLAP_SECONDS}")
# String tags for the video source / failure case.
YOUTUBE, TWITCH, ERROR = "youtube", "twitch", "error"

# Load the logo and build its HTML snippet at a quarter of the image size.
subtify_logo = Image.open("assets/subtify_logo-scaled.png")
subtify_logo_width, subtify_logo_height = subtify_logo.size
factor = 4
new_width, new_height = subtify_logo_width // factor, subtify_logo_height // factor
html_subtify_logo = get_html_subtify_logo(new_width, new_height)

# Languages offered in the source/target dropdowns.
language_dict = union_language_dict()
def remove_all_files():
    """Delete every working folder the pipeline creates in the current directory.

    Removes ``audios``, ``chunks``, ``concatenated_transcriptions``,
    ``transcriptions``, ``translated_transcriptions``, ``videos`` and
    ``vocals`` when they exist. Missing entries are skipped, so the function
    can be called repeatedly. Removal is best-effort: a failure on one entry
    is reported and does not stop the others.
    """
    working_folders = (
        "audios",
        "chunks",
        "concatenated_transcriptions",
        "transcriptions",
        "translated_transcriptions",
        "videos",
        "vocals",
    )
    for folder in working_folders:
        if not os.path.exists(folder):
            continue
        if os.path.isdir(folder) and not os.path.islink(folder):
            # Portable replacement for `rm -r`; does not spawn a shell.
            shutil.rmtree(folder, ignore_errors=True)
            continue
        # A plain file or a symlink using one of the names: remove just that entry.
        try:
            os.remove(folder)
        except OSError as error:
            print(f"Error: {str(error)}")
def reset_frontend():
    """Clear the uploaded video and hide the controls and progress widgets.

    Returns exactly one value per component in the ``outputs`` list of
    ``delete_button.click`` (wired in ``subtify``), in that same order, so
    every update lands on a component of the matching type.

    Returns:
        tuple: ``None`` for the video input followed by hidden updates for
        the remaining eleven components.
    """
    hidden = False
    return (
        None,                          # video_input
        gr.Dropdown(visible=hidden),   # source_languaje
        gr.Dropdown(visible=hidden),   # target_languaje
        gr.Accordion(visible=hidden),  # Advanced_setings
        gr.Dropdown(visible=hidden),   # number_of_speakers
        gr.Button(visible=hidden),     # subtify_button
        gr.Textbox(visible=hidden),    # auxiliar_block1
        gr.Textbox(visible=hidden),    # video_transcribed_progress_info
        gr.Textbox(visible=hidden),    # transcriptions_concatenated_progress_info
        gr.Textbox(visible=hidden),    # video_translated_progress_info
        gr.Textbox(visible=hidden),    # video_subtitled_progress_info
        gr.Video(visible=hidden),      # subtitled_video
    )
def show_auxiliar_block1():
    """Return a hidden Textbox update whose value is "URL checked"."""
    url_checked_update = gr.Textbox(value="URL checked", visible=False)
    return url_checked_update
def change_visibility_texboxes():
    """Mark the auxiliary block as done and reveal the progress textboxes.

    Returns one update per component in the ``outputs`` list of
    ``subtify_button.click`` (wired in ``subtify``), in that same order.
    Writing "Done" into ``auxiliar_block1`` is what fires its ``change``
    event and starts the processing chain.

    Returns:
        tuple: six ``gr.update`` objects.
    """
    return (
        gr.update(value="Done"),   # auxiliar_block1
        gr.update(visible=True),   # get_audio_from_video_info
        gr.update(visible=True),   # video_transcribed_progress_info
        gr.update(visible=True),   # transcriptions_concatenated_progress_info
        gr.update(visible=True),   # video_translated_progress_info
        gr.update(visible=True),   # video_subtitled_progress_info
    )
def get_audio(video_path):
    """Extract the audio track of a video into the ``audios`` folder.

    Args:
        video_path: Path of the video passed to ``get_audio_from_video``.

    Returns:
        list: Two ``gr.update`` objects, the status ("Ok" or "Error") for
        ``get_audio_from_video_info`` and the audio path (empty on error)
        for ``original_audio_path``.
    """
    print('*'*NUMBER)
    print(f"Getting audio from video {video_path}")

    audios_folder = "audios"
    try:
        status, extracted_audio = "Ok", get_audio_from_video(video_path, audios_folder)
    except Exception as error:
        # Any failure is reported through the status textbox instead of raising.
        print(f"Error: {str(error)}")
        status, extracted_audio = "Error", ""

    return [
        gr.update(value=status),            # get_audio_from_video_info
        gr.update(value=extracted_audio),   # original_audio_path
    ]
def slice_audio(input_audio_path):
    """Split an audio file into overlapping chunks inside the ``chunks`` folder.

    Args:
        input_audio_path: Path of the audio file handed to ``slice_audio_main``.

    Returns:
        tuple: A one-element tuple holding the "Ok" update for the slicing
        progress textbox.
    """
    print('*'*NUMBER)
    print(f"Slicing audio {input_audio_path} in chunks of {CHUNK_SECONDS} seconds with {CHUNK_OVERLAP_SECONDS} seconds overlap")

    # Make sure both working folders exist before slicing
    print("Creating vocals and chunks folders")
    vocals_dir, chunks_dir = "vocals", "chunks"
    for working_dir in (vocals_dir, chunks_dir):
        if not os.path.exists(working_dir):
            os.makedirs(working_dir)

    slice_audio_main(input_audio_path, chunks_dir, CHUNK_SECONDS, CHUNK_OVERLAP_SECONDS)

    sliced_status = gr.update(value="Ok")  # video_sliced_progress_info
    return (sliced_status,)
def trascribe_audio(input_audio_path, source_languaje):
    """Transcribe an audio file in the selected source language.

    Args:
        input_audio_path: Path of the audio file passed to ``transcribe``.
        source_languaje: Key into the dict returned by ``get_language_dict``;
            its "transcriber" entry is what ``transcribe`` receives.

    Returns:
        A ``gr.Textbox`` update with value "Ok".
    """
    print('*'*NUMBER)
    print(f"Transcript {input_audio_path}")

    # Resolve the transcriber entry for the chosen language
    transcriber_language = get_language_dict()[source_languaje]["transcriber"]

    # Transcribe audio file
    transcribe(
        input_audio_path,
        transcriber_language,
        DEVICE,
        CHUNK_SECONDS,
        CHUNK_OVERLAP_SECONDS,
    )

    return gr.Textbox(value="Ok")
def concatenate_transcriptions():
    """Merge the per-chunk transcriptions and delete the per-chunk files.

    Runs ``concat_transcriptions.py`` on ``chunks/output_files.txt`` with the
    chunk length and overlap, then removes the ``.srt`` file in
    ``transcriptions`` that corresponds to each chunk listed in that file.

    Returns:
        tuple: A ``gr.Textbox`` update with "Ok" and one carrying the path
        ``concatenated_transcriptions/download_audio.srt``.
    """
    import subprocess
    import sys

    print('*'*NUMBER)
    print("Concatenate transcriptions")

    folder_concatenated = "concatenated_transcriptions"
    os.makedirs(folder_concatenated, exist_ok=True)

    chunck_file = "chunks/output_files.txt"
    python_file = "concat_transcriptions.py"
    # Argument list instead of a shell string; run with the current interpreter.
    # The exit status is not enforced, matching the previous best-effort call.
    subprocess.run(
        [sys.executable, python_file, chunck_file, str(CHUNK_SECONDS), str(CHUNK_OVERLAP_SECONDS)],
        check=False,
    )

    with open(chunck_file, 'r') as f:
        chunk_paths = f.read().splitlines()

    transcriptions_folder = "transcriptions"
    transcription_extension = "srt"
    for chunk_path in chunk_paths:
        if not chunk_path.strip():
            # Blank lines in the list file carry no chunk name.
            continue
        # Base name without extension; tolerates extra dots or nested folders.
        file_name = os.path.splitext(os.path.basename(chunk_path))[0]
        transcription_path = f"{transcriptions_folder}/{file_name}.{transcription_extension}"
        try:
            os.remove(transcription_path)
        except OSError as error:
            # Best-effort cleanup: report and keep going.
            print(f"Error: {str(error)}")

    audio_transcribed = "concatenated_transcriptions/download_audio.srt"
    return (
        gr.Textbox(value="Ok"),
        gr.Textbox(value=audio_transcribed),
    )
def translate_transcription(original_audio_transcribed_path, source_languaje, target_languaje):
    """Translate the concatenated transcription into the target language.

    Runs ``translate_transcriptions.py`` and then deletes
    ``concatenated_transcriptions/download_audio.srt`` if it exists.

    Args:
        original_audio_transcribed_path: Path of the transcription to translate.
        source_languaje: Source language, passed as ``--source_languaje``.
        target_languaje: Target language, passed as ``--target_languaje`` and
            used in the name of the returned file path.

    Returns:
        tuple: A ``gr.Textbox`` update with "Ok" and one carrying
        ``translated_transcriptions/download_audio_<target_languaje>.srt``.
    """
    import subprocess
    import sys

    print('*'*NUMBER)
    print("Translate transcription")

    folder_translated_transcriptions = "translated_transcriptions"
    os.makedirs(folder_translated_transcriptions, exist_ok=True)

    python_file = "translate_transcriptions.py"
    # Each value is passed as its own argument, so language names or paths
    # containing spaces or shell metacharacters reach the script intact.
    subprocess.run(
        [
            sys.executable,
            python_file,
            str(original_audio_transcribed_path),
            "--source_languaje", str(source_languaje),
            "--target_languaje", str(target_languaje),
            "--device", str(DEVICE),
        ],
        check=False,
    )

    translated_transcription = f"translated_transcriptions/download_audio_{target_languaje}.srt"

    # The untranslated transcription is no longer needed.
    transcription_file = "concatenated_transcriptions/download_audio.srt"
    if os.path.isfile(transcription_file):
        try:
            os.remove(transcription_file)
        except OSError as error:
            print(f"Error: {str(error)}")

    return (
        gr.Textbox(value="Ok"),
        gr.Textbox(value=translated_transcription),
    )
def add_translated_subtitles_to_video(original_video_path, original_audio_path, original_audio_translated_path):
    """Burn the translated subtitles into the video and clean up intermediates.

    Runs ``add_subtitles_to_video.py`` and then deletes the original video,
    the extracted audio, the translated transcription and
    ``chunks/output_files.txt`` when they exist.

    Args:
        original_video_path: Path of the uploaded video.
        original_audio_path: Path of the audio extracted from the video.
        original_audio_translated_path: Path of the translated transcription.

    Returns:
        tuple: A visible ``gr.Video`` pointing at
        ``videos/download_video_with_subtitles.mp4``, a hidden "Ok" Textbox
        update and a plain "Ok" Textbox update.
    """
    import subprocess
    import sys

    print('*'*NUMBER)
    print("Add subtitles to video")

    python_file = "add_subtitles_to_video.py"
    # Paths go in as separate arguments so spaces or shell metacharacters in
    # them cannot split or alter the command.
    subprocess.run(
        [
            sys.executable,
            python_file,
            str(original_audio_translated_path),
            str(original_video_path),
            str(original_audio_path),
        ],
        check=False,
    )

    leftovers = (
        original_video_path,
        original_audio_path,
        original_audio_translated_path,
        "chunks/output_files.txt",
    )
    for leftover in leftovers:
        if os.path.isfile(leftover):
            try:
                os.remove(leftover)
            except OSError as error:
                # Best-effort cleanup: report and keep going.
                print(f"Error: {str(error)}")

    subtitled_video = "videos/download_video_with_subtitles.mp4"
    return (
        gr.Video(value=subtitled_video, visible=True),
        gr.Textbox(value="Ok", visible=False),
        gr.Textbox(value="Ok"),
    )
def hide_textbobes_progress_info():
    """Return six hidden Textbox updates, each reset to the value "Waiting"."""
    progress_box_count = 6
    return tuple(
        gr.Textbox(value="Waiting", visible=False)
        for _ in range(progress_box_count)
    )
def process_uploaded_video(video_path):
    """Copy an uploaded video into ``videos`` and reveal the configuration block.

    This is the handler of ``video_input.change``, so it is also called when
    the video is cleared; in that case there is nothing to copy and the UI is
    put back into its "no video" state instead of raising.

    Args:
        video_path: Path of the uploaded file, or an empty value when the
            video input has been cleared.

    Returns:
        list: Updates for ``video_input`` (label), ``config_block``
        (visibility) and ``original_video_path`` (value).
    """
    if not video_path:
        return [
            gr.update(label="Upload video"),  # video_input
            gr.update(visible=False),         # config_block
            gr.update(value=""),              # original_video_path
        ]

    # Create videos folder
    videos_folder = "videos"
    os.makedirs(videos_folder, exist_ok=True)

    # Copy uploaded video to videos folder
    new_video_path = os.path.join(videos_folder, "download_video.mp4")
    shutil.copy(video_path, new_video_path)

    # Show the config block and publish the new video path
    return [
        gr.update(label="Video uploaded"),  # video_input
        gr.update(visible=True),            # config_block
        gr.update(value=new_video_path),    # original_video_path
    ]
# Build the Gradio Blocks interface, wire its events and launch the app.
# NOTE(review): the original indentation of this function is not visible here, so the
# nesting of the `with` layout blocks below cannot be confirmed from this text.
# NOTE(review): @spaces.GPU decorates the function that builds and launches the UI;
# confirm this is the function that is meant to hold the GPU.
@spaces.GPU
def subtify():
# Theme: override the default theme variables with the colors imported from ui_config.
with gr.Blocks(
theme=gr.themes.Default().set
(
body_background_fill=BACKGROUND_COLOR,
body_background_fill_dark=BACKGROUND_COLOR,
body_text_color=PRIMARY_TEXT_COLOR,
body_text_color_dark=PRIMARY_TEXT_COLOR,
body_text_color_subdued=SUBDUED_TEXT_COLOR,
body_text_color_subdued_dark=SUBDUED_TEXT_COLOR,
background_fill_primary=BACKGROUND_PRIMARY_COLOR,
background_fill_primary_dark=BACKGROUND_PRIMARY_COLOR,
background_fill_secondary=BACKGROUND_SECONDARY_COLOR,
background_fill_secondary_dark=BACKGROUND_SECONDARY_COLOR,
border_color_primary=PRIMARY_BODER_COLOR,
border_color_primary_dark=PRIMARY_BODER_COLOR,
block_background_fill=BACKGROUND_PRIMARY_COLOR,
block_background_fill_dark=BACKGROUND_PRIMARY_COLOR,
block_title_text_color=BLOCK_TITLE_TEXT_COLOR,
block_title_text_color_dark=BLOCK_TITLE_TEXT_COLOR,
input_background_fill=INPUT_BACKGROUND_COLOR,
input_background_fill_dark=INPUT_BACKGROUND_COLOR,
input_border_color=INPUT_BORDER_COLOR,
input_border_color_dark=INPUT_BORDER_COLOR,
input_placeholder_color=INPUT_PLACEHOLDER_COLOR,
input_placeholder_color_dark=INPUT_PLACEHOLDER_COLOR,
error_background_fill=ERROR_BACKGROUND_COLOR,
error_background_fill_dark=ERROR_BACKGROUND_COLOR,
error_text_color=ERROR_TEXT_COLOR,
error_text_color_dark=ERROR_TEXT_COLOR,
error_border_color=ERROR_BORDER_COLOR,
error_border_color_dark=ERROR_BORDER_COLOR,
button_secondary_background_fill=BUTTON_SECONDARY_BACKGROUND_COLOR,
button_secondary_background_fill_dark=BUTTON_SECONDARY_BACKGROUND_COLOR,
button_secondary_border_color=BUTTON_SECONDARY_BORDER_COLOR,
# NOTE(review): every other entry comes as a light/dark pair of the same variable; this one
# sets button_primary_background_fill_dark — possibly button_secondary_border_color_dark was intended. Confirm.
button_primary_background_fill_dark=BUTTON_SECONDARY_BORDER_COLOR,
button_secondary_text_color=BUTTON_SECONDARY_TEXT_COLOR,
button_secondary_text_color_dark=BUTTON_SECONDARY_TEXT_COLOR,
)
) as demo:
# Choices for the "Number of speakers" dropdown: 100 down to 1.
num_speaker = []
for i in range(100, 0, -1):
num_speaker.append(i)
# Layout
gr.HTML(html_social_media)
gr.HTML("<h1 style='text-align: center;'>Subtify</h1>")
gr.HTML(html_subtify_logo)
# Input block, where the user can upload a video and configure the subtify process
# Components created with visible=visible start hidden.
visible = False
input_block = gr.Row(variant="panel")
with input_block:
input_video_block = gr.Row(scale=2)
with input_video_block:
video_input = gr.Video(
label="Upload video",
sources=["upload"],
scale=1,
interactive=True
)
delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="clear", min_width="10px", scale=0)
# Configuration column; process_uploaded_video makes it visible after an upload.
config_block = gr.Column(scale=1, visible=visible)
with config_block:
with gr.Row():
source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True, info="Language of the video")
target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True, info="Language to translate the subtitles")
with gr.Accordion("Advanced settings", open=False, visible=True) as Advanced_setings:
number_of_speakers = gr.Dropdown(visible=True, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True, info="Number of speakers in the video, if you don't know, select 10")
subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=True)
# Hidden textbox whose change event starts the processing chain (see Events).
auxiliar_block1 = gr.Textbox(placeholder="", interactive=False, visible=visible)
# Status textboxes, one per pipeline step.
with gr.Row():
get_audio_from_video_info = gr.Textbox(placeholder="Waiting", label="Get audio from video info", elem_id="get_audio_from_video_info", interactive=False, visible=visible)
video_transcribed_progress_info = gr.Textbox(placeholder="Waiting", label="Transcribe progress info", elem_id="video_transcribed_progress_info", interactive=False, visible=visible)
transcriptions_concatenated_progress_info = gr.Textbox(placeholder="Waiting", label="Concatenate progress info", elem_id="transcriptions_concatenated_progress_info", interactive=False, visible=visible)
video_translated_progress_info = gr.Textbox(placeholder="Waiting", label="Translate progress info", elem_id="transcription_translated_progress_info", interactive=False, visible=visible)
video_subtitled_progress_info = gr.Textbox(placeholder="Waiting", label="Video subtitle progress info", elem_id="video_subtitled_progress_info", interactive=False, visible=visible)
# Hidden textboxes that carry file paths from one step to the next.
original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=visible)
original_video_path = gr.Textbox(label="Original video path", visible=visible)
original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=visible)
original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=visible)
subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=visible)
auxiliar_block3 = gr.Textbox(placeholder="Waiting", label="Auxiliar block 3", elem_id="auxiliar_block3", interactive=False, visible=visible)
gr.HTML(html_buy_me_a_coffe)
# Events
# paste_button.click(fn=paste_url_from_clipboard, outputs=url_textbox)
# NOTE(review): the values returned by reset_frontend must match this outputs list
# in number and order — confirm.
delete_button.click(
fn=reset_frontend,
outputs=[
video_input,
source_languaje,
target_languaje,
Advanced_setings,
number_of_speakers,
subtify_button,
auxiliar_block1,
video_transcribed_progress_info,
transcriptions_concatenated_progress_info,
video_translated_progress_info,
video_subtitled_progress_info,
subtitled_video,
]
)
# A change of the video input triggers process_uploaded_video.
video_input.change(
fn=process_uploaded_video,
inputs=[video_input],
outputs=[video_input, config_block, original_video_path]
)
# Steps are chained through change events: each handler writes into a textbox
# whose change event triggers the next handler.
# NOTE(review): the values returned by change_visibility_texboxes must match this
# outputs list in number and order — confirm.
subtify_button.click(
fn=change_visibility_texboxes,
outputs=[auxiliar_block1, get_audio_from_video_info, video_transcribed_progress_info, transcriptions_concatenated_progress_info, video_translated_progress_info, video_subtitled_progress_info]
)
auxiliar_block1.change(
fn=get_audio,
inputs=[original_video_path],
outputs=[get_audio_from_video_info, original_audio_path]
)
get_audio_from_video_info.change(
fn=trascribe_audio,
inputs=[original_audio_path, source_languaje],
outputs=[video_transcribed_progress_info]
)
# The remaining stages (concatenate, translate, add subtitles, hide progress) are
# disabled below. NOTE(review): the last one references video_sliced_progress_info,
# which is not created in this function.
# video_transcribed_progress_info.change(
# fn=concatenate_transcriptions,
# outputs=[transcriptions_concatenated_progress_info, original_audio_transcribed_path]
# )
# transcriptions_concatenated_progress_info.change(
# fn=translate_transcription,
# inputs=[original_audio_transcribed_path, source_languaje, target_languaje],
# outputs=[video_translated_progress_info, original_audio_translated_path]
# )
# video_translated_progress_info.change(
# fn=add_translated_subtitles_to_video,
# inputs=[original_video_path, original_audio_path, original_audio_translated_path],
# outputs=[subtitled_video, video_subtitled_progress_info, auxiliar_block3]
# )
# auxiliar_block3.change(
# fn=hide_textbobes_progress_info,
# outputs=[video_sliced_progress_info, video_transcribed_progress_info, transcriptions_concatenated_progress_info, video_translated_progress_info, video_subtitled_progress_info]
# )
demo.launch()
if __name__ == "__main__":
    # Command-line entry point: launch the UI unless a flag selects another mode.
    parser = argparse.ArgumentParser()
    for flag in ("--no_ui", "--remove_all_files"):
        parser.add_argument(flag, action="store_true")
    args = parser.parse_args()

    if not (args.no_ui or args.remove_all_files):
        subtify()
    elif args.no_ui:
        # NOTE(review): subtify_no_ui is not defined in the code visible here — confirm it exists.
        subtify_no_ui()
    else:
        remove_all_files()