|
import argparse
import os
import re
import shutil
import subprocess
import sys
from time import sleep

import gradio as gr
import spaces  # kept ahead of torch, matching the original import order
import torch
from PIL import Image
from tqdm import tqdm

from audio import get_audio_from_video
from lang_list import union_language_dict
from slice_audio import slice_audio as slice_audio_main
from transcribe import transcribe, get_language_dict
from ui_config import (
    BACKGROUND_COLOR, BUTTON_COLOR, SVG_COLOR, PANEL_COLOR,
    PRIMARY_TEXT_COLOR, SUBDUED_TEXT_COLOR, BACKGROUND_PRIMARY_COLOR,
    BACKGROUND_SECONDARY_COLOR, PRIMARY_BODER_COLOR, BLOCK_TITLE_TEXT_COLOR,
    INPUT_BACKGROUND_COLOR, INPUT_BORDER_COLOR, INPUT_PLACEHOLDER_COLOR,
    ERROR_BACKGROUND_COLOR, ERROR_TEXT_COLOR, ERROR_BORDER_COLOR,
    BUTTON_SECONDARY_BACKGROUND_COLOR, BUTTON_SECONDARY_BORDER_COLOR,
    BUTTON_SECONDARY_TEXT_COLOR, RED, GREEN, BLUE,
    html_social_media, get_html_subtify_logo, html_buy_me_a_coffe,
)
|
|
|
# Width of the '*' separator line printed at the start of each pipeline step.
NUMBER = 100
# Torch device string handed to the transcription / translation steps.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Pipeline stage switches. None of them is read in the visible part of this
# file; they are kept because other code may reference them.
DOWNLOAD = True
SLICE_AUDIO = True
TRANSCRIBE_AUDIO = True
CONCATENATE_TRANSCRIPTIONS = True
TRANSLATE_TRANSCRIPTIONS = True
ADD_SUBTITLES_TO_VIDEO = True
REMOVE_FILES = True

# Chunk length and overlap (seconds) used when slicing and transcribing audio.
# Both devices use the same values, so they are assigned once; the former
# RAM-derived value was overwritten immediately and never took effect.
CHUNK_SECONDS = 30
CHUNK_OVERLAP_SECONDS = 5

if DEVICE == "cpu":
    # Only reported in the log line below; it does not influence chunking.
    ram = 16000
    print(f"RAM: {ram}, CHUNK_SECONDS: {CHUNK_SECONDS}, CHUNK_OVERLAP_SECONDS: {CHUNK_OVERLAP_SECONDS}")

# Platform / status tags (not referenced in the visible part of this file).
YOUTUBE = "youtube"
TWITCH = "twitch"
ERROR = "error"
|
|
|
# Read the logo's pixel dimensions. The image is opened as a context manager
# so the underlying file handle is released as soon as the size is known;
# only the dimensions are used afterwards.
with Image.open("assets/subtify_logo-scaled.png") as subtify_logo:
    subtify_logo_width, subtify_logo_height = subtify_logo.size

# The logo is displayed at a quarter of its native size (integer division).
factor = 4
new_width = subtify_logo_width // factor
new_height = subtify_logo_height // factor

# HTML snippet for the logo, rendered at the top of the page by subtify().
html_subtify_logo = get_html_subtify_logo(new_width, new_height)

# Used as the `choices` of the source/target language dropdowns.
language_dict = union_language_dict()
|
|
|
def remove_all_files():
    """Delete every working folder of the pipeline from the current directory.

    Each of the folders listed below is removed recursively if it exists.
    Missing folders are skipped. A failure to remove one entry is reported
    on stdout and does not stop the remaining removals, which mirrors the
    previous behaviour of ignoring the exit status of ``rm -r``.
    """
    work_folders = (
        "audios",
        "chunks",
        "concatenated_transcriptions",
        "transcriptions",
        "translated_transcriptions",
        "videos",
        "vocals",
    )
    for folder in work_folders:
        if not os.path.exists(folder):
            continue
        try:
            if os.path.isdir(folder) and not os.path.islink(folder):
                shutil.rmtree(folder)
            else:
                # A plain file or a symlink with that name: drop the entry
                # itself, as `rm -r` would.
                os.remove(folder)
        except OSError as err:
            print(f"Could not remove {folder}: {err}")
|
|
|
def reset_frontend():
    """Return the updates that put the UI back into its empty state.

    This is the handler of ``delete_button.click`` in ``subtify``. Gradio
    assigns returned values to ``outputs`` by position, so exactly one value
    is returned per component of that outputs list, in the same order. The
    first value clears the uploaded video; every other value hides its
    component.

    Returns:
        tuple: twelve values, one per output component (named in the inline
        comments below).
    """
    return (
        None,                      # video_input: clear the uploaded video
        gr.update(visible=False),  # source_languaje
        gr.update(visible=False),  # target_languaje
        gr.update(visible=False),  # Advanced_setings accordion
        gr.update(visible=False),  # number_of_speakers
        gr.update(visible=False),  # subtify_button
        gr.update(visible=False),  # auxiliar_block1
        gr.update(visible=False),  # video_transcribed_progress_info
        gr.update(visible=False),  # transcriptions_concatenated_progress_info
        gr.update(visible=False),  # video_translated_progress_info
        gr.update(visible=False),  # video_subtitled_progress_info
        gr.update(visible=False),  # subtitled_video
    )
|
|
|
def show_auxiliar_block1():
    """Return a hidden Textbox whose value is set to "URL checked"."""
    checked_marker = gr.Textbox(value="URL checked", visible=False)
    return checked_marker
|
|
|
def change_visibility_texboxes():
    """Reveal the progress textboxes and start the processing chain.

    This is the handler of ``subtify_button.click`` in ``subtify``. Gradio
    assigns returned values to ``outputs`` by position, so exactly one value
    is returned per component of that outputs list, in the same order.
    Setting ``auxiliar_block1`` to "Done" is what fires its ``change`` event,
    which ``subtify`` wires to ``get_audio``.

    Returns:
        tuple: six updates, one per output component (named in the inline
        comments below).
    """
    return (
        gr.update(value="Done"),   # auxiliar_block1
        gr.update(visible=True),   # get_audio_from_video_info
        gr.update(visible=True),   # video_transcribed_progress_info
        gr.update(visible=True),   # transcriptions_concatenated_progress_info
        gr.update(visible=True),   # video_translated_progress_info
        gr.update(visible=True),   # video_subtitled_progress_info
    )
|
|
|
def get_audio(video_path):
    """Extract the audio track of ``video_path`` into the "audios" folder.

    Args:
        video_path: path of the video passed on to ``get_audio_from_video``.

    Returns:
        list: two updates. On success the first carries "Ok" and the second
        the value returned by ``get_audio_from_video``. If that call raises,
        the error is printed and the updates carry "Error" and "".
    """
    print('*' * NUMBER)
    print(f"Getting audio from video {video_path}")

    try:
        status, audio_value = "Ok", get_audio_from_video(video_path, "audios")
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        print(f"Error: {str(e)}")
        status, audio_value = "Error", ""

    return [gr.update(value=status), gr.update(value=audio_value)]
|
|
|
def slice_audio(input_audio_path):
    """Split ``input_audio_path`` into overlapping chunks under "chunks".

    Creates the "vocals" and "chunks" folders when they are missing, then
    calls ``slice_audio_main`` with the module-level chunk length and overlap.

    Args:
        input_audio_path: path of the audio file to slice.

    Returns:
        tuple: a one-element tuple holding an update with value "Ok".
    """
    print('*' * NUMBER)
    print(f"Slicing audio {input_audio_path} in chunks of {CHUNK_SECONDS} seconds with {CHUNK_OVERLAP_SECONDS} seconds overlap")

    print("Creating vocals and chunks folders")
    chunks_dir = "chunks"
    for needed_dir in ("vocals", chunks_dir):
        if not os.path.exists(needed_dir):
            os.makedirs(needed_dir)

    slice_audio_main(input_audio_path, chunks_dir, CHUNK_SECONDS, CHUNK_OVERLAP_SECONDS)

    done = gr.update(value="Ok")
    return (done,)
|
|
|
def trascribe_audio(input_audio_path, source_languaje):
    """Transcribe ``input_audio_path`` in the selected source language.

    Looks ``source_languaje`` up in the mapping returned by
    ``get_language_dict()`` and passes that entry's "transcriber" value to
    ``transcribe``, together with the device and chunk settings.

    Args:
        input_audio_path: path of the audio to transcribe.
        source_languaje: key into the mapping from ``get_language_dict()``.

    Returns:
        A Textbox with value "Ok".
    """
    print('*' * NUMBER)
    print(f"Transcript {input_audio_path}")

    languages = get_language_dict()
    transcriber_language = languages[source_languaje]["transcriber"]

    transcribe(
        input_audio_path,
        transcriber_language,
        DEVICE,
        CHUNK_SECONDS,
        CHUNK_OVERLAP_SECONDS,
    )

    status_box = gr.Textbox(value="Ok")
    return status_box
|
|
|
def concatenate_transcriptions():
    """Merge the per-chunk transcriptions into a single subtitle file.

    Runs ``concat_transcriptions.py`` on the chunk list
    ("chunks/output_files.txt") with the module-level chunk length and
    overlap, then deletes the per-chunk ``.srt`` files from "transcriptions".

    Returns:
        tuple: a Textbox with value "Ok" and a Textbox holding
        "concatenated_transcriptions/download_audio.srt".

    Raises:
        FileNotFoundError: if the chunk list file does not exist.
    """
    print('*' * NUMBER)
    print("Concatenate transcriptions")

    os.makedirs("concatenated_transcriptions", exist_ok=True)

    chunck_file = "chunks/output_files.txt"
    # An argument list (no shell) with the interpreter running this script.
    # The exit status is reported but, as before, does not abort the step.
    completed = subprocess.run(
        [
            sys.executable,
            "concat_transcriptions.py",
            chunck_file,
            str(CHUNK_SECONDS),
            str(CHUNK_OVERLAP_SECONDS),
        ],
        check=False,
    )
    if completed.returncode != 0:
        print(f"concat_transcriptions.py exited with status {completed.returncode}")

    with open(chunck_file, 'r') as f:
        chunk_paths = f.read().splitlines()

    for chunk_path in chunk_paths:
        if not chunk_path.strip():
            continue  # tolerate blank lines in the chunk list
        # Base name without extension; also copes with extra dots in the name
        # or more than one directory level in the listed path.
        chunk_name = os.path.splitext(os.path.basename(chunk_path))[0]
        srt_path = f"transcriptions/{chunk_name}.srt"
        try:
            os.remove(srt_path)
        except OSError as err:
            # Best effort, like the `rm` call this replaces.
            print(f"Could not remove {srt_path}: {err}")

    audio_transcribed = "concatenated_transcriptions/download_audio.srt"

    return (
        gr.Textbox(value="Ok"),
        gr.Textbox(value=audio_transcribed),
    )
|
|
|
def translate_transcription(original_audio_transcribed_path, source_languaje, target_languaje):
    """Translate the concatenated transcription into the target language.

    Runs ``translate_transcriptions.py`` on
    ``original_audio_transcribed_path`` and afterwards deletes
    "concatenated_transcriptions/download_audio.srt" if it exists.

    Args:
        original_audio_transcribed_path: subtitle file to translate.
        source_languaje: value passed as ``--source_languaje``.
        target_languaje: value passed as ``--target_languaje``; it is also
            part of the returned file name.

    Returns:
        tuple: a Textbox with value "Ok" and a Textbox holding
        "translated_transcriptions/download_audio_<target_languaje>.srt".
    """
    print('*' * NUMBER)
    print("Translate transcription")

    os.makedirs("translated_transcriptions", exist_ok=True)

    # Arguments go in as a list (no shell), so paths or language names that
    # contain spaces, parentheses or quotes reach the script unchanged.
    completed = subprocess.run(
        [
            sys.executable,
            "translate_transcriptions.py",
            str(original_audio_transcribed_path),
            "--source_languaje", str(source_languaje),
            "--target_languaje", str(target_languaje),
            "--device", str(DEVICE),
        ],
        check=False,
    )
    if completed.returncode != 0:
        print(f"translate_transcriptions.py exited with status {completed.returncode}")

    translated_transcription = f"translated_transcriptions/download_audio_{target_languaje}.srt"

    transcription_file = "concatenated_transcriptions/download_audio.srt"
    if os.path.exists(transcription_file):
        try:
            os.remove(transcription_file)
        except OSError as err:
            # Best effort, like the `rm` call this replaces.
            print(f"Could not remove {transcription_file}: {err}")

    return (
        gr.Textbox(value="Ok"),
        gr.Textbox(value=translated_transcription),
    )
|
|
|
def add_translated_subtitles_to_video(original_video_path, original_audio_path, original_audio_translated_path):
    """Burn the translated subtitles into the video and clean up the inputs.

    Runs ``add_subtitles_to_video.py`` and afterwards deletes the three input
    files and "chunks/output_files.txt" if they exist.

    Args:
        original_video_path: source video (second script argument).
        original_audio_path: extracted audio (third script argument).
        original_audio_translated_path: translated subtitle file (first
            script argument).

    Returns:
        tuple: a visible Video pointing at
        "videos/download_video_with_subtitles.mp4", a hidden Textbox with
        value "Ok", and a Textbox with value "Ok".
    """
    print('*' * NUMBER)
    print("Add subtitles to video")

    # Arguments go in as a list (no shell), so paths with spaces or shell
    # metacharacters reach the script unchanged.
    completed = subprocess.run(
        [
            sys.executable,
            "add_subtitles_to_video.py",
            str(original_audio_translated_path),
            str(original_video_path),
            str(original_audio_path),
        ],
        check=False,
    )
    if completed.returncode != 0:
        print(f"add_subtitles_to_video.py exited with status {completed.returncode}")

    intermediate_files = (
        original_video_path,
        original_audio_path,
        original_audio_translated_path,
        "chunks/output_files.txt",
    )
    for stale_path in intermediate_files:
        if os.path.exists(stale_path):
            try:
                os.remove(stale_path)
            except OSError as err:
                # Best effort, like the `rm` calls this replaces.
                print(f"Could not remove {stale_path}: {err}")

    subtitled_video = "videos/download_video_with_subtitles.mp4"

    visible = False
    return (
        gr.Video(value=subtitled_video, visible=True),
        gr.Textbox(value="Ok", visible=visible),
        gr.Textbox(value="Ok"),
    )
|
|
|
def hide_textbobes_progress_info():
    """Return six hidden Textboxes, each reset to the value "Waiting"."""
    textbox_count = 6
    return tuple(
        gr.Textbox(value="Waiting", visible=False)
        for _ in range(textbox_count)
    )
|
|
|
def process_uploaded_video(video_path):
    """Copy an uploaded video into "videos" and reveal the config panel.

    This is the handler of ``video_input.change`` in ``subtify``; its three
    outputs are the video component, the configuration column and the hidden
    textbox that stores the working video path.

    Args:
        video_path: path of the uploaded file, or ``None`` / empty when the
            video component has been cleared (for example by
            ``reset_frontend``, which sets it to ``None``).

    Returns:
        list: three updates. After an upload: the label "Video uploaded",
        the config column made visible, and the path of the copy
        ("videos/download_video.mp4"). After a clear: the label restored to
        "Upload video", the config column hidden, and an empty path.
    """
    if not video_path:
        # The change event also fires when the video is cleared; there is
        # nothing to copy then, and shutil.copy(None, ...) would raise.
        return [
            gr.update(label="Upload video"),
            gr.update(visible=False),
            gr.update(value=""),
        ]

    videos_folder = "videos"
    os.makedirs(videos_folder, exist_ok=True)

    # Later steps work on this fixed name, so the upload is copied to it.
    new_video_path = os.path.join(videos_folder, "download_video.mp4")
    shutil.copy(video_path, new_video_path)

    return [
        gr.update(label="Video uploaded"),
        gr.update(visible=True),
        gr.update(value=new_video_path),
    ]
|
|
|
# NOTE(review): this decorator wraps the function that builds and launches the
# UI rather than a function that does the model work - confirm this is intended.
@spaces.GPU
def subtify():
    """Build the Subtify Gradio interface, wire its events and launch it.

    Layout: social-media header, title and logo; an input row holding the
    video uploader with its clear button and an initially hidden
    configuration column (languages, advanced settings, "subtify" button);
    a row of hidden progress textboxes; hidden textboxes that carry file
    paths between steps; and the resulting video.

    Events wired here:
      * ``delete_button.click``           -> ``reset_frontend``
      * ``video_input.change``            -> ``process_uploaded_video``
      * ``subtify_button.click``          -> ``change_visibility_texboxes``
      * ``auxiliar_block1.change``        -> ``get_audio``
      * ``get_audio_from_video_info.change`` -> ``trascribe_audio``

    The concatenate / translate / add-subtitles steps are defined in this
    file but are not connected to any event here.
    """
    theme = gr.themes.Default().set(
        body_background_fill=BACKGROUND_COLOR,
        body_background_fill_dark=BACKGROUND_COLOR,
        body_text_color=PRIMARY_TEXT_COLOR,
        body_text_color_dark=PRIMARY_TEXT_COLOR,
        body_text_color_subdued=SUBDUED_TEXT_COLOR,
        body_text_color_subdued_dark=SUBDUED_TEXT_COLOR,
        background_fill_primary=BACKGROUND_PRIMARY_COLOR,
        background_fill_primary_dark=BACKGROUND_PRIMARY_COLOR,
        background_fill_secondary=BACKGROUND_SECONDARY_COLOR,
        background_fill_secondary_dark=BACKGROUND_SECONDARY_COLOR,
        border_color_primary=PRIMARY_BODER_COLOR,
        border_color_primary_dark=PRIMARY_BODER_COLOR,
        block_background_fill=BACKGROUND_PRIMARY_COLOR,
        block_background_fill_dark=BACKGROUND_PRIMARY_COLOR,
        block_title_text_color=BLOCK_TITLE_TEXT_COLOR,
        block_title_text_color_dark=BLOCK_TITLE_TEXT_COLOR,
        input_background_fill=INPUT_BACKGROUND_COLOR,
        input_background_fill_dark=INPUT_BACKGROUND_COLOR,
        input_border_color=INPUT_BORDER_COLOR,
        input_border_color_dark=INPUT_BORDER_COLOR,
        input_placeholder_color=INPUT_PLACEHOLDER_COLOR,
        input_placeholder_color_dark=INPUT_PLACEHOLDER_COLOR,
        error_background_fill=ERROR_BACKGROUND_COLOR,
        error_background_fill_dark=ERROR_BACKGROUND_COLOR,
        error_text_color=ERROR_TEXT_COLOR,
        error_text_color_dark=ERROR_TEXT_COLOR,
        error_border_color=ERROR_BORDER_COLOR,
        error_border_color_dark=ERROR_BORDER_COLOR,
        button_secondary_background_fill=BUTTON_SECONDARY_BACKGROUND_COLOR,
        button_secondary_background_fill_dark=BUTTON_SECONDARY_BACKGROUND_COLOR,
        button_secondary_border_color=BUTTON_SECONDARY_BORDER_COLOR,
        # Dark-mode twin of the line above, so the secondary border follows
        # the same light/dark pairing as every other setting.
        button_secondary_border_color_dark=BUTTON_SECONDARY_BORDER_COLOR,
        button_primary_background_fill_dark=BUTTON_SECONDARY_BORDER_COLOR,
        button_secondary_text_color=BUTTON_SECONDARY_TEXT_COLOR,
        button_secondary_text_color_dark=BUTTON_SECONDARY_TEXT_COLOR,
    )

    with gr.Blocks(theme=theme) as demo:
        # Choices for the "Number of speakers" dropdown: 100 down to 1.
        num_speaker = list(range(100, 0, -1))

        # ---- Header -------------------------------------------------------
        gr.HTML(html_social_media)
        gr.HTML("<h1 style='text-align: center;'>Subtify</h1>")
        gr.HTML(html_subtify_logo)

        # ---- Input: video upload + configuration ---------------------------
        # Everything created with `visible=visible` starts hidden.
        visible = False
        with gr.Row(variant="panel"):
            with gr.Row(scale=2):
                video_input = gr.Video(
                    label="Upload video",
                    sources=["upload"],
                    scale=1,
                    interactive=True,
                )
                # min_width is a pixel count, so it is passed as an int.
                delete_button = gr.Button(
                    size="sm",
                    icon="icons/delete.svg",
                    value="clear",
                    min_width=10,
                    scale=0,
                )

            config_block = gr.Column(scale=1, visible=visible)
            with config_block:
                with gr.Row():
                    source_languaje = gr.Dropdown(
                        visible=True,
                        label="Source languaje",
                        show_label=True,
                        value="English",
                        choices=language_dict,
                        scale=1,
                        interactive=True,
                        info="Language of the video",
                    )
                    target_languaje = gr.Dropdown(
                        visible=True,
                        label="Target languaje",
                        show_label=True,
                        value="Español",
                        choices=language_dict,
                        scale=1,
                        interactive=True,
                        info="Language to translate the subtitles",
                    )
                with gr.Accordion("Advanced settings", open=False, visible=True) as advanced_settings:
                    number_of_speakers = gr.Dropdown(
                        visible=True,
                        label="Number of speakers",
                        show_label=True,
                        value=10,
                        choices=num_speaker,
                        scale=1,
                        interactive=True,
                        info="Number of speakers in the video, if you don't know, select 10",
                    )
                subtify_button = gr.Button(
                    size="lg",
                    value="subtify",
                    min_width=10,
                    scale=0,
                    visible=True,
                )

        # ---- Progress indicators -------------------------------------------
        # Hidden trigger: changing its value starts the processing chain.
        auxiliar_block1 = gr.Textbox(placeholder="", interactive=False, visible=visible)
        with gr.Row():
            get_audio_from_video_info = gr.Textbox(
                placeholder="Waiting",
                label="Get audio from video info",
                elem_id="get_audio_from_video_info",
                interactive=False,
                visible=visible,
            )
            video_transcribed_progress_info = gr.Textbox(
                placeholder="Waiting",
                label="Transcribe progress info",
                elem_id="video_transcribed_progress_info",
                interactive=False,
                visible=visible,
            )
            transcriptions_concatenated_progress_info = gr.Textbox(
                placeholder="Waiting",
                label="Concatenate progress info",
                elem_id="transcriptions_concatenated_progress_info",
                interactive=False,
                visible=visible,
            )
            video_translated_progress_info = gr.Textbox(
                placeholder="Waiting",
                label="Translate progress info",
                elem_id="transcription_translated_progress_info",
                interactive=False,
                visible=visible,
            )
            video_subtitled_progress_info = gr.Textbox(
                placeholder="Waiting",
                label="Video subtitle progress info",
                elem_id="video_subtitled_progress_info",
                interactive=False,
                visible=visible,
            )

        # ---- Hidden state (file paths passed between steps) and result -----
        original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=visible)
        original_video_path = gr.Textbox(label="Original video path", visible=visible)
        original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=visible)
        original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=visible)
        subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=visible)
        auxiliar_block3 = gr.Textbox(placeholder="Waiting", label="Auxiliar block 3", elem_id="auxiliar_block3", interactive=False, visible=visible)

        gr.HTML(html_buy_me_a_coffe)

        # ---- Events --------------------------------------------------------
        delete_button.click(
            fn=reset_frontend,
            outputs=[
                video_input,
                source_languaje,
                target_languaje,
                advanced_settings,
                number_of_speakers,
                subtify_button,
                auxiliar_block1,
                video_transcribed_progress_info,
                transcriptions_concatenated_progress_info,
                video_translated_progress_info,
                video_subtitled_progress_info,
                subtitled_video,
            ],
        )
        video_input.change(
            fn=process_uploaded_video,
            inputs=[video_input],
            outputs=[video_input, config_block, original_video_path],
        )
        subtify_button.click(
            fn=change_visibility_texboxes,
            outputs=[
                auxiliar_block1,
                get_audio_from_video_info,
                video_transcribed_progress_info,
                transcriptions_concatenated_progress_info,
                video_translated_progress_info,
                video_subtitled_progress_info,
            ],
        )
        # Each step writes its status textbox; that change starts the next.
        auxiliar_block1.change(
            fn=get_audio,
            inputs=[original_video_path],
            outputs=[get_audio_from_video_info, original_audio_path],
        )
        get_audio_from_video_info.change(
            fn=trascribe_audio,
            inputs=[original_audio_path, source_languaje],
            outputs=[video_transcribed_progress_info],
        )

    demo.launch()
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point:
    #   (no flag)            launch the Gradio UI
    #   --remove_all_files   delete the pipeline's working folders and exit
    #   --no_ui              reserved for a headless mode
    parser = argparse.ArgumentParser()
    parser.add_argument("--no_ui", action="store_true")
    parser.add_argument("--remove_all_files", action="store_true")
    args = parser.parse_args()

    if args.no_ui:
        # No headless entry point is defined or imported in this file, so
        # exit with a usage error instead of failing with a NameError.
        parser.error("--no_ui is not available: no headless entry point is defined")
    elif args.remove_all_files:
        remove_all_files()
    else:
        subtify()
|
|