Spaces:

Maximofn
/

subtify

Runtime error

App Files Files Community

subtify / app.py

Maximofn

Refactor project structure and update dependencies

e015c08 2 months ago

raw

history blame

18.9 kB

	import gradio as gr
	import argparse
	import spaces
	import os
	import torch
	import shutil
	from time import sleep
	from tqdm import tqdm
	from lang_list import union_language_dict
	# import pyperclip
	import re
	from PIL import Image
	# import urllib.request
	from ui_config import (
	BACKGROUND_COLOR, BUTTON_COLOR, SVG_COLOR, PANEL_COLOR,
	PRIMARY_TEXT_COLOR, SUBDUED_TEXT_COLOR, BACKGROUND_PRIMARY_COLOR,
	BACKGROUND_SECONDARY_COLOR, PRIMARY_BODER_COLOR, BLOCK_TITLE_TEXT_COLOR,
	INPUT_BACKGROUND_COLOR, INPUT_BORDER_COLOR, INPUT_PLACEHOLDER_COLOR,
	ERROR_BACKGROUND_COLOR, ERROR_TEXT_COLOR, ERROR_BORDER_COLOR,
	BUTTON_SECONDARY_BACKGROUND_COLOR, BUTTON_SECONDARY_BORDER_COLOR,
	BUTTON_SECONDARY_TEXT_COLOR, RED, GREEN, BLUE,
	html_social_media, get_html_subtify_logo, html_buy_me_a_coffe
	)
	# from url_manager import get_youtube_thumbnail, is_valid_youtube_url, is_valid_twitch_url, is_valid_url
	from slice_audio import slice_audio as slice_audio_main
	from audio import get_audio_from_video
	from transcribe import transcribe, get_language_dict

	NUMBER = 100
	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
	# DEVICE = "cpu"
	DOWNLOAD = True
	SLICE_AUDIO = True
	TRANSCRIBE_AUDIO = True
	CONCATENATE_TRANSCRIPTIONS = True
	TRANSLATE_TRANSCRIPTIONS = True
	ADD_SUBTITLES_TO_VIDEO = True
	REMOVE_FILES = True
	if DEVICE == "cpu":
	# I supose that I am on huggingface server
	# Get RAM space
	# ram = int(os.popen("free -m \| grep Mem \| awk '{print $2}'").read())
	ram = 16000
	factor = 1
	CHUNK_SECONDS = int(ram*factor)
	CHUNK_SECONDS = 30
	CHUNK_OVERLAP_SECONDS = 5
	print(f"RAM: {ram}, CHUNK_SECONDS: {CHUNK_SECONDS}, CHUNK_OVERLAP_SECONDS: {CHUNK_OVERLAP_SECONDS}")
	else:
	# I supose that I am on my computer
	# Get VRAM space
	CHUNK_SECONDS = 30
	CHUNK_OVERLAP_SECONDS = 5

	YOUTUBE = "youtube"
	TWITCH = "twitch"
	ERROR = "error"

	subtify_logo = Image.open("assets/subtify_logo-scaled.png")
	subtify_logo_width, subtify_logo_height = subtify_logo.size
	factor = 4
	new_width = subtify_logo_width // factor
	new_height = subtify_logo_height // factor

	html_subtify_logo = get_html_subtify_logo(new_width, new_height)

	language_dict = union_language_dict()

	def remove_all_files():
	if os.path.exists("audios"):
	command = f"rm -r audios"
	os.system(command)
	if os.path.exists("chunks"):
	command = f"rm -r chunks"
	os.system(command)
	if os.path.exists("concatenated_transcriptions"):
	command = f"rm -r concatenated_transcriptions"
	os.system(command)
	if os.path.exists("transcriptions"):
	command = f"rm -r transcriptions"
	os.system(command)
	if os.path.exists("translated_transcriptions"):
	command = f"rm -r translated_transcriptions"
	os.system(command)
	if os.path.exists("videos"):
	command = f"rm -r videos"
	os.system(command)
	if os.path.exists("vocals"):
	command = f"rm -r vocals"
	os.system(command)

	def reset_frontend():
	visible = False
	return (
	None,
	gr.Image(visible=visible),
	gr.Dropdown(visible=visible),
	gr.Dropdown(visible=visible),
	gr.Dropdown(visible=visible),
	gr.Accordion(visible=visible),
	gr.Button(visible=visible),
	gr.Textbox(visible=visible),
	gr.Textbox(visible=visible),
	gr.Textbox(visible=visible),
	gr.Textbox(visible=visible),
	gr.Textbox(visible=visible),
	gr.Textbox(visible=visible),
	gr.Textbox(visible=visible),
	gr.Textbox(visible=visible),
	gr.Textbox(visible=visible),
	gr.Video(visible=visible),
	)

	def show_auxiliar_block1():
	return gr.Textbox(value="URL checked", visible=False)

	def change_visibility_texboxes():
	return (
	gr.update(value="Done"), # auxiliar_block1
	gr.update(visible=True), # get_audio_from_video_info
	gr.update(visible=True), # video_sliced_progress_info
	gr.update(visible=True), # video_transcribed_progress_info
	gr.update(visible=True), # transcriptions_concatenated_progress_info
	gr.update(visible=True), # video_translated_progress_info
	gr.update(visible=True), # video_subtitled_progress_info
	)

	def get_audio(video_path):
	print(''NUMBER)
	print(f"Getting audio from video {video_path}")

	audios_folder = "audios"
	try:
	audio_path = get_audio_from_video(video_path, audios_folder)
	return [
	gr.update(value="Ok"), # get_audio_from_video_info
	gr.update(value=audio_path) # original_audio_path
	]
	except Exception as e:
	print(f"Error: {str(e)}")
	return [
	gr.update(value="Error"), # get_audio_from_video_info
	gr.update(value="") # original_audio_path
	]

	def slice_audio(input_audio_path):
	print(''NUMBER)
	print(f"Slicing audio {input_audio_path} in chunks of {CHUNK_SECONDS} seconds with {CHUNK_OVERLAP_SECONDS} seconds overlap")

	# Create vocals and chunks folders
	print("Creating vocals and chunks folders")
	folder_vocals = "vocals"
	folder_chunck = "chunks"
	if not os.path.exists(folder_vocals):
	os.makedirs(folder_vocals)
	if not os.path.exists(folder_chunck):
	os.makedirs(folder_chunck)

	slice_audio_main(input_audio_path, folder_chunck, CHUNK_SECONDS, CHUNK_OVERLAP_SECONDS)

	return (
	gr.update(value="Ok"), # video_sliced_progress_info
	)

	def trascribe_audio(input_audio_path, source_languaje):
	print(''NUMBER)
	print(f"Transcript {input_audio_path}")

	# Get language dict
	language_dict = get_language_dict()

	# Transcribe audio file
	transcribe(input_audio_path, language_dict[source_languaje]["transcriber"], DEVICE, CHUNK_SECONDS, CHUNK_OVERLAP_SECONDS)

	return (
	gr.Textbox(value="Ok")
	)

	def concatenate_transcriptions():
	print(''NUMBER)
	print("Concatenate transcriptions")

	folder_concatenated = "concatenated_transcriptions"
	if not os.path.exists(folder_concatenated):
	os.makedirs(folder_concatenated)

	chunck_file = "chunks/output_files.txt"
	python_file = "concat_transcriptions.py"
	command = f"python {python_file} {chunck_file} {CHUNK_SECONDS} {CHUNK_OVERLAP_SECONDS}"
	os.system(command)

	with open(chunck_file, 'r') as f:
	files = f.read().splitlines()
	for file in files:
	file_name, _ = file.split(".")
	_, file_name = file_name.split("/")
	transcriptions_folder = "transcriptions"
	transcription_extension = "srt"
	command = f"rm {transcriptions_folder}/{file_name}.{transcription_extension}"
	os.system(command)

	audio_transcribed = "concatenated_transcriptions/download_audio.srt"

	return (
	gr.Textbox(value="Ok"),
	gr.Textbox(value=audio_transcribed),
	)

	def translate_transcription(original_audio_transcribed_path, source_languaje, target_languaje):
	print(''NUMBER)
	print("Translate transcription")

	folder_translated_transcriptions = "translated_transcriptions"
	if not os.path.exists(folder_translated_transcriptions):
	os.makedirs(folder_translated_transcriptions)

	python_file = "translate_transcriptions.py"
	command = f"python {python_file} {original_audio_transcribed_path} --source_languaje {source_languaje} --target_languaje {target_languaje} --device {DEVICE}"
	os.system(command)

	translated_transcription = f"translated_transcriptions/download_audio_{target_languaje}.srt"

	transcription_file = "concatenated_transcriptions/download_audio.srt"
	if os.path.exists(transcription_file):
	command = f"rm {transcription_file}"
	os.system(command)

	return (
	gr.Textbox(value="Ok"),
	gr.Textbox(value=translated_transcription)
	)

	def add_translated_subtitles_to_video(original_video_path, original_audio_path, original_audio_translated_path):
	print(''NUMBER)
	print("Add subtitles to video")

	python_file = "add_subtitles_to_video.py"
	command = f"python {python_file} {original_audio_translated_path} {original_video_path} {original_audio_path}"
	os.system(command)

	if os.path.exists(original_video_path):
	command = f"rm {original_video_path}"
	os.system(command)
	if os.path.exists(original_audio_path):
	command = f"rm {original_audio_path}"
	os.system(command)
	if os.path.exists(original_audio_translated_path):
	command = f"rm {original_audio_translated_path}"
	os.system(command)
	if os.path.exists("chunks/output_files.txt"):
	command = f"rm chunks/output_files.txt"
	os.system(command)

	subtitled_video = "videos/download_video_with_subtitles.mp4"

	visible = False
	return (
	gr.Video(value=subtitled_video, visible=True),
	gr.Textbox(value="Ok", visible=visible),
	gr.Textbox(value="Ok"),
	)

	def hide_textbobes_progress_info():
	visible = False
	return (
	gr.Textbox(value="Waiting", visible=visible),
	gr.Textbox(value="Waiting", visible=visible),
	gr.Textbox(value="Waiting", visible=visible),
	gr.Textbox(value="Waiting", visible=visible),
	gr.Textbox(value="Waiting", visible=visible),
	gr.Textbox(value="Waiting", visible=visible),
	)

	def process_uploaded_video(video_path):
	# Create videos folder
	videos_folder = "videos"
	if not os.path.exists(videos_folder):
	os.makedirs(videos_folder)

	# Copy uploaded video to videos folder
	new_video_path = os.path.join(videos_folder, "download_video.mp4")
	shutil.copy(video_path, new_video_path)

	# Return updated config block with new scale and the new video path
	return [
	gr.update(label="Video uploaded"), # video_input
	gr.update(visible=True), # config_block
	gr.update(value=new_video_path) # original_video_path
	]

	@spaces.GPU
	def subtify():
	with gr.Blocks(
	theme=gr.themes.Default().set
	(
	body_background_fill=BACKGROUND_COLOR,
	body_background_fill_dark=BACKGROUND_COLOR,
	body_text_color=PRIMARY_TEXT_COLOR,
	body_text_color_dark=PRIMARY_TEXT_COLOR,
	body_text_color_subdued=SUBDUED_TEXT_COLOR,
	body_text_color_subdued_dark=SUBDUED_TEXT_COLOR,
	background_fill_primary=BACKGROUND_PRIMARY_COLOR,
	background_fill_primary_dark=BACKGROUND_PRIMARY_COLOR,
	background_fill_secondary=BACKGROUND_SECONDARY_COLOR,
	background_fill_secondary_dark=BACKGROUND_SECONDARY_COLOR,
	border_color_primary=PRIMARY_BODER_COLOR,
	border_color_primary_dark=PRIMARY_BODER_COLOR,
	block_background_fill=BACKGROUND_PRIMARY_COLOR,
	block_background_fill_dark=BACKGROUND_PRIMARY_COLOR,
	block_title_text_color=BLOCK_TITLE_TEXT_COLOR,
	block_title_text_color_dark=BLOCK_TITLE_TEXT_COLOR,
	input_background_fill=INPUT_BACKGROUND_COLOR,
	input_background_fill_dark=INPUT_BACKGROUND_COLOR,
	input_border_color=INPUT_BORDER_COLOR,
	input_border_color_dark=INPUT_BORDER_COLOR,
	input_placeholder_color=INPUT_PLACEHOLDER_COLOR,
	input_placeholder_color_dark=INPUT_PLACEHOLDER_COLOR,
	error_background_fill=ERROR_BACKGROUND_COLOR,
	error_background_fill_dark=ERROR_BACKGROUND_COLOR,
	error_text_color=ERROR_TEXT_COLOR,
	error_text_color_dark=ERROR_TEXT_COLOR,
	error_border_color=ERROR_BORDER_COLOR,
	error_border_color_dark=ERROR_BORDER_COLOR,
	button_secondary_background_fill=BUTTON_SECONDARY_BACKGROUND_COLOR,
	button_secondary_background_fill_dark=BUTTON_SECONDARY_BACKGROUND_COLOR,
	button_secondary_border_color=BUTTON_SECONDARY_BORDER_COLOR,
	button_primary_background_fill_dark=BUTTON_SECONDARY_BORDER_COLOR,
	button_secondary_text_color=BUTTON_SECONDARY_TEXT_COLOR,
	button_secondary_text_color_dark=BUTTON_SECONDARY_TEXT_COLOR,
	)
	) as demo:
	num_speaker = []
	for i in range(100, 0, -1):
	num_speaker.append(i)

	# Layout
	gr.HTML(html_social_media)
	gr.HTML("<h1 style='text-align: center;'>Subtify</h1>")
	gr.HTML(html_subtify_logo)

	# Input block, where the user can upload a video and configure the subtify process
	visible = False
	input_block = gr.Row(variant="panel")
	with input_block:
	input_video_block = gr.Row(scale=2)
	with input_video_block:
	video_input = gr.Video(
	label="Upload video",
	sources=["upload"],
	scale=1,
	interactive=True
	)
	delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="clear", min_width="10px", scale=0)

	config_block = gr.Column(scale=1, visible=visible)
	with config_block:
	with gr.Row():
	source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True, info="Language of the video")
	target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True, info="Language to translate the subtitles")
	with gr.Accordion("Advanced settings", open=False, visible=True) as Advanced_setings:
	number_of_speakers = gr.Dropdown(visible=True, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True, info="Number of speakers in the video, if you don't know, select 10")
	subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=True)

	auxiliar_block1 = gr.Textbox(placeholder="", interactive=False, visible=visible)
	with gr.Row():
	get_audio_from_video_info = gr.Textbox(placeholder="Waiting", label="Get audio from video info", elem_id="get_audio_from_video_info", interactive=False, visible=visible)
	video_transcribed_progress_info = gr.Textbox(placeholder="Waiting", label="Transcribe progress info", elem_id="video_transcribed_progress_info", interactive=False, visible=visible)
	transcriptions_concatenated_progress_info = gr.Textbox(placeholder="Waiting", label="Concatenate progress info", elem_id="transcriptions_concatenated_progress_info", interactive=False, visible=visible)
	video_translated_progress_info = gr.Textbox(placeholder="Waiting", label="Translate progress info", elem_id="transcription_translated_progress_info", interactive=False, visible=visible)
	video_subtitled_progress_info = gr.Textbox(placeholder="Waiting", label="Video subtitle progress info", elem_id="video_subtitled_progress_info", interactive=False, visible=visible)

	original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=visible)
	original_video_path = gr.Textbox(label="Original video path", visible=visible)
	original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=visible)
	original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=visible)
	subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=visible)
	auxiliar_block3 = gr.Textbox(placeholder="Waiting", label="Auxiliar block 3", elem_id="auxiliar_block3", interactive=False, visible=visible)

	gr.HTML(html_buy_me_a_coffe)

	# Events
	# paste_button.click(fn=paste_url_from_clipboard, outputs=url_textbox)
	delete_button.click(
	fn=reset_frontend,
	outputs=[
	video_input,
	source_languaje,
	target_languaje,
	Advanced_setings,
	number_of_speakers,
	subtify_button,
	auxiliar_block1,
	video_transcribed_progress_info,
	transcriptions_concatenated_progress_info,
	video_translated_progress_info,
	video_subtitled_progress_info,
	subtitled_video,
	]
	)
	video_input.change(
	fn=process_uploaded_video,
	inputs=[video_input],
	outputs=[video_input, config_block, original_video_path]
	)
	subtify_button.click(
	fn=change_visibility_texboxes,
	outputs=[auxiliar_block1, get_audio_from_video_info, video_transcribed_progress_info, transcriptions_concatenated_progress_info, video_translated_progress_info, video_subtitled_progress_info]
	)
	auxiliar_block1.change(
	fn=get_audio,
	inputs=[original_video_path],
	outputs=[get_audio_from_video_info, original_audio_path]
	)
	get_audio_from_video_info.change(
	fn=trascribe_audio,
	inputs=[original_audio_path, source_languaje],
	outputs=[video_transcribed_progress_info]
	)
	# video_transcribed_progress_info.change(
	# fn=concatenate_transcriptions,
	# outputs=[transcriptions_concatenated_progress_info, original_audio_transcribed_path]
	# )
	# transcriptions_concatenated_progress_info.change(
	# fn=translate_transcription,
	# inputs=[original_audio_transcribed_path, source_languaje, target_languaje],
	# outputs=[video_translated_progress_info, original_audio_translated_path]
	# )
	# video_translated_progress_info.change(
	# fn=add_translated_subtitles_to_video,
	# inputs=[original_video_path, original_audio_path, original_audio_translated_path],
	# outputs=[subtitled_video, video_subtitled_progress_info, auxiliar_block3]
	# )
	# auxiliar_block3.change(
	# fn=hide_textbobes_progress_info,
	# outputs=[video_sliced_progress_info, video_transcribed_progress_info, transcriptions_concatenated_progress_info, video_translated_progress_info, video_subtitled_progress_info]
	# )

	demo.launch()


	if __name__ == "__main__":
	parser = argparse.ArgumentParser()
	parser.add_argument("--no_ui", action="store_true")
	parser.add_argument("--remove_all_files", action="store_true")
	args = parser.parse_args()

	if args.no_ui:
	subtify_no_ui()
	elif args.remove_all_files:
	remove_all_files()
	else:
	subtify()