import os import traceback from enum import Enum import gradio as gr from gui.asset_components import AssetComponentsUtils from gui.ui_abstract_component import AbstractComponentUI from gui.ui_components_html import GradioComponentsHTML from shortGPT.audio.edge_voice_module import EdgeTTSVoiceModule from shortGPT.audio.eleven_voice_module import ElevenLabsVoiceModule from shortGPT.audio.coqui_voice_module import CoquiVoiceModule from shortGPT.config.api_db import ApiKeyManager from shortGPT.config.languages import (EDGE_TTS_VOICENAME_MAPPING, ELEVEN_SUPPORTED_LANGUAGES, COQUI_SUPPORTED_LANGUAGES, LANGUAGE_ACRONYM_MAPPING, Language) from shortGPT.engine.content_video_engine import ContentVideoEngine from shortGPT.gpt import gpt_chat_video class Chatstate(Enum): ASK_ORIENTATION = 1 ASK_VOICE_MODULE = 2 ASK_LANGUAGE = 3 ASK_DESCRIPTION = 4 GENERATE_SCRIPT = 5 ASK_SATISFACTION = 6 MAKE_VIDEO = 7 ASK_CORRECTION = 8 class VideoAutomationUI(AbstractComponentUI): def __init__(self, shortGptUI: gr.Blocks): self.shortGptUI = shortGptUI self.state = Chatstate.ASK_ORIENTATION self.isVertical = None self.voice_module = None self.language = None self.script = "" self.video_html = "" self.videoVisible = False self.video_automation = None self.chatbot = None self.msg = None self.restart_button = None self.video_folder = None self.errorHTML = None self.outHTML = None def is_key_missing(self): openai_key = ApiKeyManager.get_api_key("OPENAI") if not openai_key: return "Your OpenAI key is missing. Please go to the config tab and enter the API key." pexels_api_key = ApiKeyManager.get_api_key("PEXELS") if not pexels_api_key: return "Your Pexels API key is missing. Please go to the config tab and enter the API key." def generate_script(self, message, language): return gpt_chat_video.generateScript(message, language) def correct_script(self, script, correction): return gpt_chat_video.correctScript(script, correction) def make_video(self, script, voice_module, isVertical, progress): videoEngine = ContentVideoEngine(voiceModule=voice_module, script=script, isVerticalFormat=isVertical) num_steps = videoEngine.get_total_steps() progress_counter = 0 def logger(prog_str): progress(progress_counter / (num_steps), f"Creating video - {progress_counter} - {prog_str}") videoEngine.set_logger(logger) for step_num, step_info in videoEngine.makeContent(): progress(progress_counter / (num_steps), f"Creating video - {step_info}") progress_counter += 1 video_path = videoEngine.get_video_output_path() return video_path def reset_components(self): return gr.Chatbot.update(value=self.initialize_conversation()), gr.update(visible=True), gr.HTML.update(value="", visible=False), gr.HTML.update(value="", visible=False) def chatbot_conversation(self): def respond(message, chat_history, progress=gr.Progress()): # global self.state, isVertical, voice_module, language, script, videoVisible, video_html error_html = "" errorVisible = False inputVisible = True folderVisible = False if self.state == Chatstate.ASK_ORIENTATION: errorMessage = self.is_key_missing() if errorMessage: bot_message = errorMessage else: self.isVertical = "vertical" in message.lower() or "short" in message.lower() self.state = Chatstate.ASK_VOICE_MODULE bot_message = "Which voice module do you want to use? Please type 'ElevenLabs' for high quality, 'EdgeTTS' for free but medium quality voice or 'CoquiTTS' for free and good quality voice but requires a powerful GPU." elif self.state == Chatstate.ASK_VOICE_MODULE: if "elevenlabs" in message.lower(): eleven_labs_key = ApiKeyManager.get_api_key("ELEVEN LABS") if not eleven_labs_key: bot_message = "Your Eleven Labs API key is missing. Please go to the config tab and enter the API key." return self.voice_module = ElevenLabsVoiceModule language_choices = [lang.value for lang in ELEVEN_SUPPORTED_LANGUAGES] elif "edgetts" in message.lower(): self.voice_module = EdgeTTSVoiceModule language_choices = [lang.value for lang in Language] elif "coquitts" in message.lower(): self.voice_module = CoquiVoiceModule language_choices = [lang.value for lang in COQUI_SUPPORTED_LANGUAGES] else: bot_message = "Invalid voice module. Please type 'ElevenLabs' or 'EdgeTTS'." return self.state = Chatstate.ASK_LANGUAGE bot_message = f"πWhat language will be used in the video?π Choose from one of these ({', '.join(language_choices)})" elif self.state == Chatstate.ASK_LANGUAGE: self.language = next((lang for lang in Language if lang.value.lower() in message.lower()), None) self.language = self.language if self.language else Language.ENGLISH if self.voice_module == ElevenLabsVoiceModule: self.voice_module = ElevenLabsVoiceModule(ApiKeyManager.get_api_key('ELEVEN LABS'), "Antoni", checkElevenCredits=True) elif self.voice_module == EdgeTTSVoiceModule: self.voice_module = EdgeTTSVoiceModule(EDGE_TTS_VOICENAME_MAPPING[self.language]['male']) elif self.voice_module == CoquiVoiceModule: self.voice_module = CoquiVoiceModule("Ana Florence", LANGUAGE_ACRONYM_MAPPING[self.language]) self.state = Chatstate.ASK_DESCRIPTION bot_message = "Amazing π₯ ! πCan you describe thoroughly the subject of your video?π I will next generate you a script based on that description" elif self.state == Chatstate.ASK_DESCRIPTION: self.script = self.generate_script(message, self.language.value) self.state = Chatstate.ASK_SATISFACTION bot_message = f"π Here is your generated script: \n\n--------------\n{self.script}\n\nγ»Are you satisfied with the script and ready to proceed with creating the video? Please respond with 'YES' or 'NO'. ππ" elif self.state == Chatstate.ASK_SATISFACTION: if "yes" in message.lower(): self.state = Chatstate.MAKE_VIDEO inputVisible = False yield gr.update(visible=False), gr.Chatbot.update(value=[[None, "Your video is being made now! π¬"]]), gr.HTML.update(value="", visible=False), gr.HTML.update(value=error_html, visible=errorVisible), gr.update(visible=folderVisible), gr.update(visible=False) try: video_path = self.make_video(self.script, self.voice_module, self.isVertical, progress=progress) file_name = video_path.split("/")[-1].split("\\")[-1] current_url = self.shortGptUI.share_url+"/" if self.shortGptUI.share else self.shortGptUI.local_url file_url_path = f"{current_url}file={video_path}" self.video_html = f'''
''' self.videoVisible = True folderVisible = True bot_message = "Your video is completed !π¬. Scroll down below to open its file location." except Exception as e: traceback_str = ''.join(traceback.format_tb(e.__traceback__)) error_name = type(e).__name__.capitalize() + " : " + f"{e.args[0]}" errorVisible = True gradio_content_automation_ui_error_template = GradioComponentsHTML.get_html_error_template() error_html = gradio_content_automation_ui_error_template.format(error_message=error_name, stack_trace=traceback_str) bot_message = "We encountered an error while making this video β" print("Error", traceback_str) yield gr.update(visible=False), gr.Chatbot.update(value=[[None, "Your video is being made now! π¬"]]), gr.HTML.update(value="", visible=False), gr.HTML.update(value=error_html, visible=errorVisible), gr.update(visible=folderVisible), gr.update(visible=True) else: self.state = Chatstate.ASK_CORRECTION # change self.state to ASK_CORRECTION bot_message = "Explain me what you want different in the script" elif self.state == Chatstate.ASK_CORRECTION: # new self.state self.script = self.correct_script(self.script, message) # call generateScript with correct=True self.state = Chatstate.ASK_SATISFACTION bot_message = f"π Here is your corrected script: \n\n--------------\n{self.script}\n\nγ»Are you satisfied with the script and ready to proceed with creating the video? Please respond with 'YES' or 'NO'. ππ" chat_history.append((message, bot_message)) yield gr.update(value="", visible=inputVisible), gr.Chatbot.update(value=chat_history), gr.HTML.update(value=self.video_html, visible=self.videoVisible), gr.HTML.update(value=error_html, visible=errorVisible), gr.update(visible=folderVisible), gr.update(visible=True) return respond def initialize_conversation(self): self.state = Chatstate.ASK_ORIENTATION self.isVertical = None self.language = None self.script = "" self.video_html = "" self.videoVisible = False return [[None, "π€ Welcome to ShortGPT! π I'm a python framework aiming to simplify and automate your video editing tasks.\nLet's get started! π₯π¬\n\n Do you want your video to be in landscape or vertical format? (landscape OR vertical)"]] def reset_conversation(self): self.state = Chatstate.ASK_ORIENTATION self.isVertical = None self.language = None self.script = "" self.video_html = "" self.videoVisible = False def create_ui(self): with gr.Row(visible=False) as self.video_automation: with gr.Column(): self.chatbot = gr.Chatbot(self.initialize_conversation, height=365) self.msg = gr.Textbox() self.restart_button = gr.Button("Restart") self.video_folder = gr.Button("π", visible=False) self.video_folder.click(lambda _: AssetComponentsUtils.start_file(os.path.abspath("videos/"))) respond = self.chatbot_conversation() self.errorHTML = gr.HTML(visible=False) self.outHTML = gr.HTML(visible=False) self.restart_button.click(self.reset_components, [], [self.chatbot, self.msg, self.errorHTML, self.outHTML]) self.restart_button.click(self.reset_conversation, []) self.msg.submit(respond, [self.msg, self.chatbot], [self.msg, self.chatbot, self.outHTML, self.errorHTML, self.video_folder, self.restart_button]) return self.video_automation