File size: 12,600 Bytes
b293d47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
import os
import traceback
from enum import Enum

import gradio as gr

from gui.asset_components import AssetComponentsUtils
from gui.ui_abstract_component import AbstractComponentUI
from gui.ui_components_html import GradioComponentsHTML
from shortGPT.audio.edge_voice_module import EdgeTTSVoiceModule
from shortGPT.audio.eleven_voice_module import ElevenLabsVoiceModule
from shortGPT.audio.coqui_voice_module import CoquiVoiceModule
from shortGPT.config.api_db import ApiKeyManager
from shortGPT.config.languages import (EDGE_TTS_VOICENAME_MAPPING,
                                        ELEVEN_SUPPORTED_LANGUAGES,
                                        COQUI_SUPPORTED_LANGUAGES,
                                        LANGUAGE_ACRONYM_MAPPING,
                                        Language)
from shortGPT.engine.content_video_engine import ContentVideoEngine
from shortGPT.gpt import gpt_chat_video


class Chatstate(Enum):
    ASK_ORIENTATION = 1
    ASK_VOICE_MODULE = 2
    ASK_LANGUAGE = 3
    ASK_DESCRIPTION = 4
    GENERATE_SCRIPT = 5
    ASK_SATISFACTION = 6
    MAKE_VIDEO = 7
    ASK_CORRECTION = 8


class VideoAutomationUI(AbstractComponentUI):
    def __init__(self, shortGptUI: gr.Blocks):
        self.shortGptUI = shortGptUI
        self.state = Chatstate.ASK_ORIENTATION
        self.isVertical = None
        self.voice_module = None
        self.language = None
        self.script = ""
        self.video_html = ""
        self.videoVisible = False
        self.video_automation = None
        self.chatbot = None
        self.msg = None
        self.restart_button = None
        self.video_folder = None
        self.errorHTML = None
        self.outHTML = None

    def is_key_missing(self):
        openai_key = ApiKeyManager.get_api_key("OPENAI")
        if not openai_key:
            return "Your OpenAI key is missing. Please go to the config tab and enter the API key."

        pexels_api_key = ApiKeyManager.get_api_key("PEXELS")
        if not pexels_api_key:
            return "Your Pexels API key is missing. Please go to the config tab and enter the API key."

    def generate_script(self, message, language):
        return gpt_chat_video.generateScript(message, language)

    def correct_script(self, script, correction):
        return gpt_chat_video.correctScript(script, correction)

    def make_video(self, script, voice_module, isVertical, progress):
        videoEngine = ContentVideoEngine(voiceModule=voice_module, script=script, isVerticalFormat=isVertical)
        num_steps = videoEngine.get_total_steps()
        progress_counter = 0

        def logger(prog_str):
            progress(progress_counter / (num_steps), f"Creating video - {progress_counter} - {prog_str}")
        videoEngine.set_logger(logger)
        for step_num, step_info in videoEngine.makeContent():
            progress(progress_counter / (num_steps), f"Creating video - {step_info}")
            progress_counter += 1

        video_path = videoEngine.get_video_output_path()
        return video_path

    def reset_components(self):
        return gr.Chatbot.update(value=self.initialize_conversation()), gr.update(visible=True), gr.HTML.update(value="", visible=False), gr.HTML.update(value="", visible=False)

    def chatbot_conversation(self):
        def respond(message, chat_history, progress=gr.Progress()):
            # global self.state, isVertical, voice_module, language, script, videoVisible, video_html
            error_html = ""
            errorVisible = False
            inputVisible = True
            folderVisible = False
            if self.state == Chatstate.ASK_ORIENTATION:
                errorMessage = self.is_key_missing()
                if errorMessage:
                    bot_message = errorMessage
                else:
                    self.isVertical = "vertical" in message.lower() or "short" in message.lower()
                    self.state = Chatstate.ASK_VOICE_MODULE
                    bot_message = "Which voice module do you want to use? Please type 'ElevenLabs' for high quality, 'EdgeTTS' for free but medium quality voice or 'CoquiTTS' for free and good quality voice but requires a powerful GPU."
            elif self.state == Chatstate.ASK_VOICE_MODULE:
                if "elevenlabs" in message.lower():
                    eleven_labs_key = ApiKeyManager.get_api_key("ELEVEN LABS")
                    if not eleven_labs_key:
                        bot_message = "Your Eleven Labs API key is missing. Please go to the config tab and enter the API key."
                        return
                    self.voice_module = ElevenLabsVoiceModule
                    language_choices = [lang.value for lang in ELEVEN_SUPPORTED_LANGUAGES]
                elif "edgetts" in message.lower():
                    self.voice_module = EdgeTTSVoiceModule
                    language_choices = [lang.value for lang in Language]
                elif "coquitts" in message.lower():
                    self.voice_module = CoquiVoiceModule
                    language_choices = [lang.value for lang in COQUI_SUPPORTED_LANGUAGES]
                else:
                    bot_message = "Invalid voice module. Please type 'ElevenLabs' or 'EdgeTTS'."
                    return
                self.state = Chatstate.ASK_LANGUAGE
                bot_message = f"🌐What language will be used in the video?🌐 Choose from one of these ({', '.join(language_choices)})"
            elif self.state == Chatstate.ASK_LANGUAGE:
                self.language = next((lang for lang in Language if lang.value.lower() in message.lower()), None)
                self.language = self.language if self.language else Language.ENGLISH
                if self.voice_module == ElevenLabsVoiceModule:
                    self.voice_module = ElevenLabsVoiceModule(ApiKeyManager.get_api_key('ELEVEN LABS'), "Antoni", checkElevenCredits=True)
                elif self.voice_module == EdgeTTSVoiceModule:
                    self.voice_module = EdgeTTSVoiceModule(EDGE_TTS_VOICENAME_MAPPING[self.language]['male'])
                elif self.voice_module == CoquiVoiceModule:
                    self.voice_module = CoquiVoiceModule("Ana Florence", LANGUAGE_ACRONYM_MAPPING[self.language])
                self.state = Chatstate.ASK_DESCRIPTION
                bot_message = "Amazing πŸ”₯ ! πŸ“Can you describe thoroughly the subject of your video?πŸ“ I will next generate you a script based on that description"
            elif self.state == Chatstate.ASK_DESCRIPTION:
                self.script = self.generate_script(message, self.language.value)
                self.state = Chatstate.ASK_SATISFACTION
                bot_message = f"πŸ“ Here is your generated script: \n\n--------------\n{self.script}\n\n・Are you satisfied with the script and ready to proceed with creating the video? Please respond with 'YES' or 'NO'. πŸ‘πŸ‘Ž"
            elif self.state == Chatstate.ASK_SATISFACTION:
                if "yes" in message.lower():
                    self.state = Chatstate.MAKE_VIDEO
                    inputVisible = False
                    yield gr.update(visible=False), gr.Chatbot.update(value=[[None, "Your video is being made now! 🎬"]]), gr.HTML.update(value="", visible=False), gr.HTML.update(value=error_html, visible=errorVisible), gr.update(visible=folderVisible), gr.update(visible=False)
                    try:
                        video_path = self.make_video(self.script, self.voice_module, self.isVertical, progress=progress)
                        file_name = video_path.split("/")[-1].split("\\")[-1]
                        current_url = self.shortGptUI.share_url+"/" if self.shortGptUI.share else self.shortGptUI.local_url
                        file_url_path = f"{current_url}file={video_path}"
                        self.video_html = f'''
                            <div style="display: flex; flex-direction: column; align-items: center;">
                                <video width="{600}" height="{300}" style="max-height: 100%;" controls>
                                    <source src="{file_url_path}" type="video/mp4">
                                    Your browser does not support the video tag.
                                </video>
                                <a href="{file_url_path}" download="{file_name}" style="margin-top: 10px;">
                                    <button style="font-size: 1em; padding: 10px; border: none; cursor: pointer; color: white; background: #007bff;">Download Video</button>
                                </a>
                            </div>'''
                        self.videoVisible = True
                        folderVisible = True
                        bot_message = "Your video is completed !🎬. Scroll down below to open its file location."
                    except Exception as e:
                        traceback_str = ''.join(traceback.format_tb(e.__traceback__))
                        error_name = type(e).__name__.capitalize() + " : " + f"{e.args[0]}"
                        errorVisible = True
                        gradio_content_automation_ui_error_template = GradioComponentsHTML.get_html_error_template()
                        error_html = gradio_content_automation_ui_error_template.format(error_message=error_name, stack_trace=traceback_str)
                        bot_message = "We encountered an error while making this video ❌"
                        print("Error", traceback_str)
                        yield gr.update(visible=False), gr.Chatbot.update(value=[[None, "Your video is being made now! 🎬"]]), gr.HTML.update(value="", visible=False),
                        gr.HTML.update(value=error_html, visible=errorVisible), gr.update(visible=folderVisible), gr.update(visible=True)

                else:
                    self.state = Chatstate.ASK_CORRECTION  # change self.state to ASK_CORRECTION
                    bot_message = "Explain me what you want different in the script"
            elif self.state == Chatstate.ASK_CORRECTION:  # new self.state
                self.script = self.correct_script(self.script, message)  # call generateScript with correct=True
                self.state = Chatstate.ASK_SATISFACTION
                bot_message = f"πŸ“ Here is your corrected script: \n\n--------------\n{self.script}\n\n・Are you satisfied with the script and ready to proceed with creating the video? Please respond with 'YES' or 'NO'. πŸ‘πŸ‘Ž"
            chat_history.append((message, bot_message))
            yield gr.update(value="", visible=inputVisible), gr.Chatbot.update(value=chat_history), gr.HTML.update(value=self.video_html, visible=self.videoVisible), gr.HTML.update(value=error_html, visible=errorVisible), gr.update(visible=folderVisible), gr.update(visible=True)

        return respond

    def initialize_conversation(self):
        self.state = Chatstate.ASK_ORIENTATION
        self.isVertical = None
        self.language = None
        self.script = ""
        self.video_html = ""
        self.videoVisible = False
        return [[None, "πŸ€– Welcome to ShortGPT! πŸš€ I'm a python framework aiming to simplify and automate your video editing tasks.\nLet's get started! πŸŽ₯🎬\n\n Do you want your video to be in landscape or vertical format? (landscape OR vertical)"]]

    def reset_conversation(self):
        self.state = Chatstate.ASK_ORIENTATION
        self.isVertical = None
        self.language = None
        self.script = ""
        self.video_html = ""
        self.videoVisible = False

    def create_ui(self):
        with gr.Row(visible=False) as self.video_automation:
            with gr.Column():
                self.chatbot = gr.Chatbot(self.initialize_conversation, height=365)
                self.msg = gr.Textbox()
                self.restart_button = gr.Button("Restart")
                self.video_folder = gr.Button("πŸ“", visible=False)
                self.video_folder.click(lambda _: AssetComponentsUtils.start_file(os.path.abspath("videos/")))
                respond = self.chatbot_conversation()

            self.errorHTML = gr.HTML(visible=False)
            self.outHTML = gr.HTML(visible=False)
            self.restart_button.click(self.reset_components, [], [self.chatbot, self.msg, self.errorHTML, self.outHTML])
            self.restart_button.click(self.reset_conversation, [])
            self.msg.submit(respond, [self.msg, self.chatbot], [self.msg, self.chatbot, self.outHTML, self.errorHTML, self.video_folder, self.restart_button])
        return self.video_automation