File size: 17,927 Bytes
67b7ca7
 
 
 
ea0d1f7
 
 
67b7ca7
 
 
 
 
402b53e
67b7ca7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea0d1f7
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
import random  # Import the random module
from groq import Groq
from openai import OpenAI
import os
import io
import base64
from huggingface_hub import InferenceApi
class PromptClass:
    """Build text-to-image prompts and chat replies through hosted LLM providers.

    The instance holds three OpenAI-style chat clients (Hugging Face, Groq and
    SambaNova) configured from environment variables, and exposes helpers to
    expand a description into an image caption (`generate`), run a chat
    request (`chat`) and caption an image (`img2text`).
    """

    # System message used by `generate`.
    _GENERATE_SYSTEM_MESSAGE = "You are a helpful assistant. Try your best to give the best response possible to the user."
    # System message used by `chat` when it has to wrap bare user content.
    _CHAT_SYSTEM_MESSAGE = "You are a helpful assistant"

    # provider name -> (client attribute, default model, provider-only kwargs).
    # The client is looked up by attribute name at call time so that only the
    # selected provider's client has to exist.
    _PROVIDERS = {
        "Hugging Face": (
            "huggingface_client",
            "meta-llama/Meta-Llama-3.1-70B-Instruct",
            {"top_p": 0.95},
        ),
        "Groq": ("groq_client", "llama-3.1-70b-versatile", {}),
        "SambaNova": ("sambanova_client", "Meta-Llama-3.1-70B-Instruct", {}),
    }

    # compression level -> (character limit with long_talk, limit without).
    _COMPRESSION_LIMITS = {
        "soft": (600, 300),
        "medium": (400, 200),
        "hard": (200, 100),
    }

    # Instruction templates selectable through `prompt_type` in `generate`.
    # NOTE: the "Medium" key maps to the movie-poster template.
    _PROMPT_TEMPLATES = {
        "Long": """Create a detailed visually descriptive caption of this description, 
which will be used as a prompt for a text to image AI system (caption only, no instructions like "create an image"). 
Remove any mention of digital artwork or artwork style. Give detailed visual descriptions of the character(s), including ethnicity, skin tone, expression etc. 
Imagine using keywords for a still for someone who has aphantasia. Describe the image style, e.g., any photographic or art styles/techniques utilized. 
Make sure to fully describe all aspects of the cinematography, with abundant technical details and visual descriptions. 
If there is more than one image, combine the elements and characters from all of the images creatively into a single 
cohesive composition with a single background, inventing an interaction between the characters. 
Be creative in combining the characters into a single cohesive scene. 
Focus on two primary characters (or one) and describe an interesting interaction between them, such as a hug, a kiss, a fight, giving an object, 
an emotional reaction/interaction. If there is more than one background in the images, pick the most appropriate one. 
Your output is only the caption itself, no comments or extra formatting. 
The caption is in a single long paragraph. 
If you feel the images are inappropriate, invent a new scene/characters inspired by these. 
Additionally, incorporate a specific movie director's visual style and describe the lighting setup in detail, 
including the type, color, and placement of light sources to create the desired mood and atmosphere. 
Always frame the scene, including details about the film grain, color grading, and any artifacts or characteristics specific.""",
        "Short": """Create a brief, straightforward caption for this description, suitable for a text-to-image AI system. 
Focus on the main elements, key characters, and overall scene without elaborate details. 
Provide a clear and concise description in one or two sentences. Your output is only the caption itself, no comments or extra formatting. 
The caption is in a single long paragraph.""",
        "Medium": """Analyze the provided description and extract key information to create a movie poster style description. Format the output as follows:
Title: A catchy, intriguing title that captures the essence of the scene, place the title in "".
Main character: Give a description of the main character.
Background: Describe the background in detail.
Supporting characters: Describe the supporting characters.
Branding type: Describe the branding type.
Tagline: Include a tagline that captures the essence of the movie.
Visual style: Ensure that the visual style fits the branding type and tagline.
You are allowed to make up film and branding names, and do them like 80's, 90's or modern movie posters. 
Your output is only the caption itself, no comments or extra formatting. The caption is in a single long paragraph.""",
        "OnlyObjects": """Create a highly detailed and visually rich description focusing solely on inanimate objects, 
without including any human or animal figures. Describe the objects' shapes, sizes, colors, textures, and materials in great detail. 
Pay attention to their arrangement, positioning, and how they interact with light and shadow. Include information about the setting 
or environment these objects are in, such as indoor/outdoor, time of day, weather conditions, and any atmospheric effects. 
Mention any unique features, patterns, or imperfections on the objects. Describe the overall composition, perspective, and 
any artistic techniques that might be employed to render these objects (e.g., photorealism, impressionistic style, etc.). 
Your description should paint a vivid picture that allows someone to imagine the scene without seeing it, focusing on the beauty, 
complexity, or significance of everyday objects. Your output is only the caption itself, no comments or extra formatting. 
The caption is in a single long paragraph.""",
        "NoFigure": """Generate a comprehensive and visually evocative description of a scene 
or landscape without including any human or animal figures. Focus on the environment, natural elements, and man-made structures if present. 
Describe the topography, vegetation, weather conditions, and time of day in great detail. 
Pay attention to colors, textures, and how light interacts with different elements of the scene. 
If there are buildings or other structures, describe their architecture, condition, and how they fit into the landscape. 
Include sensory details beyond just visual elements - mention sounds, smells, and the overall atmosphere or mood of the scene. 
Describe any notable features like bodies of water, geological formations, or sky phenomena. 
Consider the perspective from which the scene is viewed and how this affects the composition. 
Your description should transport the reader to this location, allowing them to vividly imagine the scene without any living subjects present. 
 Your output is only the caption itself, no comments or extra formatting. The caption is in a single long paragraph.""",
        "Landscape": """Create an immersive and detailed description of a landscape, 
focusing on its natural beauty and geographical features. 
Begin with the overall topography - is it mountainous, coastal, forested, desert, or a combination? 
Describe the horizon and how land meets sky. Detail the vegetation, noting types of trees, flowers, or grass, 
and how they're distributed across the landscape. Include information about any water features - 
rivers, lakes, oceans - and how they interact with the land. Describe the sky, including cloud formations, 
color gradients, and any celestial bodies visible. 
Pay attention to the quality of light, time of day, and season, explaining how these factors affect the colors and shadows in the scene. 
Include details about weather conditions and how they impact the landscape. 
Mention any geological features like rock formations, cliffs, or unique land patterns. 
If there are any distant man-made elements, describe how they integrate with the natural setting. 
Your description should capture the grandeur and mood of the landscape, 
allowing the reader to feel as if they're standing within this awe-inspiring natural scene. 
Your output is only the caption itself, no comments or extra formatting. The caption is in a single long paragraph.""",
        "Fantasy": """Craft an extraordinarily detailed and imaginative description of a fantasy scene, 
blending elements of magic, otherworldly creatures, and fantastical environments. Begin by setting the overall tone - 
is this a dark and foreboding realm, a whimsical fairytale setting, or an epic high-fantasy world? 
Describe the landscape, including any impossible or magical geographical features like floating islands, 
crystal forests, or rivers of starlight. Detail the flora and fauna, 
focusing on fantastical plants and creatures that don't exist in our world. 
Include descriptions of any structures or ruins, emphasizing their otherworldly architecture and magical properties. 
Describe the sky and any celestial bodies, considering how they might differ from our reality. 
Include details about the presence of magic - how it manifests visually, 
its effects on the environment, and any magical phenomena occurring in the scene. 
If there are characters present, describe their appearance, focusing on non-human features, magical auras, or 
fantastical clothing and accessories. Pay attention to colors, textures, and light sources, 
especially those that couldn't exist in the real world. Your description should transport the 
reader to a realm of pure imagination, where the laws of physics and nature as we know them don't apply. 
Your output is only the caption itself, no comments or extra formatting. The caption is in a single long paragraph.""",
    }

    def __init__(self):
        """Read API credentials from the environment and build the provider clients.

        Reads `HF_TOKEN`, `GROQ_TOKEN` and `SAMBANOVA_TOKEN`, creates one chat
        client per provider, then calls `download_models()` (network access).
        """
        self.huggingface_token = os.environ.get("HF_TOKEN")
        self.groq_api_key = os.environ.get("GROQ_TOKEN")
        self.sambanova_api_key = os.environ.get("SAMBANOVA_TOKEN")
        # The API key is deliberately NOT printed: secrets must not end up in
        # stdout or captured logs.
        self.huggingface_client = OpenAI(
            base_url="https://api-inference.huggingface.co/v1/",
            api_key=self.huggingface_token,
        )
        self.groq_client = Groq(api_key=self.groq_api_key)
        self.sambanova_client = OpenAI(
            api_key=self.sambanova_api_key,
            base_url="https://api.sambanova.ai/v1",
        )
        self.download_models()

    def download_models(self):
        """Fetch one small file from each Stable Diffusion 3.5 repo into ./models.

        Only `mmdit.png` and `LICENSE.md` are downloaded, not model weights.
        NOTE(review): presumably this just checks that the token has access to
        the repositories - confirm the intent with the author.
        """
        from huggingface_hub import hf_hub_download

        hf_hub_download(
            repo_id="stabilityai/stable-diffusion-3.5-large",
            filename="mmdit.png",
            local_dir="./models",
            token=self.huggingface_token,
        )

        hf_hub_download(
            repo_id="stabilityai/stable-diffusion-3.5-large-turbo",
            filename="LICENSE.md",
            local_dir="./models",
            token=self.huggingface_token,
        )

    def generate_prompt(self, dynamic_seed, prompt_type, custom_input):
        """Return the text that will later be handed to `generate`.

        Args:
            dynamic_seed: Value used to seed the module-level `random`
                generator (this affects every later `random` call).
            prompt_type: Name interpolated into the placeholder request when
                no custom input is given.
            custom_input: User-supplied text; returned unchanged when it
                contains any non-whitespace character.

        Returns:
            `custom_input` as given, or a "Create a random prompt based on ..."
            placeholder that `generate` recognises.
        """
        random.seed(dynamic_seed)
        if custom_input and custom_input.strip():
            prompt = custom_input
        else:
            prompt = f"Create a random prompt based on the '{prompt_type}' type."

        print(f"Generated prompt: {prompt}")  # Debug statement
        return prompt

    @staticmethod
    def _clean_output(output):
        """Strip a leading "Here is ...:" style lead-in from a model reply.

        Only replies that start with "here" (case-insensitive) are touched, so
        colons inside the actual caption never truncate it.
        """
        text = output.strip()
        if not text.lower().startswith("here"):
            return text

        first_line, newline, rest = text.partition("\n")
        head, colon, tail = first_line.partition(":")
        # A colon inside the very first sentence marks the end of the lead-in
        # ("Here is the caption: ..." / "Here is the caption:\n...").
        if colon and ". " not in head:
            remainder = (tail + newline + rest).strip()
            return remainder or text

        # Otherwise drop the introductory sentence, if there is a second one.
        sentences = text.split(". ")
        if len(sentences) > 1:
            return ". ".join(sentences[1:]).strip()
        return text

    @classmethod
    def _as_messages(cls, input_text):
        """Return a chat `messages` list for either plain content or a history.

        A non-empty list/tuple of dicts that all carry a "role" key is treated
        as a ready-made conversation; anything else becomes the user content
        of a fresh system + user exchange.
        """
        if (
            isinstance(input_text, (list, tuple))
            and input_text
            and all(isinstance(item, dict) and "role" in item for item in input_text)
        ):
            return list(input_text)
        return [
            {"role": "system", "content": cls._CHAT_SYSTEM_MESSAGE},
            {"role": "user", "content": input_text},
        ]

    def _request_completion(self, provider, model, messages, seed):
        """Send `messages` to `provider` and return the stripped reply text.

        Raises:
            ValueError: If `provider` is not one of the supported names.
        """
        try:
            client_attr, default_model, extra_kwargs = self._PROVIDERS[provider]
        except (KeyError, TypeError):
            raise ValueError(f"Unsupported provider: {provider}") from None

        client = getattr(self, client_attr)
        response = client.chat.completions.create(
            model=model or default_model,
            max_tokens=1024,
            temperature=1.0,
            messages=messages,
            seed=seed,  # Pass the seed parameter
            **extra_kwargs,
        )
        # `content` may be None for an empty completion; treat it as "".
        return (response.choices[0].message.content or "").strip()

    def generate(
        self,
        input_text,
        long_talk,
        compress,
        compression_level,
        poster,
        prompt_type,
        custom_base_prompt="",
        provider="Hugging Face",
        model=None,
    ):
        """Expand a description into an image caption using an LLM provider.

        Args:
            input_text: Description to caption. Text produced by
                `generate_prompt` for the "random" case is detected by its
                "Create a random prompt based on" prefix. `None` is treated
                as an empty description.
            long_talk: Selects the larger character limits when compressing.
            compress: When true (and `poster` is false) a size limit is
                appended to the instructions.
            compression_level: "soft", "medium" or "hard"; any other value
                uses a 200-character limit.
            poster: Disables compression when true.
            prompt_type: Key of the instruction template, or "Random" to pick
                one at random.
            custom_base_prompt: Instructions used when `prompt_type` is not a
                known template; `None` is treated as empty.
            provider: "Hugging Face", "Groq" or "SambaNova".
            model: Optional model name overriding the provider default.

        Returns:
            The cleaned caption, or a string starting with
            "Error occurred while processing the request:" when anything fails
            (exceptions are never propagated to the caller).
        """
        try:
            templates = self._PROMPT_TEMPLATES
            input_text = input_text or ""
            custom_base_prompt = custom_base_prompt or ""

            # Determine the base prompt
            print(f"Received prompt_type: '{prompt_type}'")  # Debug print
            if prompt_type == "Random":
                prompt_type = random.choice(list(templates))
                print(f"Randomly selected prompt type: {prompt_type}")

            if prompt_type in templates:
                base_prompt = templates[prompt_type]
                print(f"Using {prompt_type} prompt")
            elif custom_base_prompt.strip():
                base_prompt = custom_base_prompt
                print("Using custom base prompt")
            else:
                base_prompt = templates["Long"]
                print(f"Warning: Unknown or empty prompt type '{prompt_type}'. Using default long prompt.")

            # Handle compression if applicable
            if compress and not poster:
                long_limit, short_limit = self._COMPRESSION_LIMITS.get(
                    compression_level, (200, 200)
                )
                char_limit = long_limit if long_talk else short_limit
                base_prompt += f" Compress the output to be concise while retaining key visual details. MAX OUTPUT SIZE no more than {char_limit} characters."

            # Construct messages for the LLM
            if input_text.startswith("Create a random prompt based on"):
                user_message = f"Create a random description based on this\nInstructions: {base_prompt}"
            else:
                user_message = f"{base_prompt}\nDescription: {input_text}"

            messages = [
                {"role": "system", "content": self._GENERATE_SYSTEM_MESSAGE},
                {"role": "user", "content": user_message},
            ]

            # Generate a random seed
            seed = random.randint(0, 10000)
            print(f"Generated seed: {seed}")  # Debug print

            output = self._request_completion(provider, model, messages, seed)
            return self._clean_output(output)

        except Exception as e:
            # Top-level boundary: callers expect a displayable string, not an
            # exception.
            print(f"An error occurred: {e}")
            return f"Error occurred while processing the request: {str(e)}"

    def chat(self, provider="Hugging Face", model=None, input_text=None):
        """Send a chat request to `provider` and return the cleaned reply.

        Args:
            provider: "Hugging Face", "Groq" or "SambaNova".
            model: Optional model name overriding the provider default.
            input_text: Either the user's message, or a list of
                `{"role": ..., "content": ...}` dicts that is forwarded as the
                full conversation. Both forms work with every provider.

        Returns:
            The reply text, or `None` when `input_text` is empty or `None`.

        Raises:
            ValueError: If `provider` is not supported. Errors raised by the
                provider client propagate unchanged.
        """
        seed = random.randint(0, 10000)
        if not input_text:
            return None

        messages = self._as_messages(input_text)
        output = self._request_completion(provider, model, messages, seed)
        return self._clean_output(output)

    def img2text(self, image=None):
        """Caption `image` with the Salesforce BLIP image-captioning model.

        Args:
            image: Object exposing `save(file, format=...)`; assumed to be a
                PIL image - TODO confirm against the caller.

        Returns:
            The generated caption, or `None` when no image is given.
        """
        if not image:
            return None

        inference = InferenceApi(
            repo_id="Salesforce/blip-image-captioning-base",
            token=self.huggingface_token,
        )
        # JPEG cannot store alpha or palette modes; convert those to RGB
        # first so that saving does not raise.
        if getattr(image, "mode", "RGB") not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
            image = image.convert("RGB")
        # Serialise the image to JPEG bytes in memory
        image_bytes = io.BytesIO()
        image.save(image_bytes, format="JPEG")
        image_data = image_bytes.getvalue()
        image_base64 = base64.b64encode(image_data).decode("utf-8")
        # Send the API request
        response = inference(inputs={"image": image_base64})
        return response[0]["generated_text"]