"""Prompt-generation helpers backed by Hugging Face, Groq and SambaNova chat APIs."""

import base64
import io
import os
import random

from groq import Groq
from huggingface_hub import InferenceApi
from openai import OpenAI

# ---------------------------------------------------------------------------
# Instruction templates sent to the LLM.
#
# They are module-level constants so they are built once instead of on every
# ``generate`` call. Each template is a single paragraph; adjacent literals are
# joined by Python's implicit string concatenation.
# ---------------------------------------------------------------------------

_DEFAULT_LONG_PROMPT = (
    'Create a detailed visually descriptive caption of this description, '
    'which will be used as a prompt for a text to image AI system '
    '(caption only, no instructions like "create an image"). '
    "Remove any mention of digital artwork or artwork style. "
    "Give detailed visual descriptions of the character(s), including "
    "ethnicity, skin tone, expression etc. "
    "Imagine using keywords for a still for someone who has aphantasia. "
    "Describe the image style, e.g., any photographic or art "
    "styles/techniques utilized. "
    "Make sure to fully describe all aspects of the cinematography, with "
    "abundant technical details and visual descriptions. "
    "If there is more than one image, combine the elements and characters "
    "from all of the images creatively into a single cohesive composition "
    "with a single background, inventing an interaction between the "
    "characters. "
    "Be creative in combining the characters into a single cohesive scene. "
    "Focus on two primary characters (or one) and describe an interesting "
    "interaction between them, such as a hug, a kiss, a fight, giving an "
    "object, an emotional reaction/interaction. "
    "If there is more than one background in the images, pick the most "
    "appropriate one. "
    "Your output is only the caption itself, no comments or extra "
    "formatting. "
    "The caption is in a single long paragraph. "
    "If you feel the images are inappropriate, invent a new "
    "scene/characters inspired by these. "
    "Additionally, incorporate a specific movie director's visual style and "
    "describe the lighting setup in detail, including the type, color, and "
    "placement of light sources to create the desired mood and atmosphere. "
    "Always frame the scene, including details about the film grain, color "
    "grading, and any artifacts or characteristics specific."
)

_DEFAULT_SIMPLE_PROMPT = (
    "Create a brief, straightforward caption for this description, suitable "
    "for a text-to-image AI system. "
    "Focus on the main elements, key characters, and overall scene without "
    "elaborate details. "
    "Provide a clear and concise description in one or two sentences. "
    "Your output is only the caption itself, no comments or extra "
    "formatting. "
    "The caption is in a single long paragraph."
)

_POSTER_PROMPT = (
    "Analyze the provided description and extract key information to create "
    "a movie poster style description. "
    "Format the output as follows: "
    "Title: A catchy, intriguing title that captures the essence of the "
    'scene, place the title in "". '
    "Main character: Give a description of the main character. "
    "Background: Describe the background in detail. "
    "Supporting characters: Describe the supporting characters. "
    "Branding type: Describe the branding type. "
    "Tagline: Include a tagline that captures the essence of the movie. "
    "Visual style: Ensure that the visual style fits the branding type and "
    "tagline. "
    "You are allowed to make up film and branding names, and do them like "
    "80's, 90's or modern movie posters. "
    "Your output is only the caption itself, no comments or extra "
    "formatting. "
    "The caption is in a single long paragraph."
)

_ONLY_OBJECTS_PROMPT = (
    "Create a highly detailed and visually rich description focusing solely "
    "on inanimate objects, without including any human or animal figures. "
    "Describe the objects' shapes, sizes, colors, textures, and materials "
    "in great detail. "
    "Pay attention to their arrangement, positioning, and how they interact "
    "with light and shadow. "
    "Include information about the setting or environment these objects are "
    "in, such as indoor/outdoor, time of day, weather conditions, and any "
    "atmospheric effects. "
    "Mention any unique features, patterns, or imperfections on the "
    "objects. "
    "Describe the overall composition, perspective, and any artistic "
    "techniques that might be employed to render these objects (e.g., "
    "photorealism, impressionistic style, etc.). "
    "Your description should paint a vivid picture that allows someone to "
    "imagine the scene without seeing it, focusing on the beauty, "
    "complexity, or significance of everyday objects. "
    "Your output is only the caption itself, no comments or extra "
    "formatting. "
    "The caption is in a single long paragraph."
)

_NO_FIGURE_PROMPT = (
    "Generate a comprehensive and visually evocative description of a scene "
    "or landscape without including any human or animal figures. "
    "Focus on the environment, natural elements, and man-made structures if "
    "present. "
    "Describe the topography, vegetation, weather conditions, and time of "
    "day in great detail. "
    "Pay attention to colors, textures, and how light interacts with "
    "different elements of the scene. "
    "If there are buildings or other structures, describe their "
    "architecture, condition, and how they fit into the landscape. "
    "Include sensory details beyond just visual elements - mention sounds, "
    "smells, and the overall atmosphere or mood of the scene. "
    "Describe any notable features like bodies of water, geological "
    "formations, or sky phenomena. "
    "Consider the perspective from which the scene is viewed and how this "
    "affects the composition. "
    "Your description should transport the reader to this location, "
    "allowing them to vividly imagine the scene without any living subjects "
    "present. "
    "Your output is only the caption itself, no comments or extra "
    "formatting. "
    "The caption is in a single long paragraph."
)

_LANDSCAPE_PROMPT = (
    "Create an immersive and detailed description of a landscape, focusing "
    "on its natural beauty and geographical features. "
    "Begin with the overall topography - is it mountainous, coastal, "
    "forested, desert, or a combination? "
    "Describe the horizon and how land meets sky. "
    "Detail the vegetation, noting types of trees, flowers, or grass, and "
    "how they're distributed across the landscape. "
    "Include information about any water features - rivers, lakes, oceans - "
    "and how they interact with the land. "
    "Describe the sky, including cloud formations, color gradients, and any "
    "celestial bodies visible. "
    "Pay attention to the quality of light, time of day, and season, "
    "explaining how these factors affect the colors and shadows in the "
    "scene. "
    "Include details about weather conditions and how they impact the "
    "landscape. "
    "Mention any geological features like rock formations, cliffs, or "
    "unique land patterns. "
    "If there are any distant man-made elements, describe how they "
    "integrate with the natural setting. "
    "Your description should capture the grandeur and mood of the "
    "landscape, allowing the reader to feel as if they're standing within "
    "this awe-inspiring natural scene. "
    "Your output is only the caption itself, no comments or extra "
    "formatting. "
    "The caption is in a single long paragraph."
)

_FANTASY_PROMPT = (
    "Craft an extraordinarily detailed and imaginative description of a "
    "fantasy scene, blending elements of magic, otherworldly creatures, and "
    "fantastical environments. "
    "Begin by setting the overall tone - is this a dark and foreboding "
    "realm, a whimsical fairytale setting, or an epic high-fantasy world? "
    "Describe the landscape, including any impossible or magical "
    "geographical features like floating islands, crystal forests, or "
    "rivers of starlight. "
    "Detail the flora and fauna, focusing on fantastical plants and "
    "creatures that don't exist in our world. "
    "Include descriptions of any structures or ruins, emphasizing their "
    "otherworldly architecture and magical properties. "
    "Describe the sky and any celestial bodies, considering how they might "
    "differ from our reality. "
    "Include details about the presence of magic - how it manifests "
    "visually, its effects on the environment, and any magical phenomena "
    "occurring in the scene. "
    "If there are characters present, describe their appearance, focusing "
    "on non-human features, magical auras, or fantastical clothing and "
    "accessories. "
    "Pay attention to colors, textures, and light sources, especially those "
    "that couldn't exist in the real world. "
    "Your description should transport the reader to a realm of pure "
    "imagination, where the laws of physics and nature as we know them "
    "don't apply. "
    "Your output is only the caption itself, no comments or extra "
    "formatting. "
    "The caption is in a single long paragraph."
)

# Maps the user-facing prompt-type name to its instruction template.
# NOTE(review): "Medium" selects the movie-poster template; that mapping is
# kept exactly as it was - confirm it is intentional.
_PROMPT_TYPES = {
    "Long": _DEFAULT_LONG_PROMPT,
    "Short": _DEFAULT_SIMPLE_PROMPT,
    "Medium": _POSTER_PROMPT,
    "OnlyObjects": _ONLY_OBJECTS_PROMPT,
    "NoFigure": _NO_FIGURE_PROMPT,
    "Landscape": _LANDSCAPE_PROMPT,
    "Fantasy": _FANTASY_PROMPT,
}

# Character budgets for compressed output: level -> (long_talk, not long_talk).
_COMPRESSION_LIMITS = {
    "soft": (600, 300),
    "medium": (400, 200),
    "hard": (200, 100),
}
_DEFAULT_COMPRESSION_LIMIT = 200

# Model used for each provider when the caller does not name one.
_DEFAULT_MODELS = {
    "Hugging Face": "meta-llama/Meta-Llama-3.1-70B-Instruct",
    "Groq": "llama-3.1-70b-versatile",
    "SambaNova": "Meta-Llama-3.1-70B-Instruct",
}

# Shared between ``generate_prompt`` (which emits it) and ``generate`` (which
# detects it), so the two cannot drift apart.
_RANDOM_PROMPT_PREFIX = "Create a random prompt based on"

_GENERATE_SYSTEM_MESSAGE = (
    "You are a helpful assistant. Try your best to give the best response "
    "possible to the user."
)
_CHAT_SYSTEM_MESSAGE = "You are a helpful assistant"


class PromptClass:
    """Build text-to-image prompts and chat replies through hosted LLM APIs.

    Credentials are read from the ``HF_TOKEN``, ``GROQ_TOKEN`` and
    ``SAMBANOVA_TOKEN`` environment variables. Constructing an instance
    creates one client per provider and then calls ``download_models``.
    """

    def __init__(self):
        self.huggingface_token = os.environ.get("HF_TOKEN")
        self.groq_api_key = os.environ.get("GROQ_TOKEN")
        self.sambanova_api_key = os.environ.get("SAMBANOVA_TOKEN")
        # The API key is deliberately NOT printed: it is a secret and must not
        # end up in stdout or captured logs.

        self.huggingface_client = OpenAI(
            base_url="https://api-inference.huggingface.co/v1/",
            api_key=self.huggingface_token,
        )
        self.groq_client = Groq(api_key=self.groq_api_key)
        self.sambanova_client = OpenAI(
            api_key=self.sambanova_api_key,
            base_url="https://api.sambanova.ai/v1",
        )
        self.download_models()

    def download_models(self):
        """Fetch one small file from each Stable Diffusion 3.5 repo into ``./models``."""
        from huggingface_hub import hf_hub_download

        downloads = (
            ("stabilityai/stable-diffusion-3.5-large", "mmdit.png"),
            ("stabilityai/stable-diffusion-3.5-large-turbo", "LICENSE.md"),
        )
        for repo_id, filename in downloads:
            hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                local_dir="./models",
                token=self.huggingface_token,
            )

    def generate_prompt(self, dynamic_seed, prompt_type, custom_input):
        """Return the user's custom input, or a "random prompt" request.

        Args:
            dynamic_seed: Value passed to ``random.seed``. This reseeds the
                module-wide generator, so later ``random`` calls are affected.
            prompt_type: Name embedded in the fallback request text.
            custom_input: Text returned unchanged when it contains any
                non-whitespace character.

        Returns:
            ``custom_input`` if it is non-blank, otherwise a request string
            that ``generate`` recognises by its prefix.
        """
        random.seed(dynamic_seed)
        if custom_input and custom_input.strip():
            prompt = custom_input
        else:
            prompt = f"{_RANDOM_PROMPT_PREFIX} the '{prompt_type}' type."

        print(f"Generated prompt: {prompt}")  # Debug statement
        return prompt

    def generate(
        self,
        input_text,
        long_talk,
        compress,
        compression_level,
        poster,
        prompt_type,
        custom_base_prompt="",
        provider="Hugging Face",
        model=None,
    ):
        """Ask the chosen provider to turn ``input_text`` into an image prompt.

        Args:
            input_text: Description to rewrite, or the request string produced
                by ``generate_prompt`` to get a random description instead.
            long_talk: Selects the larger character budget when compressing.
            compress: Append a length limit to the instructions (ignored when
                ``poster`` is truthy).
            compression_level: ``"soft"``, ``"medium"`` or ``"hard"``; any
                other value uses a 200-character limit.
            poster: When truthy, disables compression.
            prompt_type: Template name, or ``"Random"`` to pick one at random.
            custom_base_prompt: Instructions used when ``prompt_type`` is not
                a known template name.
            provider: ``"Hugging Face"``, ``"Groq"`` or ``"SambaNova"``.
            model: Model identifier; ``None`` uses the provider's default.

        Returns:
            The cleaned model output. Any exception (including an unsupported
            provider) is caught and reported as an error-message string
            rather than raised.
        """
        try:
            base_prompt = self._resolve_base_prompt(prompt_type, custom_base_prompt)

            # Handle compression if applicable
            if compress and not poster:
                long_limit, short_limit = _COMPRESSION_LIMITS.get(
                    compression_level,
                    (_DEFAULT_COMPRESSION_LIMIT, _DEFAULT_COMPRESSION_LIMIT),
                )
                char_limit = long_limit if long_talk else short_limit
                base_prompt += (
                    " Compress the output to be concise while retaining key"
                    " visual details. MAX OUTPUT SIZE no more than"
                    f" {char_limit} characters."
                )

            # Construct messages for the LLM
            if input_text.startswith(_RANDOM_PROMPT_PREFIX):
                user_message = (
                    "Create a random description based on this\n"
                    f"Instructions: {base_prompt}"
                )
            else:
                user_message = f"{base_prompt}\nDescription: {input_text}"

            seed = random.randint(0, 10000)
            print(f"Generated seed: {seed}")  # Debug print

            messages = [
                {"role": "system", "content": _GENERATE_SYSTEM_MESSAGE},
                {"role": "user", "content": user_message},
            ]
            output = self._complete(provider, model, messages, seed)
            return self._clean_output(output)

        except Exception as e:
            # Top-level boundary: callers receive a message string, not a raise.
            print(f"An error occurred: {e}")
            return f"Error occurred while processing the request: {str(e)}"

    def chat(self, provider="Hugging Face", model=None, input_text=None):
        """Send ``input_text`` to the chosen provider and return the cleaned reply.

        Args:
            provider: ``"Hugging Face"``, ``"Groq"`` or ``"SambaNova"``.
            model: Model identifier; ``None`` uses the provider's default.
            input_text: The user's message. For the Hugging Face provider a
                ready-made list of chat messages is also accepted and is
                forwarded unchanged.

        Returns:
            The cleaned reply, or ``None`` when ``input_text`` is empty or
            ``None`` (no request is made in that case).

        Raises:
            ValueError: If ``provider`` is not one of the supported names.
        """
        seed = random.randint(0, 10000)
        if not input_text:
            # Nothing to send; previously this path could reach the clean-up
            # code with no reply assigned.
            return None

        if provider == "Hugging Face" and not isinstance(input_text, str):
            # Caller supplied a prepared message list.
            messages = input_text
        else:
            # A plain string is wrapped the same way for every provider; the
            # chat-completions API expects a list of role/content messages.
            messages = [
                {"role": "system", "content": _CHAT_SYSTEM_MESSAGE},
                {"role": "user", "content": input_text},
            ]

        output = self._complete(provider, model, messages, seed)
        return self._clean_output(output)

    def img2text(self, image=None):
        """Caption ``image`` with the BLIP image-captioning model.

        Args:
            image: Object exposing ``save(fp, format=...)``; assumed to be a
                PIL image - TODO confirm against callers.

        Returns:
            The ``generated_text`` of the first result, or ``None`` when no
            image is given.
        """
        if not image:
            return None

        inference = InferenceApi(
            repo_id="Salesforce/blip-image-captioning-base",
            token=self.huggingface_token,
        )

        # JPEG cannot store alpha or palette data, so such images are
        # converted to RGB first instead of failing inside ``save``.
        mode = getattr(image, "mode", None)
        if mode is not None and mode not in ("RGB", "L", "CMYK"):
            image = image.convert("RGB")

        # Read the image into memory as JPEG bytes
        image_bytes = io.BytesIO()
        image.save(image_bytes, format="JPEG")
        image_base64 = base64.b64encode(image_bytes.getvalue()).decode("utf-8")

        # Send the API request
        response = inference(inputs={"image": image_base64})
        return response[0]["generated_text"]

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _resolve_base_prompt(prompt_type, custom_base_prompt):
        """Pick the instruction template for ``prompt_type`` (or a fallback)."""
        print(f"Received prompt_type: '{prompt_type}'")  # Debug print
        if prompt_type == "Random":
            prompt_type = random.choice(list(_PROMPT_TYPES))
            print(f"Randomly selected prompt type: {prompt_type}")

        if isinstance(prompt_type, str) and prompt_type in _PROMPT_TYPES:
            print(f"Using {prompt_type} prompt")
            return _PROMPT_TYPES[prompt_type]

        # ``None`` is treated like an empty custom prompt.
        if (custom_base_prompt or "").strip():
            print("Using custom base prompt")
            return custom_base_prompt

        print(
            f"Warning: Unknown or empty prompt type '{prompt_type}'. "
            "Using default long prompt."
        )
        return _DEFAULT_LONG_PROMPT

    def _complete(self, provider, model, messages, seed):
        """Run one chat completion on ``provider`` and return the stripped text."""
        clients = {
            "Hugging Face": self.huggingface_client,
            "Groq": self.groq_client,
            "SambaNova": self.sambanova_client,
        }
        if provider not in clients:
            raise ValueError(f"Unsupported provider: {provider}")

        params = {
            "model": model or _DEFAULT_MODELS[provider],
            "max_tokens": 1024,
            "temperature": 1.0,
            "messages": messages,
            "seed": seed,
        }
        if provider == "Hugging Face":
            # Only the Hugging Face requests set nucleus sampling.
            params["top_p"] = 0.95

        response = clients[provider].chat.completions.create(**params)
        return response.choices[0].message.content.strip()

    @staticmethod
    def _clean_output(output):
        """Strip a leading label or "Here ..." preamble from a model reply.

        NOTE(review): everything up to the FIRST ": " is dropped wherever it
        occurs, so a reply with a colon mid-text loses its beginning. Kept
        as-is for backward compatibility - confirm this is wanted.
        """
        if ": " in output:
            return output.split(": ", 1)[1].strip()
        if output.lower().startswith("here"):
            sentences = output.split(". ")
            if len(sentences) > 1:
                return ". ".join(sentences[1:]).strip()
        return output